From 397a542d8bec6b426ef40a972cb267a03018c6c0 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 26 May 2021 16:08:25 -0400 Subject: [PATCH 001/979] Add initial log-normalization for v5 objects --- DESCRIPTION | 2 + NAMESPACE | 9 ++++ R/preprocessing5.R | 117 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+) create mode 100644 R/preprocessing5.R diff --git a/DESCRIPTION b/DESCRIPTION index 3ae038404..317e834ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,6 +24,7 @@ Authors@R: c( ) URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues +Remotes: mojaveazure/seurat-object@feat/standard Depends: R (>= 4.0.0), methods @@ -92,6 +93,7 @@ Collate: 'mixscape.R' 'objects.R' 'preprocessing.R' + 'preprocessing5.R' 'tree.R' 'utilities.R' 'zzz.R' diff --git a/NAMESPACE b/NAMESPACE index fad1dd5b0..fd9747c6a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,10 +43,13 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) +S3method(LogNormalize5,default) +S3method(LogNormalize5,spam) S3method(MappingScore,AnchorSet) S3method(MappingScore,default) S3method(NormalizeData,Assay) S3method(NormalizeData,Seurat) +S3method(NormalizeData,StdAssay) S3method(NormalizeData,default) S3method(ProjectUMAP,DimReduc) S3method(ProjectUMAP,Seurat) @@ -226,6 +229,7 @@ export(LoadSTARmap) export(Loadings) export(LocalStruct) export(LogNormalize) +export(LogNormalize5) export(LogSeuratCommand) export(LogVMR) export(Luminance) @@ -379,15 +383,18 @@ importFrom(RcppAnnoy,AnnoyHamming) importFrom(RcppAnnoy,AnnoyManhattan) importFrom(Rtsne,Rtsne) importFrom(SeuratObject,"DefaultAssay<-") +importFrom(SeuratObject,"DefaultLayer<-") importFrom(SeuratObject,"Idents<-") importFrom(SeuratObject,"Index<-") importFrom(SeuratObject,"JS<-") importFrom(SeuratObject,"Key<-") +importFrom(SeuratObject,"LayerData<-") importFrom(SeuratObject,"Loadings<-") importFrom(SeuratObject,"Misc<-") importFrom(SeuratObject,"Project<-") importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") +importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,AddMetaData) importFrom(SeuratObject,Assays) importFrom(SeuratObject,Cells) @@ -397,6 +404,7 @@ importFrom(SeuratObject,CreateAssayObject) importFrom(SeuratObject,CreateDimReducObject) importFrom(SeuratObject,CreateSeuratObject) importFrom(SeuratObject,DefaultAssay) +importFrom(SeuratObject,DefaultLayer) importFrom(SeuratObject,Distances) importFrom(SeuratObject,Embeddings) importFrom(SeuratObject,FetchData) @@ -411,6 +419,7 @@ importFrom(SeuratObject,Indices) importFrom(SeuratObject,IsGlobal) importFrom(SeuratObject,JS) importFrom(SeuratObject,Key) +importFrom(SeuratObject,LayerData) importFrom(SeuratObject,Loadings) importFrom(SeuratObject,LogSeuratCommand) importFrom(SeuratObject,Misc) diff --git a/R/preprocessing5.R b/R/preprocessing5.R new file mode 100644 index 000000000..10b1b0a03 --- /dev/null +++ b/R/preprocessing5.R @@ -0,0 +1,117 @@ +#' @include generics.R +#' @include preprocessing.R +#' @importFrom methods slot +#' +NULL + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @export +#' +LogNormalize5 <- function(data, scale.factor = 1e4, verbose = TRUE) { + UseMethod(generic = 'LogNormalize5', object = data) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Functions +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Methods for Seurat-defined generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @method LogNormalize5 default +#' @export +#' +LogNormalize5.default <- LogNormalize + +#' @importFrom utils txtProgressBar setTxtProgressBar +#' @method LogNormalize5 spam +#' @export +#' +LogNormalize5.spam <- function(data, scale.factor = 1e4, verbose = TRUE) { + PackageCheck('spam') + csums <- spam::colSums(data) + if (isTRUE(x = verbose)) { + pb <- txtProgressBar(file = stderr(), style = 3) + } + for (i in seq_len(length.out = ncol(x = data))) { + idx <- which(x = slot(object = data, name = 'colindices') == i) + slot(object = data, name = 'entries')[idx] <- log1p( + x = slot(object = data, name = 'entries')[idx] / csums[i] * scale.factor + ) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / ncol(x = data)) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + return(data) +} + +#' @importFrom SeuratObject .MARGIN DefaultLayer DefaultLayer<- LayerData LayerData<- +#' +#' @method NormalizeData StdAssay +#' @export +#' +NormalizeData.StdAssay <- function( + object, + scale.factor = 1e4, + layer = NULL, + save = 'normalized', + default = TRUE, + verbose = TRUE, + ... +) { + layer <- layer %||% DefaultLayer(object = object) + if (save == DefaultLayer(object = object)) { + default <- FALSE + } + data <- LayerData(object = object, layer = layer, fast = TRUE) + if (inherits(x = data, what = 'spam') && .MARGIN(object = object, type = 'cells') == 1) { + data <- SparseNormalize(data = data, scale.factor = scale.factor, verbose = verbose) + } else { + data <- LogNormalize5(data = data, scale.factor = scale.factor, verbose = verbose) + } + LayerData(object = object, layer = save) <- data + if (isTRUE(x = default)) { + DefaultLayer(object = object) <- save + } + gc(verbose = FALSE) + return(object) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Methods for R-defined generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Internal +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @importFrom utils txtProgressBar setTxtProgressBar +#' +SparseNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { + p <- slot(object = data, name = 'rowpointers') + if (isTRUE(x = verbose)) { + pb <- txtProgressBar(style = 3L, file = stderr()) + } + np <- length(x = p) - 1 + for (i in seq_len(length.out = np)) { + idx <- seq.int(from = p[i], to = p[i + 1] - 1) + xidx <- slot(object = data, name = 'entries')[idx] + slot(object = data, name = 'entries')[idx] <- log1p( + x = xidx / sum(xidx) * scale.factor + ) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / np) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + return(data) +} From 262ef227b582cd921157796db2bdb45d835b276e Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 26 May 2021 16:08:53 -0400 Subject: [PATCH 002/979] Add renv to isolate v5 from v4 --- .Rbuildignore | 2 + .Rprofile | 1 + renv/.gitignore | 4 + renv/activate.R | 440 ++++++++++++++++++++++++++++++++++++++++++++++ renv/settings.dcf | 7 + 5 files changed, 454 insertions(+) create mode 100644 .Rprofile create mode 100644 renv/.gitignore create mode 100644 renv/activate.R create mode 100644 renv/settings.dcf diff --git a/.Rbuildignore b/.Rbuildignore index bfb0f62ca..e67eeb260 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^renv$ +^renv\.lock$ ^.*\.Rproj$ ^\.Rproj\.user$ ^.*\.old$ diff --git a/.Rprofile b/.Rprofile new file mode 100644 index 000000000..81b960f5c --- /dev/null +++ b/.Rprofile @@ -0,0 +1 @@ +source("renv/activate.R") diff --git a/renv/.gitignore b/renv/.gitignore new file mode 100644 index 000000000..62e60ad4a --- /dev/null +++ b/renv/.gitignore @@ -0,0 +1,4 @@ +library/ +lock/ +python/ +staging/ diff --git a/renv/activate.R b/renv/activate.R new file mode 100644 index 000000000..5be4bbd65 --- /dev/null +++ b/renv/activate.R @@ -0,0 +1,440 @@ + +local({ + + # the requested version of renv + version <- "0.12.5" + + # the project directory + project <- getwd() + + # avoid recursion + if (!is.na(Sys.getenv("RENV_R_INITIALIZING", unset = NA))) + return(invisible(TRUE)) + + # signal that we're loading renv during R startup + Sys.setenv("RENV_R_INITIALIZING" = "true") + on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE) + + # signal that we've consented to use renv + options(renv.consent = TRUE) + + # load the 'utils' package eagerly -- this ensures that renv shims, which + # mask 'utils' packages, will come first on the search path + library(utils, lib.loc = .Library) + + # check to see if renv has already been loaded + if ("renv" %in% loadedNamespaces()) { + + # if renv has already been loaded, and it's the requested version of renv, + # nothing to do + spec <- .getNamespaceInfo(.getNamespace("renv"), "spec") + if (identical(spec[["version"]], version)) + return(invisible(TRUE)) + + # otherwise, unload and attempt to load the correct version of renv + unloadNamespace("renv") + + } + + # load bootstrap tools + bootstrap <- function(version, library) { + + # attempt to download renv + tarball <- tryCatch(renv_bootstrap_download(version), error = identity) + if (inherits(tarball, "error")) + stop("failed to download renv ", version) + + # now attempt to install + status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity) + if (inherits(status, "error")) + stop("failed to install renv ", version) + + } + + renv_bootstrap_tests_running <- function() { + getOption("renv.tests.running", default = FALSE) + } + + renv_bootstrap_repos <- function() { + + # check for repos override + repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) + if (!is.na(repos)) + return(repos) + + # if we're testing, re-use the test repositories + if (renv_bootstrap_tests_running()) + return(getOption("renv.tests.repos")) + + # retrieve current repos + repos <- getOption("repos") + + # ensure @CRAN@ entries are resolved + repos[repos == "@CRAN@"] <- "https://cloud.r-project.org" + + # add in renv.bootstrap.repos if set + default <- c(CRAN = "https://cloud.r-project.org") + extra <- getOption("renv.bootstrap.repos", default = default) + repos <- c(repos, extra) + + # remove duplicates that might've snuck in + dupes <- duplicated(repos) | duplicated(names(repos)) + repos[!dupes] + + } + + renv_bootstrap_download <- function(version) { + + # if the renv version number has 4 components, assume it must + # be retrieved via github + nv <- numeric_version(version) + components <- unclass(nv)[[1]] + + methods <- if (length(components) == 4L) { + list( + renv_bootstrap_download_github + ) + } else { + list( + renv_bootstrap_download_cran_latest, + renv_bootstrap_download_cran_archive + ) + } + + for (method in methods) { + path <- tryCatch(method(version), error = identity) + if (is.character(path) && file.exists(path)) + return(path) + } + + stop("failed to download renv ", version) + + } + + renv_bootstrap_download_impl <- function(url, destfile) { + + mode <- "wb" + + # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 + fixup <- + Sys.info()[["sysname"]] == "Windows" && + substring(url, 1L, 5L) == "file:" + + if (fixup) + mode <- "w+b" + + utils::download.file( + url = url, + destfile = destfile, + mode = mode, + quiet = TRUE + ) + + } + + renv_bootstrap_download_cran_latest <- function(version) { + + repos <- renv_bootstrap_download_cran_latest_find(version) + + message("* Downloading renv ", version, " from CRAN ... ", appendLF = FALSE) + + info <- tryCatch( + utils::download.packages( + pkgs = "renv", + repos = repos, + destdir = tempdir(), + quiet = TRUE + ), + condition = identity + ) + + if (inherits(info, "condition")) { + message("FAILED") + return(FALSE) + } + + message("OK") + info[1, 2] + + } + + renv_bootstrap_download_cran_latest_find <- function(version) { + + all <- renv_bootstrap_repos() + + for (repos in all) { + + db <- tryCatch( + as.data.frame( + x = utils::available.packages(repos = repos), + stringsAsFactors = FALSE + ), + error = identity + ) + + if (inherits(db, "error")) + next + + entry <- db[db$Package %in% "renv" & db$Version %in% version, ] + if (nrow(entry) == 0) + next + + return(repos) + + } + + fmt <- "renv %s is not available from your declared package repositories" + stop(sprintf(fmt, version)) + + } + + renv_bootstrap_download_cran_archive <- function(version) { + + name <- sprintf("renv_%s.tar.gz", version) + repos <- renv_bootstrap_repos() + urls <- file.path(repos, "src/contrib/Archive/renv", name) + destfile <- file.path(tempdir(), name) + + message("* Downloading renv ", version, " from CRAN archive ... ", appendLF = FALSE) + + for (url in urls) { + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (identical(status, 0L)) { + message("OK") + return(destfile) + } + + } + + message("FAILED") + return(FALSE) + + } + + renv_bootstrap_download_github <- function(version) { + + enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") + if (!identical(enabled, "TRUE")) + return(FALSE) + + # prepare download options + pat <- Sys.getenv("GITHUB_PAT") + if (nzchar(Sys.which("curl")) && nzchar(pat)) { + fmt <- "--location --fail --header \"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "curl", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { + fmt <- "--header=\"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "wget", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } + + message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE) + + url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) + name <- sprintf("renv_%s.tar.gz", version) + destfile <- file.path(tempdir(), name) + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (!identical(status, 0L)) { + message("FAILED") + return(FALSE) + } + + message("OK") + return(destfile) + + } + + renv_bootstrap_install <- function(version, tarball, library) { + + # attempt to install it into project library + message("* Installing renv ", version, " ... ", appendLF = FALSE) + dir.create(library, showWarnings = FALSE, recursive = TRUE) + + # invoke using system2 so we can capture and report output + bin <- R.home("bin") + exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" + r <- file.path(bin, exe) + args <- c("--vanilla", "CMD", "INSTALL", "-l", shQuote(library), shQuote(tarball)) + output <- system2(r, args, stdout = TRUE, stderr = TRUE) + message("Done!") + + # check for successful install + status <- attr(output, "status") + if (is.numeric(status) && !identical(status, 0L)) { + header <- "Error installing renv:" + lines <- paste(rep.int("=", nchar(header)), collapse = "") + text <- c(header, lines, output) + writeLines(text, con = stderr()) + } + + status + + } + + renv_bootstrap_prefix <- function() { + + # construct version prefix + version <- paste(R.version$major, R.version$minor, sep = ".") + prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") + + # include SVN revision for development versions of R + # (to avoid sharing platform-specific artefacts with released versions of R) + devel <- + identical(R.version[["status"]], "Under development (unstable)") || + identical(R.version[["nickname"]], "Unsuffered Consequences") + + if (devel) + prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") + + # build list of path components + components <- c(prefix, R.version$platform) + + # include prefix if provided by user + prefix <- Sys.getenv("RENV_PATHS_PREFIX") + if (nzchar(prefix)) + components <- c(prefix, components) + + # build prefix + paste(components, collapse = "/") + + } + + renv_bootstrap_library_root_name <- function(project) { + + # use project name as-is if requested + asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") + if (asis) + return(basename(project)) + + # otherwise, disambiguate based on project's path + id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) + paste(basename(project), id, sep = "-") + + } + + renv_bootstrap_library_root <- function(project) { + + path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) + if (!is.na(path)) + return(path) + + path <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) + if (!is.na(path)) { + name <- renv_bootstrap_library_root_name(project) + return(file.path(path, name)) + } + + file.path(project, "renv/library") + + } + + renv_bootstrap_validate_version <- function(version) { + + loadedversion <- utils::packageDescription("renv", fields = "Version") + if (version == loadedversion) + return(TRUE) + + # assume four-component versions are from GitHub; three-component + # versions are from CRAN + components <- strsplit(loadedversion, "[.-]")[[1]] + remote <- if (length(components) == 4L) + paste("rstudio/renv", loadedversion, sep = "@") + else + paste("renv", loadedversion, sep = "@") + + fmt <- paste( + "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", + "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", + "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", + sep = "\n" + ) + + msg <- sprintf(fmt, loadedversion, version, remote) + warning(msg, call. = FALSE) + + FALSE + + } + + renv_bootstrap_hash_text <- function(text) { + + hashfile <- tempfile("renv-hash-") + on.exit(unlink(hashfile), add = TRUE) + + writeLines(text, con = hashfile) + tools::md5sum(hashfile) + + } + + renv_bootstrap_load <- function(project, libpath, version) { + + # try to load renv from the project library + if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) + return(FALSE) + + # warn if the version of renv loaded does not match + renv_bootstrap_validate_version(version) + + # load the project + renv::load(project) + + TRUE + + } + + # construct path to library root + root <- renv_bootstrap_library_root(project) + + # construct library prefix for platform + prefix <- renv_bootstrap_prefix() + + # construct full libpath + libpath <- file.path(root, prefix) + + # attempt to load + if (renv_bootstrap_load(project, libpath, version)) + return(TRUE) + + # load failed; inform user we're about to bootstrap + prefix <- paste("# Bootstrapping renv", version) + postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "") + header <- paste(prefix, postfix) + message(header) + + # perform bootstrap + bootstrap(version, libpath) + + # exit early if we're just testing bootstrap + if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) + return(TRUE) + + # try again to load + if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { + message("* Successfully installed and loaded renv ", version, ".") + return(renv::load()) + } + + # failed to download or load renv; warn the user + msg <- c( + "Failed to find an renv installation: the project will not be loaded.", + "Use `renv::activate()` to re-initialize the project." + ) + + warning(paste(msg, collapse = "\n"), call. = FALSE) + +}) diff --git a/renv/settings.dcf b/renv/settings.dcf new file mode 100644 index 000000000..bf722e818 --- /dev/null +++ b/renv/settings.dcf @@ -0,0 +1,7 @@ +external.libraries: +ignored.packages: +package.dependency.fields: Imports, Depends, LinkingTo +r.version: +snapshot.type: implicit +use.cache: TRUE +vcs.ignore.library: TRUE From 29cdfdc008b8d9f762e0d66c445374ac3618b3b4 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Sun, 26 Sep 2021 17:39:30 -0400 Subject: [PATCH 003/979] Reclass the default NormalizeData for v3/v4 Now for matrix and dgCMatrix only --- R/preprocessing.R | 4 +++- R/zzz.R | 7 +++++++ man/NormalizeData.Rd | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 335dd78d4..75a8eb9b8 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2382,9 +2382,11 @@ FindSpatiallyVariableFeatures.Seurat <- function( #' #' @rdname NormalizeData #' @concept preprocessing +#' +#' @method NormalizeData V3Matrix #' @export #' -NormalizeData.default <- function( +NormalizeData.V3Matrix <- function( object, normalization.method = "LogNormalize", scale.factor = 1e4, diff --git a/R/zzz.R b/R/zzz.R index 30a582801..3dc72747e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -36,6 +36,13 @@ seurat_default_options <- list( Seurat.warn.vlnplot.split = TRUE ) +#' @importFrom methods setClassUnion +#' @importClassesFrom Matrix dgCMatrix +#' +NULL + +setClassUnion(name = 'V3Matrix', members = c('matrix', 'dgCMatrix')) + AttachDeps <- function(deps) { for (d in deps) { if (!paste0('package:', d) %in% search()) { diff --git a/man/NormalizeData.Rd b/man/NormalizeData.Rd index cb3ac1635..0d8b48e5b 100644 --- a/man/NormalizeData.Rd +++ b/man/NormalizeData.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/generics.R, R/preprocessing.R \name{NormalizeData} \alias{NormalizeData} -\alias{NormalizeData.default} +\alias{NormalizeData.V3Matrix} \alias{NormalizeData.Assay} \alias{NormalizeData.Seurat} \title{Normalize Data} \usage{ NormalizeData(object, ...) -\method{NormalizeData}{default}( +\method{NormalizeData}{V3Matrix}( object, normalization.method = "LogNormalize", scale.factor = 10000, From 7add16e6b3c8f267d7d1806de7554a9bf4452b78 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Sun, 26 Sep 2021 17:40:48 -0400 Subject: [PATCH 004/979] Add new default NormalizeData for v5 Built to be matrix-backend agnostic Built to be directionality-agnostic Include optimized method for sparse matrices --- NAMESPACE | 5 +- R/preprocessing5.R | 128 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 103 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2184cc6d0..1cbacb630 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,12 +44,12 @@ S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) S3method(LogNormalize5,default) -S3method(LogNormalize5,spam) S3method(MappingScore,AnchorSet) S3method(MappingScore,default) S3method(NormalizeData,Assay) S3method(NormalizeData,Seurat) S3method(NormalizeData,StdAssay) +S3method(NormalizeData,V3Matrix) S3method(NormalizeData,default) S3method(ProjectUMAP,DimReduc) S3method(ProjectUMAP,Seurat) @@ -397,6 +397,7 @@ importFrom(SeuratObject,"Project<-") importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.MARGIN) +importFrom(SeuratObject,.SparseSlots) importFrom(SeuratObject,AddMetaData) importFrom(SeuratObject,Assays) importFrom(SeuratObject,Cells) @@ -409,6 +410,7 @@ importFrom(SeuratObject,DefaultAssay) importFrom(SeuratObject,DefaultLayer) importFrom(SeuratObject,Distances) importFrom(SeuratObject,Embeddings) +importFrom(SeuratObject,Features) importFrom(SeuratObject,FetchData) importFrom(SeuratObject,GetAssayData) importFrom(SeuratObject,GetImage) @@ -419,6 +421,7 @@ importFrom(SeuratObject,Images) importFrom(SeuratObject,Index) importFrom(SeuratObject,Indices) importFrom(SeuratObject,IsGlobal) +importFrom(SeuratObject,IsSparse) importFrom(SeuratObject,JS) importFrom(SeuratObject,Key) importFrom(SeuratObject,LayerData) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 10b1b0a03..890f79d7f 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -10,7 +10,7 @@ NULL #' @export #' -LogNormalize5 <- function(data, scale.factor = 1e4, verbose = TRUE) { +LogNormalize5 <- function(data, scale.factor = 1e4, margin = 2L, verbose = TRUE) { UseMethod(generic = 'LogNormalize5', object = data) } @@ -22,28 +22,36 @@ LogNormalize5 <- function(data, scale.factor = 1e4, verbose = TRUE) { # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @method LogNormalize5 default -#' @export +#' @importFrom utils setTxtProgressBar txtProgressBar #' -LogNormalize5.default <- LogNormalize - -#' @importFrom utils txtProgressBar setTxtProgressBar -#' @method LogNormalize5 spam +#' @method LogNormalize5 default #' @export #' -LogNormalize5.spam <- function(data, scale.factor = 1e4, verbose = TRUE) { - PackageCheck('spam') - csums <- spam::colSums(data) +LogNormalize5.default <- function( + data, + scale.factor = 1e4, + margin = 2L, + verbose = TRUE +) { + margin <- SeuratObject:::.CheckFmargin(fmargin = margin) + ncells <- dim(x = data)[margin] if (isTRUE(x = verbose)) { pb <- txtProgressBar(file = stderr(), style = 3) } - for (i in seq_len(length.out = ncol(x = data))) { - idx <- which(x = slot(object = data, name = 'colindices') == i) - slot(object = data, name = 'entries')[idx] <- log1p( - x = slot(object = data, name = 'entries')[idx] / csums[i] * scale.factor - ) + for (i in seq_len(length.out = ncells)) { + x <- if (margin == 1L) { + data[i, ] + } else { + data[, i] + } + xnorm <- log1p(x = x / sum(x) * scale.factor) + if (margin == 1L) { + data[i, ] <- xnorm + } else { + data[, i] <- xnorm + } if (isTRUE(x = verbose)) { - setTxtProgressBar(pb = pb, value = i / ncol(x = data)) + setTxtProgressBar(pb = pb, value = i / ncells) } } if (isTRUE(x = verbose)) { @@ -52,14 +60,58 @@ LogNormalize5.spam <- function(data, scale.factor = 1e4, verbose = TRUE) { return(data) } -#' @importFrom SeuratObject .MARGIN DefaultLayer DefaultLayer<- LayerData LayerData<- +#' @importFrom SeuratObject IsSparse .MARGIN +#' +#' @method NormalizeData default +#' @export +#' +NormalizeData.default <- function( + object, + method = c('LogNormalize'), + scale.factor = 1e4, + cmargin = 2L, + margin = 1L, + block.size = NULL, + verbose = TRUE, + ... +) { + method <- method[1L] + method <- match.arg(arg = method) + # TODO: enable parallelization via future + normalized <- switch( + EXPR = method, + 'LogNormalize' = { + if (IsSparse(x = object) && .MARGIN(object = object) == cmargin) { + SparseNormalize( + data = object, + scale.factor = scale.factor, + verbose = verbose + ) + } else { + LogNormalize5( + data = object, + scale.factor = scale.factor, + margin = cmargin, + verbose = verbose + ) + } + } + ) + return(normalized) +} + +#' @importFrom SeuratObject .MARGIN Cells DefaultLayer DefaultLayer<- Features +#' LayerData LayerData<- #' #' @method NormalizeData StdAssay #' @export #' NormalizeData.StdAssay <- function( object, + method = 'LogNormalize', scale.factor = 1e4, + margin = 1L, + block.size = NULL, layer = NULL, save = 'normalized', default = TRUE, @@ -70,13 +122,22 @@ NormalizeData.StdAssay <- function( if (save == DefaultLayer(object = object)) { default <- FALSE } - data <- LayerData(object = object, layer = layer, fast = TRUE) - if (inherits(x = data, what = 'spam') && .MARGIN(object = object, type = 'cells') == 1) { - data <- SparseNormalize(data = data, scale.factor = scale.factor, verbose = verbose) - } else { - data <- LogNormalize5(data = data, scale.factor = scale.factor, verbose = verbose) - } - LayerData(object = object, layer = save) <- data + data <- NormalizeData( + object = LayerData(object = object, layer = layer, fast = TRUE), + method = method, + scale.factor = 1e4, + cmargin = .MARGIN(object = object, type = 'cells'), + margin = margin, + block.size = block.size, + verbose = verbose, + ... + ) + LayerData( + object = object, + layer = save, + features = Features(x = object, layer = layer), + cells = Cells(x = object, layer = layer) + ) <- data if (isTRUE(x = default)) { DefaultLayer(object = object) <- save } @@ -92,18 +153,23 @@ NormalizeData.StdAssay <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom SeuratObject .SparseSlots #' @importFrom utils txtProgressBar setTxtProgressBar #' SparseNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { - p <- slot(object = data, name = 'rowpointers') + entryname <- .SparseSlots(x = data, type = 'entries') + p <- slot(object = data, name = .SparseSlots(x = data, type = 'pointers')) + if (p[1L] == 0) { + p <- p + 1L + } + np <- length(x = p) - 1L if (isTRUE(x = verbose)) { pb <- txtProgressBar(style = 3L, file = stderr()) } - np <- length(x = p) - 1 for (i in seq_len(length.out = np)) { - idx <- seq.int(from = p[i], to = p[i + 1] - 1) - xidx <- slot(object = data, name = 'entries')[idx] - slot(object = data, name = 'entries')[idx] <- log1p( + idx <- seq.int(from = p[i], to = p[i + 1] - 1L) + xidx <- slot(object = data, name = entryname)[idx] + slot(object = data, name = entryname)[idx] <- log1p( x = xidx / sum(xidx) * scale.factor ) if (isTRUE(x = verbose)) { @@ -115,3 +181,7 @@ SparseNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { } return(data) } + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# S4 Methods +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From 233da8a0e22879ab7ea8b1e68d06b850fac8a2b4 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Sun, 26 Sep 2021 17:41:47 -0400 Subject: [PATCH 005/979] Update packages in renv --- renv.lock | 1068 +++++++++++++++++++++++++++++++++++++++++++++++ renv/.gitignore | 1 + renv/activate.R | 290 +++++++++++-- 3 files changed, 1328 insertions(+), 31 deletions(-) create mode 100644 renv.lock diff --git a/renv.lock b/renv.lock new file mode 100644 index 000000000..78249d425 --- /dev/null +++ b/renv.lock @@ -0,0 +1,1068 @@ +{ + "R": { + "Version": "4.1.0", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://cloud.r-project.org" + } + ] + }, + "Packages": { + "BH": { + "Package": "BH", + "Version": "1.75.0-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e4c04affc2cac20c8fec18385cd14691" + }, + "FNN": { + "Package": "FNN", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b56998fff55e4a4b4860ad6e8c67e0f9" + }, + "KernSmooth": { + "Package": "KernSmooth", + "Version": "2.23-20", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8dcfa99b14c296bc9f1fd64d52fd3ce7" + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-54", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0e59129db205112e3963904db67fd0dc" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.3-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4ed05e9c9726267e4a5872e09c04587c" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "RANN": { + "Package": "RANN", + "Version": "2.6.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d128ea05a972d3e67c6f39de52c72bd7" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e031418365a7f7a766181ab5a41a5716" + }, + "ROCR": { + "Package": "ROCR", + "Version": "1.0-11", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "cc151930e20e16427bc3d0daec62b4a9" + }, + "RSpectra": { + "Package": "RSpectra", + "Version": "0.16-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a41329d24d5a98eaed2bd0159adb1b5f" + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "dab19adae4440ae55aa8a9d238b246bb" + }, + "RcppAnnoy": { + "Package": "RcppAnnoy", + "Version": "0.0.19", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5681153e3eb103725e35ac5f7ebca910" + }, + "RcppArmadillo": { + "Package": "RcppArmadillo", + "Version": "0.10.6.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d34bcef6e2df81ab2d44c7fbe8b4d3f0" + }, + "RcppEigen": { + "Package": "RcppEigen", + "Version": "0.3.3.9.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ddfa72a87fdf4c80466a20818be91d00" + }, + "RcppProgress": { + "Package": "RcppProgress", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "1c0aa18b97e6aaa17f93b8b866c0ace5" + }, + "Rtsne": { + "Package": "Rtsne", + "Version": "0.15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f153432c4ca15b937ccfaa40f167c892" + }, + "SeuratObject": { + "Package": "SeuratObject", + "Version": "4.9.9.9000", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "seurat-object", + "RemoteUsername": "mojaveazure", + "RemoteRef": "feat/standard", + "RemoteSha": "e9029cdf82d0d0c2811cc84931f27aa7f5ff3c4c", + "Hash": "86d79ca1e6507e84353cb34cfe2d3811" + }, + "abind": { + "Package": "abind", + "Version": "1.4-5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4f57884290cc75ab22f4af9e9d4ca862" + }, + "askpass": { + "Package": "askpass", + "Version": "1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e8a22846fff485f0be3770c2da758713" + }, + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "543776ae6848fde2f48ff3816d0628bc" + }, + "bitops": { + "Package": "bitops", + "Version": "1.0-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b7d8d8ee39869c18d8846a184dd8a1af" + }, + "brio": { + "Package": "brio", + "Version": "1.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2f01e16ff9571fe70381c7b9ae560dc4" + }, + "bslib": { + "Package": "bslib", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "074ebc936dbcecd7115ed8083643b550" + }, + "caTools": { + "Package": "caTools", + "Version": "1.18.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "34d90fa5845004236b9eacafc51d07b2" + }, + "cachem": { + "Package": "cachem", + "Version": "1.0.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "648c5b3d71e6a37e3043617489a0a0e9" + }, + "callr": { + "Package": "callr", + "Version": "3.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "461aa75a11ce2400245190ef5d3995df" + }, + "cli": { + "Package": "cli", + "Version": "3.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e3ae5d68dea0c55a12ea12a9fda02e61" + }, + "cluster": { + "Package": "cluster", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ce49bfe5bc0b3ecd43a01fe1b01c2243" + }, + "codetools": { + "Package": "codetools", + "Version": "0.2-18", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "019388fc48e48b3da0d3a76ff94608a8" + }, + "colorspace": { + "Package": "colorspace", + "Version": "2.0-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6baccb763ee83c0bd313460fdb8b8a84" + }, + "commonmark": { + "Package": "commonmark", + "Version": "1.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0f22be39ec1d141fd03683c06f3a6e67" + }, + "cowplot": { + "Package": "cowplot", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b418e8423699d11c7f2087c2bfd07da2" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "40ba3fd26c8f61d8d14d334bc7761df9" + }, + "crayon": { + "Package": "crayon", + "Version": "1.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e75525c55c70e5f4f78c9960a4b402e9" + }, + "crosstalk": { + "Package": "crosstalk", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2b06f9e415a62b6762e4b8098d2aecbc" + }, + "curl": { + "Package": "curl", + "Version": "4.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "022c42d49c28e95d69ca60446dbabf88" + }, + "data.table": { + "Package": "data.table", + "Version": "1.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d1b8b1a821ee564a3515fa6c6d5c52dc" + }, + "deldir": { + "Package": "deldir", + "Version": "0.2-10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6ba6a411bdd4c1b297bd54e2c5c98385" + }, + "desc": { + "Package": "desc", + "Version": "1.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b6963166f7f10b970af1006c462ce6cd" + }, + "diffobj": { + "Package": "diffobj", + "Version": "0.3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "feb5b7455eba422a2c110bb89852e6a3" + }, + "digest": { + "Package": "digest", + "Version": "0.6.28", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "49b5c6e230bfec487b8917d5a0c77cca" + }, + "dotCall64": { + "Package": "dotCall64", + "Version": "1.0-1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d0ef6cd1546530da4d72179b52856e84" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "36f1ae62f026c8ba9f9b5c9a08c03297" + }, + "dqrng": { + "Package": "dqrng", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3ce2af5ead3b01c518fd453c7fe5a51a" + }, + "ellipsis": { + "Package": "ellipsis", + "Version": "0.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" + }, + "evaluate": { + "Package": "evaluate", + "Version": "0.14", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ec8ca05cffcc70569eaaad8469d2a3a7" + }, + "fansi": { + "Package": "fansi", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d447b40982c576a72b779f0a3b3da227" + }, + "farver": { + "Package": "farver", + "Version": "2.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c98eb5133d9cb9e1622b8691487f11bb" + }, + "fastmap": { + "Package": "fastmap", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "77bd60a6157420d4ffa93b27cf6a58b8" + }, + "fitdistrplus": { + "Package": "fitdistrplus", + "Version": "1.1-5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "df82a154b37aad59c6a9cc2b54614296" + }, + "fontawesome": { + "Package": "fontawesome", + "Version": "0.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "55624ed409e46c5f358b2c060be87f67" + }, + "fs": { + "Package": "fs", + "Version": "1.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "44594a07a42e5f91fac9f93fda6d0109" + }, + "future": { + "Package": "future", + "Version": "1.22.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9c56382c3e53f0b4fc0fc16d88fc3974" + }, + "future.apply": { + "Package": "future.apply", + "Version": "1.8.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f568ce73d3d59582b0f7babd0eb33d07" + }, + "generics": { + "Package": "generics", + "Version": "0.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4d243a9c10b00589889fe32314ffd902" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.3.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d7566c471c7b17e095dd023b9ef155ad" + }, + "ggrepel": { + "Package": "ggrepel", + "Version": "0.9.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "08ab869f37e6a7741a64ab9069bcb67d" + }, + "ggridges": { + "Package": "ggridges", + "Version": "0.5.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9d028e8f37c84dba356ce3c367a1978e" + }, + "globals": { + "Package": "globals", + "Version": "0.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "eca8023ed5ca6372479ebb9b3207f5ae" + }, + "glue": { + "Package": "glue", + "Version": "1.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6efd734b14c6471cfe443345f3e35e29" + }, + "goftest": { + "Package": "goftest", + "Version": "1.2-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3c9209bce1b10900155ea37ce954cf30" + }, + "gplots": { + "Package": "gplots", + "Version": "3.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e65e5d5dea4cbb9ba822dcd782b2ee1f" + }, + "gridExtra": { + "Package": "gridExtra", + "Version": "2.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7d7f283939f563670a697165b2cf5560" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ac5c6baf7822ce8732b343f14c072c4d" + }, + "gtools": { + "Package": "gtools", + "Version": "3.9.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2ace6c4a06297d0b364e0444384a2b82" + }, + "here": { + "Package": "here", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "24b224366f9c2e7534d2344d10d59211" + }, + "highr": { + "Package": "highr", + "Version": "0.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "526c484233f42522278ab06fb185cb26" + }, + "htmlwidgets": { + "Package": "htmlwidgets", + "Version": "1.5.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "76147821cd3fcd8c4b04e1ef0498e7fb" + }, + "httpuv": { + "Package": "httpuv", + "Version": "1.6.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "65e865802fe6dd1bafef1dae5b80a844" + }, + "httr": { + "Package": "httr", + "Version": "1.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a525aba14184fec243f9eaec62fbed43" + }, + "ica": { + "Package": "ica", + "Version": "1.0-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "95ba9b882bb834ecbdad37338a11f3f8" + }, + "igraph": { + "Package": "igraph", + "Version": "1.2.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7b1f856410253d56ea67ad808f7cdff6" + }, + "irlba": { + "Package": "irlba", + "Version": "2.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a9ad517358000d57022401ef18ee657a" + }, + "isoband": { + "Package": "isoband", + "Version": "0.2.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7ab57a6de7f48a8dc84910d1eca42883" + }, + "jquerylib": { + "Package": "jquerylib", + "Version": "0.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5aab57a3bd297eee1c1d862735972182" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.7.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "98138e0994d41508c7a6b84a0600cfcb" + }, + "knitr": { + "Package": "knitr", + "Version": "1.34", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "aa958054ac6f0360926bb952ea302f0f" + }, + "labeling": { + "Package": "labeling", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3d5108641f47470611a32d0bdf357a72" + }, + "later": { + "Package": "later", + "Version": "1.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7e7b457d7766bc47f2a5f21cc2984f8e" + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-44", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f36bf1a849d9106dc2af72e501f9de41" + }, + "lazyeval": { + "Package": "lazyeval", + "Version": "0.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d908914ae53b04d4c0c0fd72ecc35370" + }, + "leiden": { + "Package": "leiden", + "Version": "0.3.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d6768920a499f996e6025c5daecf33fb" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a6b6d352e3ed897373ab19d8395c98d0" + }, + "listenv": { + "Package": "listenv", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0bde42ee282efb18c7c4e63822f5b4f7" + }, + "lmtest": { + "Package": "lmtest", + "Version": "0.9-38", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b0edacc02f7a3dad41a1afc385e929f4" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "41287f1ac7d28a92f0a286ed507928d3" + }, + "matrixStats": { + "Package": "matrixStats", + "Version": "0.61.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b8e6221fc11247b12ab1b055a6f66c27" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-36", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "93cc747b0e1ad882a4570463c3575c23" + }, + "mime": { + "Package": "mime", + "Version": "0.11", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8974a907200fc9948d636fe7d85ca9fb" + }, + "miniUI": { + "Package": "miniUI", + "Version": "0.1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fec5f52652d60615fdb3957b3d74324a" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6dfe8bf774944bd5595785e3229d8771" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-152", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "35de1ce639f20b5e10f7f46260730c65" + }, + "openssl": { + "Package": "openssl", + "Version": "1.4.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5406fd37ef0bf9b88c8a4f264d6ec220" + }, + "parallelly": { + "Package": "parallelly", + "Version": "1.28.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5300c9fc71841550bdca64d39e82af0e" + }, + "patchwork": { + "Package": "patchwork", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c446b30cb33ec125ff02588b60660ccb" + }, + "pbapply": { + "Package": "pbapply", + "Version": "1.5-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "11359a5bb73622ab3f4136bf57108b64" + }, + "pillar": { + "Package": "pillar", + "Version": "1.6.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "43f228eb4b49093d1c8a5c93cae9efe9" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "pkgload": { + "Package": "pkgload", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "53139eedf68b98eecd5289664969c3f2" + }, + "plotly": { + "Package": "plotly", + "Version": "4.9.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "af4b92cb3828aa30002e2f945c49c2d7" + }, + "plyr": { + "Package": "plyr", + "Version": "1.8.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ec0e5ab4e5f851f6ef32cd1d1984957f" + }, + "png": { + "Package": "png", + "Version": "0.1-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "03b7076c234cb3331288919983326c55" + }, + "polyclip": { + "Package": "polyclip", + "Version": "1.10-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "cb167f328b3ada4ec5cf67a7df4c900a" + }, + "praise": { + "Package": "praise", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a555924add98c99d2f411e37e7d25e9f" + }, + "processx": { + "Package": "processx", + "Version": "3.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0cbca2bc4d16525d009c4dbba156b37c" + }, + "promises": { + "Package": "promises", + "Version": "1.2.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4ab2c43adb4d4699cf3690acd378d75d" + }, + "ps": { + "Package": "ps", + "Version": "1.6.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "32620e2001c1dce1af49c49dccbb9420" + }, + "purrr": { + "Package": "purrr", + "Version": "0.3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "97def703420c8ab10d8f0e6c72101e02" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "rematch2": { + "Package": "rematch2", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "76c9e04c712a05848ae7a23d2f170a40" + }, + "renv": { + "Package": "renv", + "Version": "0.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "30e5eba91b67f7f4d75d31de14bbfbdc" + }, + "reshape2": { + "Package": "reshape2", + "Version": "1.4.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "bb5996d0bd962d214a11140d77589917" + }, + "reticulate": { + "Package": "reticulate", + "Version": "1.22", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b34a8bb69005168078d1d546a53912b2" + }, + "rlang": { + "Package": "rlang", + "Version": "0.4.11", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "515f341d3affe0de9e4a7f762efb0456" + }, + "rpart": { + "Package": "rpart", + "Version": "4.1-15", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9787c1fcb680e655d062e7611cadf78e" + }, + "rprojroot": { + "Package": "rprojroot", + "Version": "2.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "249d8cd1e74a8f6a26194a91b47f21d1" + }, + "rstudioapi": { + "Package": "rstudioapi", + "Version": "0.13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "06c85365a03fdaf699966cc1d3cf53ea" + }, + "sass": { + "Package": "sass", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "50cf822feb64bb3977bda0b7091be623" + }, + "scales": { + "Package": "scales", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6f76f71042411426ec8df6c54f34e6dd" + }, + "scattermore": { + "Package": "scattermore", + "Version": "0.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "373eb417aadd7c7f35861953f3fe3deb" + }, + "sctransform": { + "Package": "sctransform", + "Version": "0.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f6423b883393222fb5c022799374ccec" + }, + "shiny": { + "Package": "shiny", + "Version": "1.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "72bf551406bb75a182df55090dc5a25a" + }, + "sitmo": { + "Package": "sitmo", + "Version": "2.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0f9ba299f2385e686745b066c6d7a7c4" + }, + "sourcetools": { + "Package": "sourcetools", + "Version": "0.1.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "947e4e02a79effa5d512473e10f41797" + }, + "spam": { + "Package": "spam", + "Version": "2.7-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "1abde1bf414425ea288fd9e277d6e410" + }, + "spatstat.core": { + "Package": "spatstat.core", + "Version": "2.3-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "dda7556b6adf3f14c4cce941df3c30ba" + }, + "spatstat.data": { + "Package": "spatstat.data", + "Version": "2.1-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4e8002e034d7d0af852b2bbcce851c2e" + }, + "spatstat.geom": { + "Package": "spatstat.geom", + "Version": "2.2-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "abf3bf1c02e9af3be5b6817d463dc064" + }, + "spatstat.sparse": { + "Package": "spatstat.sparse", + "Version": "2.0-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "615efff0d33e612b15dc3fc3ba0cc554" + }, + "spatstat.utils": { + "Package": "spatstat.utils", + "Version": "2.2-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a1519c316a49d8041bdc6f37fd0249e2" + }, + "stringi": { + "Package": "stringi", + "Version": "1.7.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ebaccb577da50829a3bb1b8296f318a5" + }, + "stringr": { + "Package": "stringr", + "Version": "1.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0759e6b6c0957edb1311028a49a35e76" + }, + "survival": { + "Package": "survival", + "Version": "3.2-12", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "dcdc241a75d17656fec8d02cd8215c39" + }, + "sys": { + "Package": "sys", + "Version": "3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b227d13e29222b4574486cfcbde077fa" + }, + "tensor": { + "Package": "tensor", + "Version": "1.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "25cfab6cf405c15bccf7e69ec39df090" + }, + "testthat": { + "Package": "testthat", + "Version": "3.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "575216c9946ca70016c3ffb9c31709ba" + }, + "tibble": { + "Package": "tibble", + "Version": "3.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "5e8ad5621e5c94b24ec07b88eee13df8" + }, + "tidyr": { + "Package": "tidyr", + "Version": "1.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "450d7dfaedde58e28586b854eeece4fa" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7243004a708d06d4716717fa1ff5b2fe" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c9c462b759a5cc844ae25b5942654d13" + }, + "uwot": { + "Package": "uwot", + "Version": "0.1.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a9737c75f5f949695617b05e78281b2f" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.3.8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ecf749a1b39ea72bd9b51b76292261f1" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "55e157e2aa88161bdb0754218470d204" + }, + "waldo": { + "Package": "waldo", + "Version": "0.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ad8cfff5694ac5b3c354f8f2044bd976" + }, + "withr": { + "Package": "withr", + "Version": "2.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ad03909b44677f930fa156d47d7a3aeb" + }, + "xfun": { + "Package": "xfun", + "Version": "0.26", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "a270216f7ffda25e53298293046d1d05" + }, + "xtable": { + "Package": "xtable", + "Version": "1.8-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b8acdf8af494d9ec19ccb2481a9b11c2" + }, + "yaml": { + "Package": "yaml", + "Version": "2.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2826c5d9efb0a88f657c7a679c7106db" + }, + "zoo": { + "Package": "zoo", + "Version": "1.8-9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "035d1c7c12593038c26fb1c2fd40c4d2" + } + } +} diff --git a/renv/.gitignore b/renv/.gitignore index 62e60ad4a..5246cf196 100644 --- a/renv/.gitignore +++ b/renv/.gitignore @@ -1,3 +1,4 @@ +local/ library/ lock/ python/ diff --git a/renv/activate.R b/renv/activate.R index 5be4bbd65..304fd900a 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -2,13 +2,27 @@ local({ # the requested version of renv - version <- "0.12.5" + version <- "0.14.0" # the project directory project <- getwd() + # allow environment variable to control activation + activate <- Sys.getenv("RENV_ACTIVATE_PROJECT") + if (!nzchar(activate)) { + + # don't auto-activate when R CMD INSTALL is running + if (nzchar(Sys.getenv("R_INSTALL_PKG"))) + return(FALSE) + + } + + # bail if activation was explicitly disabled + if (tolower(activate) %in% c("false", "f", "0")) + return(FALSE) + # avoid recursion - if (!is.na(Sys.getenv("RENV_R_INITIALIZING", unset = NA))) + if (nzchar(Sys.getenv("RENV_R_INITIALIZING"))) return(invisible(TRUE)) # signal that we're loading renv during R startup @@ -70,10 +84,13 @@ local({ repos <- getOption("repos") # ensure @CRAN@ entries are resolved - repos[repos == "@CRAN@"] <- "https://cloud.r-project.org" + repos[repos == "@CRAN@"] <- getOption( + "renv.repos.cran", + "https://cloud.r-project.org" + ) # add in renv.bootstrap.repos if set - default <- c(CRAN = "https://cloud.r-project.org") + default <- c(FALLBACK = "https://cloud.r-project.org") extra <- getOption("renv.bootstrap.repos", default = default) repos <- c(repos, extra) @@ -134,16 +151,20 @@ local({ renv_bootstrap_download_cran_latest <- function(version) { - repos <- renv_bootstrap_download_cran_latest_find(version) + spec <- renv_bootstrap_download_cran_latest_find(version) + + message("* Downloading renv ", version, " ... ", appendLF = FALSE) - message("* Downloading renv ", version, " from CRAN ... ", appendLF = FALSE) + type <- spec$type + repos <- spec$repos info <- tryCatch( utils::download.packages( - pkgs = "renv", - repos = repos, + pkgs = "renv", destdir = tempdir(), - quiet = TRUE + repos = repos, + type = type, + quiet = TRUE ), condition = identity ) @@ -153,36 +174,52 @@ local({ return(FALSE) } - message("OK") + # report success and return + message("OK (downloaded ", type, ")") info[1, 2] } renv_bootstrap_download_cran_latest_find <- function(version) { - all <- renv_bootstrap_repos() + # check whether binaries are supported on this system + binary <- + getOption("renv.bootstrap.binary", default = TRUE) && + !identical(.Platform$pkgType, "source") && + !identical(getOption("pkgType"), "source") && + Sys.info()[["sysname"]] %in% c("Darwin", "Windows") - for (repos in all) { + types <- c(if (binary) "binary", "source") - db <- tryCatch( - as.data.frame( - x = utils::available.packages(repos = repos), - stringsAsFactors = FALSE - ), - error = identity - ) + # iterate over types + repositories + for (type in types) { + for (repos in renv_bootstrap_repos()) { - if (inherits(db, "error")) - next + # retrieve package database + db <- tryCatch( + as.data.frame( + utils::available.packages(type = type, repos = repos), + stringsAsFactors = FALSE + ), + error = identity + ) - entry <- db[db$Package %in% "renv" & db$Version %in% version, ] - if (nrow(entry) == 0) - next + if (inherits(db, "error")) + next - return(repos) + # check for compatible entry + entry <- db[db$Package %in% "renv" & db$Version %in% version, ] + if (nrow(entry) == 0) + next + + # found it; return spec to caller + spec <- list(entry = entry, type = type, repos = repos) + return(spec) + } } + # if we got here, we failed to find renv fmt <- "renv %s is not available from your declared package repositories" stop(sprintf(fmt, version)) @@ -195,7 +232,7 @@ local({ urls <- file.path(repos, "src/contrib/Archive/renv", name) destfile <- file.path(tempdir(), name) - message("* Downloading renv ", version, " from CRAN archive ... ", appendLF = FALSE) + message("* Downloading renv ", version, " ... ", appendLF = FALSE) for (url in urls) { @@ -286,7 +323,7 @@ local({ } - renv_bootstrap_prefix <- function() { + renv_bootstrap_platform_prefix <- function() { # construct version prefix version <- paste(R.version$major, R.version$minor, sep = ".") @@ -305,8 +342,8 @@ local({ components <- c(prefix, R.version$platform) # include prefix if provided by user - prefix <- Sys.getenv("RENV_PATHS_PREFIX") - if (nzchar(prefix)) + prefix <- renv_bootstrap_platform_prefix_impl() + if (!is.na(prefix) && nzchar(prefix)) components <- c(prefix, components) # build prefix @@ -314,6 +351,139 @@ local({ } + renv_bootstrap_platform_prefix_impl <- function() { + + # if an explicit prefix has been supplied, use it + prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) + if (!is.na(prefix)) + return(prefix) + + # if the user has requested an automatic prefix, generate it + auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) + if (auto %in% c("TRUE", "True", "true", "1")) + return(renv_bootstrap_platform_prefix_auto()) + + # empty string on failure + "" + + } + + renv_bootstrap_platform_prefix_auto <- function() { + + prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) + if (inherits(prefix, "error") || prefix %in% "unknown") { + + msg <- paste( + "failed to infer current operating system", + "please file a bug report at https://github.com/rstudio/renv/issues", + sep = "; " + ) + + warning(msg) + + } + + prefix + + } + + renv_bootstrap_platform_os <- function() { + + sysinfo <- Sys.info() + sysname <- sysinfo[["sysname"]] + + # handle Windows + macOS up front + if (sysname == "Windows") + return("windows") + else if (sysname == "Darwin") + return("macos") + + # check for os-release files + for (file in c("/etc/os-release", "/usr/lib/os-release")) + if (file.exists(file)) + return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) + + # check for redhat-release files + if (file.exists("/etc/redhat-release")) + return(renv_bootstrap_platform_os_via_redhat_release()) + + "unknown" + + } + + renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { + + # read /etc/os-release + release <- utils::read.table( + file = file, + sep = "=", + quote = c("\"", "'"), + col.names = c("Key", "Value"), + comment.char = "#", + stringsAsFactors = FALSE + ) + + vars <- as.list(release$Value) + names(vars) <- release$Key + + # get os name + os <- tolower(sysinfo[["sysname"]]) + + # read id + id <- "unknown" + for (field in c("ID", "ID_LIKE")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + id <- vars[[field]] + break + } + } + + # read version + version <- "unknown" + for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + version <- vars[[field]] + break + } + } + + # join together + paste(c(os, id, version), collapse = "-") + + } + + renv_bootstrap_platform_os_via_redhat_release <- function() { + + # read /etc/redhat-release + contents <- readLines("/etc/redhat-release", warn = FALSE) + + # infer id + id <- if (grepl("centos", contents, ignore.case = TRUE)) + "centos" + else if (grepl("redhat", contents, ignore.case = TRUE)) + "redhat" + else + "unknown" + + # try to find a version component (very hacky) + version <- "unknown" + + parts <- strsplit(contents, "[[:space:]]")[[1L]] + for (part in parts) { + + nv <- tryCatch(numeric_version(part), error = identity) + if (inherits(nv, "error")) + next + + version <- nv[1, 1] + break + + } + + paste(c("linux", id, version), collapse = "-") + + } + renv_bootstrap_library_root_name <- function(project) { # use project name as-is if requested @@ -339,7 +509,8 @@ local({ return(file.path(path, name)) } - file.path(project, "renv/library") + prefix <- renv_bootstrap_profile_prefix() + paste(c(project, prefix, "renv/library"), collapse = "/") } @@ -396,12 +567,69 @@ local({ TRUE } + + renv_bootstrap_profile_load <- function(project) { + + # if RENV_PROFILE is already set, just use that + profile <- Sys.getenv("RENV_PROFILE", unset = NA) + if (!is.na(profile) && nzchar(profile)) + return(profile) + + # check for a profile file (nothing to do if it doesn't exist) + path <- file.path(project, "renv/local/profile") + if (!file.exists(path)) + return(NULL) + + # read the profile, and set it if it exists + contents <- readLines(path, warn = FALSE) + if (length(contents) == 0L) + return(NULL) + + # set RENV_PROFILE + profile <- contents[[1L]] + if (nzchar(profile)) + Sys.setenv(RENV_PROFILE = profile) + + profile + + } + + renv_bootstrap_profile_prefix <- function() { + profile <- renv_bootstrap_profile_get() + if (!is.null(profile)) + return(file.path("renv/profiles", profile)) + } + + renv_bootstrap_profile_get <- function() { + profile <- Sys.getenv("RENV_PROFILE", unset = "") + renv_bootstrap_profile_normalize(profile) + } + + renv_bootstrap_profile_set <- function(profile) { + profile <- renv_bootstrap_profile_normalize(profile) + if (is.null(profile)) + Sys.unsetenv("RENV_PROFILE") + else + Sys.setenv(RENV_PROFILE = profile) + } + + renv_bootstrap_profile_normalize <- function(profile) { + + if (is.null(profile) || profile %in% c("", "default")) + return(NULL) + + profile + + } + + # load the renv profile, if any + renv_bootstrap_profile_load(project) # construct path to library root root <- renv_bootstrap_library_root(project) # construct library prefix for platform - prefix <- renv_bootstrap_prefix() + prefix <- renv_bootstrap_platform_prefix() # construct full libpath libpath <- file.path(root, prefix) From 0b91db2770a8d804544bf3a7b89af64540398400 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 28 Sep 2021 11:21:29 -0400 Subject: [PATCH 006/979] Update to latest v5 version of SeuratObject --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0a41fa9ae..028b44599 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,7 @@ Imports: scales, scattermore (>= 0.7), sctransform (>= 0.3.2), - SeuratObject (>= 4.0.2), + SeuratObject (>= 4.9.9.9001), shiny, spatstat.core, spatstat.geom, From 1fda9b40bf395d48d6c2c414b6da6aa417aabb5e Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 29 Sep 2021 02:43:09 -0400 Subject: [PATCH 007/979] Turn v3 FindVariableFeatures default method to V3Matrix method --- R/preprocessing.R | 3 ++- man/FindVariableFeatures.Rd | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 75a8eb9b8..09485de14 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1911,9 +1911,10 @@ SubsetByBarcodeInflections <- function(object) { #' #' @rdname FindVariableFeatures #' @concept preprocessing +#' @method FindVariableFeatures V3Matrix #' @export #' -FindVariableFeatures.default <- function( +FindVariableFeatures.V3Matrix <- function( object, selection.method = "vst", loess.span = 0.3, diff --git a/man/FindVariableFeatures.Rd b/man/FindVariableFeatures.Rd index 3db62df0d..8ec4f4859 100644 --- a/man/FindVariableFeatures.Rd +++ b/man/FindVariableFeatures.Rd @@ -3,7 +3,7 @@ \name{FindVariableFeatures} \alias{FindVariableFeatures} \alias{FindVariableGenes} -\alias{FindVariableFeatures.default} +\alias{FindVariableFeatures.V3Matrix} \alias{FindVariableFeatures.Assay} \alias{FindVariableFeatures.SCTAssay} \alias{FindVariableFeatures.Seurat} @@ -11,7 +11,7 @@ \usage{ FindVariableFeatures(object, ...) -\method{FindVariableFeatures}{default}( +\method{FindVariableFeatures}{V3Matrix}( object, selection.method = "vst", loess.span = 0.3, From e0721ddf9f9419010826e239edc00ca49ed15f61 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 29 Sep 2021 02:44:25 -0400 Subject: [PATCH 008/979] Add VST function for v5 FindVariableFeatures VST function is backend and directionality-agnostic Designed to be called by future FindVariableFeatures.default --- NAMESPACE | 1 + R/preprocessing5.R | 292 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 284 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 1cbacb630..ade63d706 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ S3method(FindSpatiallyVariableFeatures,default) S3method(FindVariableFeatures,Assay) S3method(FindVariableFeatures,SCTAssay) S3method(FindVariableFeatures,Seurat) +S3method(FindVariableFeatures,V3Matrix) S3method(FindVariableFeatures,default) S3method(FoldChange,Assay) S3method(FoldChange,DimReduc) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 890f79d7f..00447c9a9 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1,9 +1,13 @@ #' @include generics.R #' @include preprocessing.R #' @importFrom methods slot +#' @importFrom SeuratObject .MARGIN .SparseSlots +#' @importFrom utils txtProgressBar setTxtProgressBar #' NULL +hvf.methods <- list() + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -22,7 +26,24 @@ LogNormalize5 <- function(data, scale.factor = 1e4, margin = 2L, verbose = TRUE) # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @importFrom utils setTxtProgressBar txtProgressBar +#' @method FindVariableFeatures default +#' @export +#' +FindVariableFeatures.default <- function( + object, + method = 'vst', + ... +) { + .NotYetImplemented() + if (is.character(x = method)) { + method <- method[1L] + method <- match.arg(arg = method, choices = names(x = hvf.methods)) + } + if (!is.function(x = method)) { + stop("'method' must be a function for calculating highly variable features") + } +} + #' #' @method LogNormalize5 default #' @export @@ -60,7 +81,7 @@ LogNormalize5.default <- function( return(data) } -#' @importFrom SeuratObject IsSparse .MARGIN +#' @importFrom SeuratObject IsSparse #' #' @method NormalizeData default #' @export @@ -82,7 +103,7 @@ NormalizeData.default <- function( EXPR = method, 'LogNormalize' = { if (IsSparse(x = object) && .MARGIN(object = object) == cmargin) { - SparseNormalize( + .SparseNormalize( data = object, scale.factor = scale.factor, verbose = verbose @@ -100,7 +121,7 @@ NormalizeData.default <- function( return(normalized) } -#' @importFrom SeuratObject .MARGIN Cells DefaultLayer DefaultLayer<- Features +#' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features #' LayerData LayerData<- #' #' @method NormalizeData StdAssay @@ -122,7 +143,13 @@ NormalizeData.StdAssay <- function( if (save == DefaultLayer(object = object)) { default <- FALSE } - data <- NormalizeData( + data <- LayerData(object = object, layer = layer, fast = TRUE) + f <- if (inherits(x = data, what = 'V3Matrix')) { + NormalizeData.default + } else { + NormalizeData + } + data <- f( object = LayerData(object = object, layer = layer, fast = TRUE), method = method, scale.factor = 1e4, @@ -153,10 +180,80 @@ NormalizeData.StdAssay <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @importFrom SeuratObject .SparseSlots -#' @importFrom utils txtProgressBar setTxtProgressBar -#' -SparseNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { +.FeatureVar <- function( + data, + mu, + fmargin = 1L, + standardize = FALSE, + sd = NULL, + clip = NULL, + verbose = TRUE +) { + fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) + ncells <- dim(x = data)[-fmargin] + nfeatures <- dim(x = data)[fmargin] + fvars <- vector(mode = 'numeric', length = nfeatures) + if (length(x = mu) != nfeatures) { + stop("Wrong number of feature means provided") + } + if (isTRUE(x = standardize)) { + clip <- clip %||% sqrt(x = ncells) + if (length(x = sd) != nfeatures) { + stop("Wrong number of standard deviations") + } + } + if (isTRUE(x = verbose)) { + msg <- 'Calculating feature variances' + if (isTRUE(x = standardize)) { + msg <- paste(msg, 'of standardized and clipped values') + } + message(msg) + pb <- txtProgressBar(style = 3, file = stderr()) + } + for (i in seq_len(length.out = nfeatures)) { + if (isTRUE(x = standardize) && sd[i] == 0) { + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / nfeatures) + } + next + } + x <- if (fmargin == 1L) { + data[i, , drop = TRUE] + } else { + data[, i, drop = TRUE] + } + x <- x - mu[i] + if (isTRUE(x = standardize)) { + x <- x / sd[i] + x[x > clip] <- clip + } + fvars[i] <- sum(x ^ 2) / (ncells - 1L) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / nfeatures) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + return(fvars) +} + +.Mean <- function(data, margin = 1L) { + nout <- dim(x = data)[margin] + nobs <- dim(x = data)[-margin] + means <- vector(mode = 'numeric', length = nout) + for (i in seq_len(length.out = nout)) { + x <- if (margin == 1L) { + data[i, , drop = TRUE] + } else { + data[, i, drop = TRUE] + } + means[i] <- sum(x) / nobs + } + return(means) +} + +.SparseNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { entryname <- .SparseSlots(x = data, type = 'entries') p <- slot(object = data, name = .SparseSlots(x = data, type = 'pointers')) if (p[1L] == 0) { @@ -182,6 +279,183 @@ SparseNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { return(data) } +#' @param data A sparse matrix +#' @param mu A vector of feature means +#' @param fmargin Feature margin +#' @param standardize Standardize matrix rows prior to calculating variances +#' @param sd If standardizing, a vector of standard deviations to +#' standardize with +#' @param clip Set upper bound for standardized variances; defaults to the +#' square root of the number of cells +#' @param verbose Show progress updates +#' +#' @keywords internal +#' +#' @noRd +#' +.SparseFeatureVar <- function( + data, + mu, + fmargin = 1L, + standardize = FALSE, + sd = NULL, + clip = NULL, + verbose = TRUE +) { + fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) + if (fmargin != .MARGIN(object = data)) { + data <- t(x = data) + fmargin <- .MARGIN(object = data) + } + entryname <- .SparseSlots(x = data, type = 'entries') + p <- slot(object = data, name = .SparseSlots(x = data, type = 'pointers')) + if (p[1L] == 0) { + p <- p + 1L + } + np <- length(x = p) - 1L + ncells <- dim(x = data)[-fmargin] + fvars <- vector(mode = 'numeric', length = np) + if (length(x = mu) != np) { + stop("Wrong number of feature means provided") + } + if (isTRUE(x = standardize)) { + clip <- clip %||% sqrt(x = ncells) + if (length(x = sd) != np) { + stop("Wrong number of standard deviations provided") + } + } + if (isTRUE(x = verbose)) { + msg <- 'Calculating feature variances' + if (isTRUE(x = standardize)) { + msg <- paste(msg, 'of standardized and clipped values') + } + message(msg) + pb <- txtProgressBar(style = 3, file = stderr()) + } + for (i in seq_len(length.out = np)) { + if (isTRUE(x = standardize) && sd[i] == 0) { + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / np) + } + next + } + idx <- seq.int(from = p[i], to = p[i + 1L] - 1L) + xidx <- slot(object = data, name = entryname)[idx] - mu[i] + nzero <- ncells - length(x = xidx) + csum <- nzero * ifelse( + test = isTRUE(x = standardize), + yes = ((0 - mu[i]) / sd[i]) ^ 2, + no = mu[i] ^ 2 + ) + if (isTRUE(x = standardize)) { + xidx <- xidx / sd[i] + xidx[xidx > clip] <- clip + } + fsum <- sum(xidx ^ 2) + csum + fvars[i] <- fsum / (ncells - 1L) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / np) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + return(fvars) +} + +.SparseMean <- function(data, margin = 1L) { + margin <- SeuratObject:::.CheckFmargin(fmargin = margin) + if (margin != .MARGIN(object = data)) { + data <- t(x = data) + margin <- .MARGIN(object = data) + } + entryname <- .SparseSlots(x = data, type = 'entries') + p <- slot(object = data, name = .SparseSlots(x = data, type = 'pointers')) + if (p[1L] == 0) { + p <- p + 1L + } + np <- length(x = p) - 1L + nobs <- dim(x = data)[-margin] + means <- vector(mode = 'numeric', length = np) + for (i in seq_len(length.out = np)) { + idx <- seq.int(from = p[i], to = p[i + 1L] - 1L) + means[i] <- sum(slot(object = data, name = entryname)[idx]) / nobs + } + return(means) +} + +#' @inheritParams stats::loess +#' @param data A matrix +#' @param fmargin Feature margin +#' @param nfeatures Number of features to select +#' @param clip After standardization values larger than \code{clip} will be set +#' to \code{clip}; default is \code{NULL} which sets this value to the square +#' root of the number of cells +#' +#' @importFrom stats loess +#' +#' @keywords internal +#' +#' @noRd +#' +VST <- function( + data, + fmargin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + verbose = TRUE, + ... +) { + fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) + nfeatures <- dim(x = data)[fmargin] + if (IsSparse(x = data)) { + mean.func <- .SparseMean + var.func <- .SparseFeatureVar + } else { + mean.func <- .Mean + var.func <- .FeatureVar + } + hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) + hvf.info$mean <- mean.func(data = data, margin = 1L) + hvf.info$variance <- var.func( + data = data, + mu = hvf.info$mean, + fmargin = fmargin, + verbose = verbose + ) + hvf.info$variance.expected <- 0L + not.const <- hvf.info$variance > 0 + fit <- loess( + formula = log10(x = variance) ~ log10(x = mean), + data = hvf.info[not.const, , drop = TRUE], + span = span + ) + hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + hvf.info$variance.standardized <- var.func( + data = data, + mu = hvf.info$mean, + standardize = TRUE, + sd = sqrt(x = hvf.info$variance.expected), + clip = clip, + verbose = verbose + ) + hvf.info$variable <- FALSE + hvf.info$rank <- NA + vs <- hvf.info$variance.standardized + vs[vs == 0] <- NA + vf <- head( + x = order(hvf.info$variance.standardized, decreasing = TRUE), + n = nselect + ) + hvf.info$variable[vf] <- TRUE + hvf.info$rank[vf] <- seq_along(along.with = vf) + # colnames(x = hvf.info) <- paste0('vst.', colnames(x = hvf.info)) + return(hvf.info) +} + +hvf.methods$vst <- VST + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # S4 Methods #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From 0eddd0796d7169ae30c46cb9081c7655a12cad0f Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 29 Sep 2021 02:45:42 -0400 Subject: [PATCH 009/979] Update renv lockfile --- renv.lock | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/renv.lock b/renv.lock index 78249d425..900952632 100644 --- a/renv.lock +++ b/renv.lock @@ -8,6 +8,9 @@ } ] }, + "Bioconductor": { + "Version": "3.13" + }, "Packages": { "BH": { "Package": "BH", @@ -16,6 +19,19 @@ "Repository": "CRAN", "Hash": "e4c04affc2cac20c8fec18385cd14691" }, + "BiocGenerics": { + "Package": "BiocGenerics", + "Version": "0.38.0", + "Source": "Bioconductor", + "Hash": "de5e346fed0fc44a0424a0531cf5d12d" + }, + "BiocManager": { + "Package": "BiocManager", + "Version": "1.30.16", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "2fdca0877debdd4668190832cdee4c31" + }, "FNN": { "Package": "FNN", "Version": "1.1.3", @@ -23,6 +39,12 @@ "Repository": "CRAN", "Hash": "b56998fff55e4a4b4860ad6e8c67e0f9" }, + "IRanges": { + "Package": "IRanges", + "Version": "2.26.0", + "Source": "Bioconductor", + "Hash": "7859b18fedba59e99467df40b42e3553" + }, "KernSmooth": { "Package": "KernSmooth", "Version": "2.23-20", @@ -121,17 +143,23 @@ "Repository": "CRAN", "Hash": "f153432c4ca15b937ccfaa40f167c892" }, + "S4Vectors": { + "Package": "S4Vectors", + "Version": "0.30.0", + "Source": "Bioconductor", + "Hash": "a750488825efca8e08a30e8157821b9b" + }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9000", + "Version": "4.9.9.9001", "Source": "GitHub", "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "seurat-object", "RemoteUsername": "mojaveazure", + "RemoteRepo": "seurat-object", "RemoteRef": "feat/standard", - "RemoteSha": "e9029cdf82d0d0c2811cc84931f27aa7f5ff3c4c", - "Hash": "86d79ca1e6507e84353cb34cfe2d3811" + "RemoteSha": "a7a542f503d3ed3f0836170bdaf89e4d278d30ab", + "RemoteHost": "api.github.com", + "Hash": "3a2f1268f710da71f5fc8d5419b75778" }, "abind": { "Package": "abind", @@ -819,6 +847,13 @@ "Repository": "CRAN", "Hash": "515f341d3affe0de9e4a7f762efb0456" }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.11", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "320017b52d05a943981272b295750388" + }, "rpart": { "Package": "rpart", "Version": "4.1-15", @@ -994,6 +1029,13 @@ "Repository": "CRAN", "Hash": "7243004a708d06d4716717fa1ff5b2fe" }, + "tinytex": { + "Package": "tinytex", + "Version": "0.33", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "6e0ad90ac5669e35d5456cb61b295acb" + }, "utf8": { "Package": "utf8", "Version": "1.2.2", From b1d957901dd05d699e9f63a249452db64eceda97 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 29 Sep 2021 13:24:59 -0400 Subject: [PATCH 010/979] Add FindVariableFeatures.StdAssay --- NAMESPACE | 1 + R/preprocessing5.R | 74 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index ade63d706..d4c6af611 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ S3method(FindSpatiallyVariableFeatures,default) S3method(FindVariableFeatures,Assay) S3method(FindVariableFeatures,SCTAssay) S3method(FindVariableFeatures,Seurat) +S3method(FindVariableFeatures,StdAssay) S3method(FindVariableFeatures,V3Matrix) S3method(FindVariableFeatures,default) S3method(FoldChange,Assay) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 00447c9a9..604c3d14c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -32,16 +32,88 @@ LogNormalize5 <- function(data, scale.factor = 1e4, margin = 2L, verbose = TRUE) FindVariableFeatures.default <- function( object, method = 'vst', + nselect = 2000L, + fmargin = 1L, + verbose = TRUE, ... ) { - .NotYetImplemented() if (is.character(x = method)) { method <- method[1L] method <- match.arg(arg = method, choices = names(x = hvf.methods)) + method <- hvf.methods[[method]] } if (!is.function(x = method)) { stop("'method' must be a function for calculating highly variable features") } + return(method( + data = object, + fmargin = fmargin, + nselect = nselect, + verbose = verbose, + ... + )) +} + +#' @importFrom SeuratObject DefaultLayer Features Key +#' +#' @method FindVariableFeatures StdAssay +#' @export +#' +FindVariableFeatures.StdAssay <- function( + object, + method = 'vst', + nselect = 2000L, + layer = NULL, + span = 0.3, + clip = NULL, + key = NULL, + verbose = TRUE, + ... +) { + layer <- layer %||% DefaultLayer(object = object) + data <- LayerData(object = object, layer = layer, fast = TRUE) + f <- if (inherits(x = data, what = 'V3Matrix')) { + FindVariableFeatures.default + } else { + FindVariableFeatures + } + hvf.info <- f( + object = data, + method = method, + nselect = nselect, + fmargin = .MARGIN(object = object, type = 'features'), + span = span, + clip = clip, + verbose = verbose, + ... + ) + rownames(x = hvf.info) <- Features(x = object, layer = layer) + pattern <- '^[[:alnum:]]+_' + if (!any(grepl(pattern = pattern, x = colnames(x = hvf.info)))) { + key <- key %||% if (is.character(x = method)) { + if (grepl(pattern = '\\.', x = method)) { + x <- vapply( + X = unlist(x = strsplit(x = method, split = '\\.')), + FUN = substr, + FUN.VALUE = character(length = 1L), + start = 1L, + stop = 1L, + USE.NAMES = FALSE + ) + paste(x, collapse = '') + } else { + method + } + } else { + SeuratObject:::RandomKey() + } + key <- suppressWarnings(expr = Key(object = key)) + colnames(x = hvf.info) <- paste0(key, colnames(x = hvf.info)) + } #else if (!all(grepl(pattern = pattern, x = colnames(x = hvf.info)))) { + # '' + # } + object[[]] <- hvf.info + return(object) } #' From 253892cab4878abb3815cb07a99ee5bbd200e935 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 29 Sep 2021 13:25:38 -0400 Subject: [PATCH 011/979] bump v5 version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 028b44599..90741eff2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9002 -Date: 2021-09-10 +Version: 4.0.4.9003 +Date: 2021-09-29 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 86fc7dd9f1003004fd6dfa7fc011273fbd4b9ab9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Dec 2021 13:59:12 -0500 Subject: [PATCH 012/979] add bridge integration --- R/integration.R | 356 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) diff --git a/R/integration.R b/R/integration.R index 62150ed1b..525df4f07 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5476,3 +5476,359 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } } } + + + + + +BridgeCellsRepresentation <- function(object.list, + bridge.object, + object.reduction.list, + bridge.reduction.list, + dims.list, + smooth.by = NULL, + laplacian.reduction = NULL, + laplacian.dims = NULL, + new.assay.name = "Bridge", + return.all.assays = FALSE, + l2.norm = TRUE, + do.center = FALSE, + bridge.cells = NULL, + verbose = TRUE +) { + + if (!is.null(laplacian.reduction) & !is.null(smooth.by)) { + stop("when laplacian.reduction is set, smooth.by should set to NULL") + } + bridge.object[['ident']] <- Idents(object = bridge.object) + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + if (!is.null(bridge.cells)) { + bridge.object <- subset(bridge.object, cells = bridge.cells) + } + if (verbose) { + message("Constructing Bridge-cells representation") + } + object.list <- my.lapply( + X = 1:length(x = object.list), + FUN = function(x) { + SA.inv <- MASS::ginv( + X = Embeddings( + object = bridge.object, + reduction = bridge.reduction.list[[x]] + )[ ,dims.list[[x]]] + ) + if (is.null(smooth.by)) { + if (!is.null(laplacian.reduction)) { + laplacian.dims <- laplacian.dims %||% 1:ncol(bridge.object[[laplacian.reduction]]) + lap.vector <- Embeddings(bridge.object[[laplacian.reduction]])[,laplacian.dims] + X <- Embeddings( + object = object.list[[x]], + reduction = object.reduction.list[[x]] + )[, 1:length(x = dims.list[[x]])] %*% (SA.inv %*% lap.vector) + + } else { + X <- Embeddings( + object = object.list[[x]], + reduction = object.reduction.list[[x]] + )[, 1:length(x = dims.list[[x]])] %*% SA.inv + colnames(X) <- Cells(bridge.object) + } + + } else { + smooth.matrix <- as.sparse( + x = fastDummies::dummy_cols( + bridge.object[[ smooth.by ]] + )[, -1] + ) + colnames(smooth.matrix) <- gsub( pattern = paste0(smooth.by,"_"), + replacement = "", + x = colnames(smooth.matrix) + ) + X <- Embeddings( + object = object.list[[x]], + reduction = object.reduction.list[[x]] + )[, 1:length( dims.list[[x]])] %*% as.matrix(SA.inv %*% smooth.matrix) + } + if (l2.norm) { + X <- L2Norm(mat = X, MARGIN = 1) + } + colnames(x = X) <- paste0('bridge_', colnames(x = X)) + suppressWarnings(object.list[[x]][[new.assay.name]] <- CreateAssayObject(data = t(X))) + object.list[[x]][[new.assay.name]]@misc$SA.inv <- SA.inv + DefaultAssay(object.list[[x]]) <- new.assay.name + VariableFeatures(object = object.list[[x]]) <- rownames(object.list[[x]]) + object.list[[x]] <- ScaleData( + object = object.list[[x]], + do.scale = FALSE, + do.center = do.center, + verbose = FALSE + ) + return (object.list[[x]]) + } + ) + if (!return.all.assays) { + object.list <- my.lapply( + X = object.list, + FUN = function(x) { + x <- DietSeurat(object = x, assay = new.assay.name, scale.data = TRUE) + return(x) + } + ) + } + return(object.list) +} + + +LaplacianGraphDecomposition <- function(A, ndim = 50, type = "SM",verbose = FALSE) { + ## graph should not be pruned + n <- nrow(A) + # A lot faster (order of magnitude when n = 1000) + Dsq <- sqrt(Matrix::colSums(A)) + L <- -Matrix::t(A / Dsq) / Dsq + Matrix::diag(L) <- 1 + Matrix::diag(L) + + k <- ndim + 1 + opt <- list(tol = 1e-4) + if (type == "SM"){ + res <- tryCatch(RSpectra::eigs_sym(L, k = k, which = "SM", opt = opt), + error = function(c) { + NULL + } + ) + } + if (type == "LM"){ + res <- tryCatch(RSpectra::eigs_sym(L, + k = k, which = "LM", + opt = opt + ), + error = function(c) { + NULL + } + ) + } + + vec_indices <- rev(order(res$values, decreasing = TRUE)[1:ndim]) + eigen.output <- list(eigen_vector = as.matrix(Re(res$vectors[, vec_indices])), + eigen_value = res$values[vec_indices]) + return( eigen.output) +} + + +NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { + + select_nn <- nn.object@nn.idx + ncol.nn <- ncol.nn %||% nrow(x = select_nn) + col.cells <- col.cells %||% nn.object@cell.names + k.nn <- ncol(select_nn) + j <- as.numeric(x = t(x = select_nn )) + i <- ((1:length(x = j)) - 1) %/% k.nn + 1 + nn.matrix <- sparseMatrix( + i = i, + j = j, + x = 1, + dims = c(nrow(x = select_nn), ncol.nn) + ) + + rownames(x = nn.matrix) <- nn.object@cell.names + colnames(x = nn.matrix) <- col.cells + return( nn.matrix) +} + +FindDirectAnchor <- function( + object.list, + assay = "Bridge", + slot = "data", + reduction = NULL, + k.anchor = 20, + k.score = 50, + verbose = TRUE +) { + reduction.name <-reduction %||% paste0(assay, ".reduc") + object.list <- lapply(object.list, function(x) { + if (is.null(reduction)) { + x[[reduction.name]] <- CreateDimReducObject( + embeddings = t(GetAssayData( + object = x, + slot = slot, + assay = assay + )), + key = "L_", + assay = assay + ) + } + DefaultAssay(x) <- assay + x <- DietSeurat(x, assays = assay, dimreducs = reduction.name ) + return(x) + }) + obj.both <- merge(object.list[[1]], object.list[[2]], merge.dr = reduction.name) + + objects.ncell <- sapply(X = object.list, FUN = function(x) dim(x = x)[2]) + offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)] + + anchors <- FindAnchors(object.pair = obj.both, + assay = DefaultAssay(obj.both), + slot = 'data', + cells1 = colnames(object.list[[1]]), + cells2 = colnames(object.list[[2]]), + internal.neighbors = NULL, + reduction = reduction.name, + k.anchor = k.anchor, + k.score = k.score, + dims = 1:ncol(obj.both[[reduction.name]]), + k.filter = NA, + verbose = verbose + ) + + anchors[, 1] <- anchors[, 1] + offsets[1] + anchors[, 2] <- anchors[, 2] + offsets[2] + + # determine all anchors + all.anchors <- anchors + all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)]) + all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell) + + + + command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) + reference <- NULL + anchor.features <- rownames( obj.both ) + anchor.set <- new(Class = "IntegrationAnchorSet", + object.list = object.list, + reference.objects = reference %||% seq_along(object.list), + anchors = all.anchors, + offsets = offsets, + anchor.features = anchor.features, + command = command + ) + return(anchor.set) +} + + +RunBridgeIntegration <- function(object.list, + bridge.object, + object.reduction.list, + bridge.reduction.list, + dims.list, + laplacian.reduction = "lap", + laplacian.dims = NULL, + smooth.by = NULL, + new.assay.name = "Bridge", + integrated.reduction.name = "bridge_dr", + verbose = TRUE +) { + if (!is.null(smooth.by)) { + bridge.method <- "bridge clusters" + } else if (!is.null(laplacian.reduction)) { + bridge.method <- "bridge graph" + } else { + bridge.method <- "bridge cells" + } + if (verbose) { + switch( + EXPR = bridge.method, + 'bridge clusters' = { + message("Transform cells to bridge clusters space") + }, + "bridge graph" = { + message('Transform cells to bridge graph laplacian space') + }, + "bridge cells" = { + message('Transform cells to bridge cells space') + } + ) + } + + object.list <- BridgeCellsRepresentation(object.list = object.list, + bridge.object = bridge.object, + object.reduction.list = object.reduction.list, + bridge.reduction.list = bridge.reduction.list, + dims.list = dims.list, + smooth.by = smooth.by, + new.assay.name = new.assay.name, + laplacian.reduction = laplacian.reduction, + laplacian.dims = laplacian.dims, + verbose = verbose + ) + if (verbose) { + message("Integrating Bridge space") + } + + anchor <- FindDirectAnchor( + object.list = object.list , + slot = "data", + assay = new.assay.name + ) + anchor.emb <- merge( + anchor@object.list[[1]], + anchor@object.list[[2]], + merge.dr = paste0(new.assay.name, ".reduc") + ) + object.merge <- IntegrateEmbeddings(anchorset = anchor, + reductions = anchor.emb[[paste0(new.assay.name, ".reduc")]], + new.reduction.name = integrated.reduction.name) + return(object.merge) +} + + +TranferLablesNN <- function( + nn.object, + reference.object, + group.by = NULL +){ + k.nn <- ncol(nn.object@nn.idx) + select_nn <- nn.object@nn.idx + j <- as.numeric(x = t(x = select_nn )) + i <- ((1:length(x = j)) - 1) %/% k.nn + 1 + + nn.matrix <- sparseMatrix( + i = i, + j = j, + x = 1, + dims = c(nrow(select_nn), ncol(x = reference.object)) + ) + + + reference.labels.matrix <- as.sparse( + x = fastDummies::dummy_cols( + reference.object[[group.by]] + )[, -1] + ) + colnames(reference.labels.matrix) <- gsub( paste0(group.by, "_"), "",colnames(reference.labels.matrix)) + + query.label.mat <- nn.matrix %*%reference.labels.matrix + query.label.mat <- query.label.mat/k.nn + rownames(query.label.mat) <- nn.object@cell.names + #colnames(query.label) <- gsub(".data_", "", colnames(query.label) ) + prediction.max <- apply(X = query.label.mat, MARGIN = 1, FUN = which.max) + query.label <- colnames(query.label.mat)[prediction.max] + query.label.score <- apply(X = query.label.mat, MARGIN = 1, FUN = max) + + return( list(labels = query.label,scores = query.label.score, prediction.mat = query.label.mat)) +} + + + + +RunGraphLaplacian <- function( + object, + graph, + reduction.name = "lap", + reduction.key ="LAP_", + n = 50 +) { + lap <- LaplacianGraphDecomposition(A = object[[graph]], ndim = n) + rownames(lap$eigen_vector) <- Cells(object) + colnames(lap$eigen_vector) <- paste0(reduction.key, 1:n ) + object[[reduction.name]] <- CreateDimReducObject(embeddings = lap$eigen_vector, + key = reduction.key, + assay = DefaultAssay(object), + stdev = lap$eigen_value + ) + return(object) +} + + \ No newline at end of file From 6f86b2754cceb16337b98d827758dbfaebf79f5c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Dec 2021 14:26:29 -0500 Subject: [PATCH 013/979] cca bridge anchor --- R/integration.R | 69 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/R/integration.R b/R/integration.R index 525df4f07..84dab34c1 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5714,6 +5714,7 @@ RunBridgeIntegration <- function(object.list, bridge.reduction.list, dims.list, laplacian.reduction = "lap", + anchor.type = c("direct", "cca")[1], laplacian.dims = NULL, smooth.by = NULL, new.assay.name = "Bridge", @@ -5742,34 +5743,64 @@ RunBridgeIntegration <- function(object.list, ) } - object.list <- BridgeCellsRepresentation(object.list = object.list, - bridge.object = bridge.object, - object.reduction.list = object.reduction.list, - bridge.reduction.list = bridge.reduction.list, - dims.list = dims.list, - smooth.by = smooth.by, - new.assay.name = new.assay.name, - laplacian.reduction = laplacian.reduction, - laplacian.dims = laplacian.dims, - verbose = verbose + object.list <- BridgeCellsRepresentation( + object.list = object.list, + bridge.object = bridge.object, + object.reduction.list = object.reduction.list, + bridge.reduction.list = bridge.reduction.list, + dims.list = dims.list, + smooth.by = smooth.by, + new.assay.name = new.assay.name, + laplacian.reduction = laplacian.reduction, + laplacian.dims = laplacian.dims, + verbose = verbose ) if (verbose) { message("Integrating Bridge space") } + bridge.reduction.name <- paste0(new.assay.name, ".reduc") + object.list <- lapply( + X = object.list, + FUN = function(x) { + x[[bridge.reduction.name]] <- CreateDimReducObject( + embeddings = t(GetAssayData( + object = x, + slot = "data", + assay = new.assay.name + )), + key = "L_", + assay = new.assay.name + ) + return(x) + } + ) + merge.emb <- merge( + object.list[[1]], + object.list[[2]], + merge.dr = paste0(new.assay.name, ".reduc") + ) + + if (anchor.type == "direct") { anchor <- FindDirectAnchor( object.list = object.list , - slot = "data", + slot = "data", + reduction = bridge.reduction.name, assay = new.assay.name ) - anchor.emb <- merge( - anchor@object.list[[1]], - anchor@object.list[[2]], - merge.dr = paste0(new.assay.name, ".reduc") - ) - object.merge <- IntegrateEmbeddings(anchorset = anchor, - reductions = anchor.emb[[paste0(new.assay.name, ".reduc")]], - new.reduction.name = integrated.reduction.name) + } else if (anchor.type == "cca") { + anchor <- FindIntegrationAnchors(object.list = object.list, + k.filter = NA, + scale = FALSE, + verbose = verbose) + } + + object.merge <- IntegrateEmbeddings( + anchorset = anchor, + reductions = merge.emb[[paste0(new.assay.name, ".reduc")]], + new.reduction.name = integrated.reduction.name + ) + return(object.merge) } From f1089ca4547126fb86c008705c9708e0ef8c00f6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Dec 2021 16:33:10 -0500 Subject: [PATCH 014/979] docu TranferLablesNN --- R/integration.R | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/R/integration.R b/R/integration.R index 84dab34c1..533916083 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5804,17 +5804,24 @@ RunBridgeIntegration <- function(object.list, return(object.merge) } - +# Helper function to transfer labels based on neighbors object +# @param nn.object the query neighbors object +# @param reference.object the reference seurat object +# @param group.by A vector of variables to group cells by +# @return Returns a list for predicted labels, prediction score and matrix +#' @importFrom Matrix sparseMatrix +#' @importFrom fastDummies dummy_cols +#' @importFrom Matrix rowMeans +#' TranferLablesNN <- function( nn.object, reference.object, group.by = NULL ){ - k.nn <- ncol(nn.object@nn.idx) - select_nn <- nn.object@nn.idx + select_nn <- Indices(nn.object) + k.nn <- ncol(select_nn) j <- as.numeric(x = t(x = select_nn )) i <- ((1:length(x = j)) - 1) %/% k.nn + 1 - nn.matrix <- sparseMatrix( i = i, j = j, @@ -5822,23 +5829,29 @@ TranferLablesNN <- function( dims = c(nrow(select_nn), ncol(x = reference.object)) ) - reference.labels.matrix <- as.sparse( - x = fastDummies::dummy_cols( + x = dummy_cols( reference.object[[group.by]] )[, -1] ) - colnames(reference.labels.matrix) <- gsub( paste0(group.by, "_"), "",colnames(reference.labels.matrix)) + colnames(reference.labels.matrix) <- gsub( + pattern = paste0(group.by, "_"), + replacement = "", + x = colnames(reference.labels.matrix) + ) - query.label.mat <- nn.matrix %*%reference.labels.matrix + query.label.mat <- nn.matrix %*% reference.labels.matrix query.label.mat <- query.label.mat/k.nn - rownames(query.label.mat) <- nn.object@cell.names - #colnames(query.label) <- gsub(".data_", "", colnames(query.label) ) + rownames(x = query.label.mat) <- Cells(nn.object) prediction.max <- apply(X = query.label.mat, MARGIN = 1, FUN = which.max) - query.label <- colnames(query.label.mat)[prediction.max] - query.label.score <- apply(X = query.label.mat, MARGIN = 1, FUN = max) + query.label <- colnames(x = query.label.mat)[prediction.max] + query.label.score <- apply(X = query.label.mat, MARGIN = 1, FUN = max) - return( list(labels = query.label,scores = query.label.score, prediction.mat = query.label.mat)) + output.list <- list(labels = query.label, + scores = query.label.score, + prediction.mat = query.label.mat + ) + return(output.list) } From 3e0d63e289cf71e9ebae3e437340da752f01099a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Dec 2021 17:45:09 -0500 Subject: [PATCH 015/979] add RunGraphLaplacian --- NAMESPACE | 3 ++ R/generics.R | 20 ++++++++++++ R/integration.R | 84 +++++++++++++++++++++++-------------------------- 3 files changed, 62 insertions(+), 45 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c847f2c78..2c108eabb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,6 +60,8 @@ S3method(RenameCells,SlideSeq) S3method(RenameCells,VisiumV1) S3method(RunCCA,Seurat) S3method(RunCCA,default) +S3method(RunGraphLaplacian,Seurat) +S3method(RunGraphLaplacian,default) S3method(RunICA,Assay) S3method(RunICA,Seurat) S3method(RunICA,default) @@ -617,6 +619,7 @@ importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") importFrom(rlang,as_label) +importFrom(RSpectra, eigs_sym) importFrom(scales,brewer_pal) importFrom(scales,hue_pal) importFrom(scales,rescale) diff --git a/R/generics.R b/R/generics.R index ba893f0df..1f432b350 100644 --- a/R/generics.R +++ b/R/generics.R @@ -392,6 +392,26 @@ RunCCA <- function(object1, object2, ...) { UseMethod(generic = 'RunCCA', object = object1) } + +#' Run Graph Laplacian Eigendecomposition +#' +#' Run a graph dimensionality reductionxxxxx +#' xxxxxx +#' @param object An object +#' @param ... Arguments passed to other methods and IRLBA +#' +#' @return Returns Seurat object with the SPCA calculation stored in the reductions slot +#' @export +#' +#' @rdname RunGraphLaplacian +#' @export RunGraphLaplacian +#' + +RunGraphLaplacian <- function(object, ...) { + UseMethod(generic = 'RunGraphLaplacian', object = object) +} + + #' Run Independent Component Analysis on gene expression #' #' Run fastica algorithm from the ica package for ICA dimensionality reduction. diff --git a/R/integration.R b/R/integration.R index 533916083..fae4655d5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5583,40 +5583,6 @@ BridgeCellsRepresentation <- function(object.list, } -LaplacianGraphDecomposition <- function(A, ndim = 50, type = "SM",verbose = FALSE) { - ## graph should not be pruned - n <- nrow(A) - # A lot faster (order of magnitude when n = 1000) - Dsq <- sqrt(Matrix::colSums(A)) - L <- -Matrix::t(A / Dsq) / Dsq - Matrix::diag(L) <- 1 + Matrix::diag(L) - - k <- ndim + 1 - opt <- list(tol = 1e-4) - if (type == "SM"){ - res <- tryCatch(RSpectra::eigs_sym(L, k = k, which = "SM", opt = opt), - error = function(c) { - NULL - } - ) - } - if (type == "LM"){ - res <- tryCatch(RSpectra::eigs_sym(L, - k = k, which = "LM", - opt = opt - ), - error = function(c) { - NULL - } - ) - } - - vec_indices <- rev(order(res$values, decreasing = TRUE)[1:ndim]) - eigen.output <- list(eigen_vector = as.matrix(Re(res$vectors[, vec_indices])), - eigen_value = res$values[vec_indices]) - return( eigen.output) -} - NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { @@ -5828,7 +5794,6 @@ TranferLablesNN <- function( x = 1, dims = c(nrow(select_nn), ncol(x = reference.object)) ) - reference.labels.matrix <- as.sparse( x = dummy_cols( reference.object[[group.by]] @@ -5839,7 +5804,6 @@ TranferLablesNN <- function( replacement = "", x = colnames(reference.labels.matrix) ) - query.label.mat <- nn.matrix %*% reference.labels.matrix query.label.mat <- query.label.mat/k.nn rownames(x = query.label.mat) <- Cells(nn.object) @@ -5856,23 +5820,53 @@ TranferLablesNN <- function( +## RunLaplacian +### -RunGraphLaplacian <- function( + + +RunGraphLaplacian.Seurat <- function( object, graph, reduction.name = "lap", reduction.key ="LAP_", n = 50 ) { - lap <- LaplacianGraphDecomposition(A = object[[graph]], ndim = n) - rownames(lap$eigen_vector) <- Cells(object) - colnames(lap$eigen_vector) <- paste0(reduction.key, 1:n ) - object[[reduction.name]] <- CreateDimReducObject(embeddings = lap$eigen_vector, - key = reduction.key, - assay = DefaultAssay(object), - stdev = lap$eigen_value - ) + + lap_dir <- RunGraphLaplacian(object = object[[graph]], + n = n, + reduction.key = reduction.key + ) + + object[[reduction.name]] <- lap_dir return(object) } + +#' @importFrom Matrix diag t rowSums +#' @importFrom RSpectra eigs_sym +RunGraphLaplacian.default <- function(object, + n = 50, + reduction.key ="LAP_", + verbose = FALSE +) { + + D_half <- sqrt(rowSums(object)) + L <- -1 * (t(object / D_half) / D_half) + diag(L) <- 1 + diag(L) + L_eigen <- eigs_sym(L, k = n + 1, which = "SM") + new_order <- n:1 + lap_output <- list(eigen_vector = Re(L_eigen$vectors[, new_order]), + eigen_value = L_eigen$values[new_order] + ) + rownames(lap_output$eigen_vector) <- colnames(object) + colnames(lap_output$eigen_vector) <- paste0(reduction.key, 1:n ) + lap_dir <- CreateDimReducObject(embeddings = lap_output$eigen_vector, + key = reduction.key, + assay = DefaultAssay(object), + stdev = lap_output$eigen_value + ) + return(lap_dir) +} + \ No newline at end of file From 59066205e85f12ce16b37fce65613a3f3d67fdb2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Dec 2021 19:40:05 -0500 Subject: [PATCH 016/979] update bridge integration --- NAMESPACE | 1 + R/integration.R | 202 +++++++++++++++++++++--------------------------- 2 files changed, 87 insertions(+), 116 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2c108eabb..c176838ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -360,6 +360,7 @@ importClassesFrom(SeuratObject,Seurat) importClassesFrom(SeuratObject,SeuratCommand) importClassesFrom(SeuratObject,SpatialImage) importFrom(KernSmooth,bkde) +importFrom(MASS,ginv) importFrom(MASS,glm.nb) importFrom(MASS,lda) importFrom(Matrix,as.matrix) diff --git a/R/integration.R b/R/integration.R index fae4655d5..135381f3d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5481,109 +5481,6 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( -BridgeCellsRepresentation <- function(object.list, - bridge.object, - object.reduction.list, - bridge.reduction.list, - dims.list, - smooth.by = NULL, - laplacian.reduction = NULL, - laplacian.dims = NULL, - new.assay.name = "Bridge", - return.all.assays = FALSE, - l2.norm = TRUE, - do.center = FALSE, - bridge.cells = NULL, - verbose = TRUE -) { - - if (!is.null(laplacian.reduction) & !is.null(smooth.by)) { - stop("when laplacian.reduction is set, smooth.by should set to NULL") - } - bridge.object[['ident']] <- Idents(object = bridge.object) - my.lapply <- ifelse( - test = verbose && nbrOfWorkers() == 1, - yes = pblapply, - no = future_lapply - ) - if (!is.null(bridge.cells)) { - bridge.object <- subset(bridge.object, cells = bridge.cells) - } - if (verbose) { - message("Constructing Bridge-cells representation") - } - object.list <- my.lapply( - X = 1:length(x = object.list), - FUN = function(x) { - SA.inv <- MASS::ginv( - X = Embeddings( - object = bridge.object, - reduction = bridge.reduction.list[[x]] - )[ ,dims.list[[x]]] - ) - if (is.null(smooth.by)) { - if (!is.null(laplacian.reduction)) { - laplacian.dims <- laplacian.dims %||% 1:ncol(bridge.object[[laplacian.reduction]]) - lap.vector <- Embeddings(bridge.object[[laplacian.reduction]])[,laplacian.dims] - X <- Embeddings( - object = object.list[[x]], - reduction = object.reduction.list[[x]] - )[, 1:length(x = dims.list[[x]])] %*% (SA.inv %*% lap.vector) - - } else { - X <- Embeddings( - object = object.list[[x]], - reduction = object.reduction.list[[x]] - )[, 1:length(x = dims.list[[x]])] %*% SA.inv - colnames(X) <- Cells(bridge.object) - } - - } else { - smooth.matrix <- as.sparse( - x = fastDummies::dummy_cols( - bridge.object[[ smooth.by ]] - )[, -1] - ) - colnames(smooth.matrix) <- gsub( pattern = paste0(smooth.by,"_"), - replacement = "", - x = colnames(smooth.matrix) - ) - X <- Embeddings( - object = object.list[[x]], - reduction = object.reduction.list[[x]] - )[, 1:length( dims.list[[x]])] %*% as.matrix(SA.inv %*% smooth.matrix) - } - if (l2.norm) { - X <- L2Norm(mat = X, MARGIN = 1) - } - colnames(x = X) <- paste0('bridge_', colnames(x = X)) - suppressWarnings(object.list[[x]][[new.assay.name]] <- CreateAssayObject(data = t(X))) - object.list[[x]][[new.assay.name]]@misc$SA.inv <- SA.inv - DefaultAssay(object.list[[x]]) <- new.assay.name - VariableFeatures(object = object.list[[x]]) <- rownames(object.list[[x]]) - object.list[[x]] <- ScaleData( - object = object.list[[x]], - do.scale = FALSE, - do.center = do.center, - verbose = FALSE - ) - return (object.list[[x]]) - } - ) - if (!return.all.assays) { - object.list <- my.lapply( - X = object.list, - FUN = function(x) { - x <- DietSeurat(object = x, assay = new.assay.name, scale.data = TRUE) - return(x) - } - ) - } - return(object.list) -} - - - NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { select_nn <- nn.object@nn.idx @@ -5648,17 +5545,12 @@ FindDirectAnchor <- function( k.filter = NA, verbose = verbose ) - anchors[, 1] <- anchors[, 1] + offsets[1] anchors[, 2] <- anchors[, 2] + offsets[2] - # determine all anchors all.anchors <- anchors all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)]) all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell) - - - command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) reference <- NULL anchor.features <- rownames( obj.both ) @@ -5674,6 +5566,91 @@ FindDirectAnchor <- function( } +#' +#' +#' +#' +#' +#' +#' @importFrom MASS ginv +#‘ internel + +BridgeCellsRepresentation <- function(object.list, + bridge.object, + object.reduction.list, + bridge.reduction.list, + dims.list, + laplacian.reduction = NULL, + laplacian.dims = NULL, + new.assay.name = "Bridge", + return.all.assays = FALSE, + l2.norm = TRUE, + do.center = FALSE, + verbose = TRUE +) { + laplacian.dims <- laplacian.dims %||% 1:ncol(bridge.object[[laplacian.reduction]]) + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + if (verbose) { + message("Constructing Bridge-cells representation") + } + object.list <- my.lapply( + X = 1:length(x = object.list), + FUN = function(x) { + SA.inv <- ginv( + X = Embeddings( + object = bridge.object, + reduction = bridge.reduction.list[[x]] + )[ ,dims.list[[x]]] + ) + if (!is.null(laplacian.reduction)) { + lap.vector <- Embeddings(bridge.object[[laplacian.reduction]])[,laplacian.dims] + X <- Embeddings( + object = object.list[[x]], + reduction = object.reduction.list[[x]] + )[, 1:length(x = dims.list[[x]])] %*% (SA.inv %*% lap.vector) + + } else { + X <- Embeddings( + object = object.list[[x]], + reduction = object.reduction.list[[x]] + )[, 1:length(x = dims.list[[x]])] %*% SA.inv + colnames(X) <- Cells(bridge.object) + } + + if (l2.norm) { + X <- L2Norm(mat = X, MARGIN = 1) + } + colnames(x = X) <- paste0('bridge_', colnames(x = X)) + suppressWarnings(object.list[[x]][[new.assay.name]] <- CreateAssayObject(data = t(X))) + object.list[[x]][[new.assay.name]]@misc$SA.inv <- SA.inv + DefaultAssay(object.list[[x]]) <- new.assay.name + VariableFeatures(object = object.list[[x]]) <- rownames(object.list[[x]]) + object.list[[x]] <- ScaleData( + object = object.list[[x]], + do.scale = FALSE, + do.center = do.center, + verbose = FALSE + ) + return (object.list[[x]]) + } + ) + if (!return.all.assays) { + object.list <- my.lapply( + X = object.list, + FUN = function(x) { + x <- DietSeurat(object = x, assay = new.assay.name, scale.data = TRUE) + return(x) + } + ) + } + return(object.list) +} + + RunBridgeIntegration <- function(object.list, bridge.object, object.reduction.list, @@ -5682,14 +5659,11 @@ RunBridgeIntegration <- function(object.list, laplacian.reduction = "lap", anchor.type = c("direct", "cca")[1], laplacian.dims = NULL, - smooth.by = NULL, new.assay.name = "Bridge", integrated.reduction.name = "bridge_dr", verbose = TRUE ) { - if (!is.null(smooth.by)) { - bridge.method <- "bridge clusters" - } else if (!is.null(laplacian.reduction)) { + if (!is.null(laplacian.reduction)) { bridge.method <- "bridge graph" } else { bridge.method <- "bridge cells" @@ -5697,9 +5671,6 @@ RunBridgeIntegration <- function(object.list, if (verbose) { switch( EXPR = bridge.method, - 'bridge clusters' = { - message("Transform cells to bridge clusters space") - }, "bridge graph" = { message('Transform cells to bridge graph laplacian space') }, @@ -5715,7 +5686,6 @@ RunBridgeIntegration <- function(object.list, object.reduction.list = object.reduction.list, bridge.reduction.list = bridge.reduction.list, dims.list = dims.list, - smooth.by = smooth.by, new.assay.name = new.assay.name, laplacian.reduction = laplacian.reduction, laplacian.dims = laplacian.dims, From 64eae5732e740065be953b8ef8617c7bc79da7f5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Dec 2021 23:11:52 -0500 Subject: [PATCH 017/979] add transferAnchor --- NAMESPACE | 1 + R/integration.R | 62 ++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c176838ba..25ac927ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -445,6 +445,7 @@ importFrom(cluster,clara) importFrom(cowplot,get_legend) importFrom(cowplot,plot_grid) importFrom(cowplot,theme_cowplot) +importFrom(fastDummies,dummy_cols) importFrom(fitdistrplus,fitdist) importFrom(future,nbrOfWorkers) importFrom(future,plan) diff --git a/R/integration.R b/R/integration.R index 135381f3d..a22ebc397 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5503,6 +5503,8 @@ NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { FindDirectAnchor <- function( object.list, + reference = NULL, + anchor.type = c("Integration", "Transfer")[1], assay = "Bridge", slot = "data", reduction = NULL, @@ -5510,6 +5512,11 @@ FindDirectAnchor <- function( k.score = 50, verbose = TRUE ) { + if (!is.null(reference) ) { + object.list <- list(object.list[[reference]], object.list[[setdiff(c(1,2),reference)]]) + } + + reduction.name <-reduction %||% paste0(assay, ".reduc") object.list <- lapply(object.list, function(x) { if (is.null(reduction)) { @@ -5545,23 +5552,52 @@ FindDirectAnchor <- function( k.filter = NA, verbose = verbose ) - anchors[, 1] <- anchors[, 1] + offsets[1] - anchors[, 2] <- anchors[, 2] + offsets[2] - # determine all anchors all.anchors <- anchors + + all.anchors[, 1] <- all.anchors[, 1] + offsets[1] + all.anchors[, 2] <- all.anchors[, 2] + offsets[2] + # determine all anchors + all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)]) all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell) command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) - reference <- NULL - anchor.features <- rownames( obj.both ) - anchor.set <- new(Class = "IntegrationAnchorSet", - object.list = object.list, - reference.objects = reference %||% seq_along(object.list), - anchors = all.anchors, - offsets = offsets, - anchor.features = anchor.features, - command = command - ) + reference <- reference + anchor.features <- rownames(obj.both) + if (anchor.type == "Integration") { + anchor.set <- new(Class = "IntegrationAnchorSet", + object.list = object.list, + reference.objects = reference %||% seq_along(object.list), + anchors = all.anchors, + offsets = offsets, + anchor.features = anchor.features, + command = command + ) + } else if (anchor.type == "Transfer") { + reference.index <- reference + reference <- object.list[[1]] + query <- object.list[[2]] + query <- RenameCells( + object = query, + new.names = paste0(Cells(x = query), "_", "query") + ) + reference <- RenameCells( + object = reference, + new.names = paste0(Cells(x = reference), "_", "reference") + ) + combined.ob <- suppressWarnings(expr = merge( + x = DietSeurat(object = reference, counts = FALSE), + y = DietSeurat(object = query, counts = FALSE), + )) + anchor.set <- new( + Class = "TransferAnchorSet", + object.list = list(combined.ob), + reference.cells = colnames(x = reference), + query.cells = colnames(x = query), + anchors = anchors, + anchor.features = anchor.features, + command = command + ) + } return(anchor.set) } From f52008274ccdaa72b57556bf6eaa9ab8fb6ad3ef Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 11 Dec 2021 18:04:53 -0500 Subject: [PATCH 018/979] modify MapQuery for DirectTransferAnchor --- R/integration.R | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index 557e01f92..5300b63b9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1973,10 +1973,15 @@ MapQuery <- function( reference.dims <- query.dims <- 1:ncol(x = ref.cca.embedding) } else if (grepl(pattern = "lsi", x = slot(object = anchorset, name = "command")$reduction)) { anchor.reduction <- "lsiproject" - } else { + } else if (grepl(pattern = "FindDirectAnchor", + x = slot(object = + slot(object = anchor_t, name = "command"), + name = "name"))) { + anchor.reduction <- slot(object = anchorset, name = "command")$reduction + }else { stop("unkown type of anchors") } - + reference.reduction <- reference.reduction %||% slot(object = anchorset, name = "command")$reference.reduction %||% @@ -2017,7 +2022,8 @@ MapQuery <- function( ), transferdata.args ) ) - if (transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { + if (inherits(x = transferdata.args$weight.reduction , "character") && + transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { reuse.weights.matrix <- TRUE } } @@ -2890,6 +2896,7 @@ TransferData <- function( if (verbose) { message("Running PCA on query dataset") } + features <- slot(object = anchorset, name = "anchor.features") query.ob <- query query.ob <- ScaleData(object = query.ob, features = features, verbose = FALSE) @@ -2937,6 +2944,7 @@ TransferData <- function( } weight.reduction <- combined.ob[[weight.reduction]] } + dims <- dims %||% (1:ncol(weight.reduction)) if (max(dims) > ncol(x = weight.reduction)) { stop("dims is larger than the number of available dimensions in ", "weight.reduction (", ncol(x = weight.reduction), ").", call. = FALSE) @@ -5224,7 +5232,8 @@ ValidateParams_TransferData <- function( stop("None of the provided refdata elements are valid.", call. = FALSE) } ModifyParam(param = "refdata", value = refdata) - valid.weight.reduction <- c("pcaproject", "pca", "cca", "rpca.ref","lsiproject", "lsi") + object.reduction <- Reductions(object = slot(object = anchorset, name = "object.list")[[1]]) + valid.weight.reduction <- c("pcaproject", "pca", "cca", "rpca.ref","lsiproject", "lsi", object.reduction) if (!inherits(x = weight.reduction, "DimReduc")) { if (!weight.reduction %in% valid.weight.reduction) { stop("Please provide one of ", paste(valid.weight.reduction, collapse = ", "), " or a custom DimReduc to ", @@ -5250,8 +5259,8 @@ ValidateParams_TransferData <- function( ncol(x = weight.reduction), ").", call. = FALSE) } } else { - if (is.null(x = dims)) { - ModifyParam(param = "dims", value = 1:length(x = slot(object = anchorset, name = "command")$dims)) + if (is.null(x = dims) && !is.null(x = slot(object = anchorset, name = "command")$dims)) { + ModifyParam(param = "dims", value = 1:length(x = slot(object = anchorset, name = "command")$dims)) } } @@ -5590,8 +5599,9 @@ FindDirectAnchor <- function( new.names = paste0(Cells(x = reference), "_", "reference") ) combined.ob <- suppressWarnings(expr = merge( - x = DietSeurat(object = reference, counts = FALSE), - y = DietSeurat(object = query, counts = FALSE), + x = reference, + y = query, + merge.dr = reduction.name )) anchor.set <- new( Class = "TransferAnchorSet", From def97c05b4275a9c574d427ef56a02c0b57abac4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 12:22:35 -0500 Subject: [PATCH 019/979] bridge integration anchors --- R/integration.R | 283 +++++++++++++++++++++++++++++++++--------------- R/objects.R | 2 + 2 files changed, 196 insertions(+), 89 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5300b63b9..83cdb21de 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1438,7 +1438,6 @@ IntegrateData <- function( reference.model <- model.list[[which(reference.model)]] } } - if (length(x = reference.datasets) == length(x = object.list)) { if (normalization.method == "SCT") { reference.integrated[[new.assay.name]] <- CreateSCTAssayObject( @@ -5526,94 +5525,89 @@ FindDirectAnchor <- function( k.score = 50, verbose = TRUE ) { - if (!is.null(reference) ) { - object.list <- list(object.list[[reference]], object.list[[setdiff(c(1,2),reference)]]) - } - - - reduction.name <-reduction %||% paste0(assay, ".reduc") - object.list <- lapply(object.list, function(x) { - if (is.null(reduction)) { - x[[reduction.name]] <- CreateDimReducObject( - embeddings = t(GetAssayData( - object = x, - slot = slot, - assay = assay - )), - key = "L_", - assay = assay - ) - } + reduction.name <- reduction %||% paste0(assay, ".reduc") + if (!reduction %in% Reductions(object.list[[1]])) { + object.list <- lapply(object.list, function(x) { + if (is.null(reduction)) { + x[[reduction.name]] <- CreateDimReducObject( + embeddings = t(GetAssayData( + object = x, + slot = slot, + assay = assay + )), + key = "L_", + assay = assay + ) + } DefaultAssay(x) <- assay x <- DietSeurat(x, assays = assay, dimreducs = reduction.name ) return(x) - }) - obj.both <- merge(object.list[[1]], object.list[[2]], merge.dr = reduction.name) - - objects.ncell <- sapply(X = object.list, FUN = function(x) dim(x = x)[2]) - offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)] - - anchors <- FindAnchors(object.pair = obj.both, - assay = DefaultAssay(obj.both), - slot = 'data', - cells1 = colnames(object.list[[1]]), - cells2 = colnames(object.list[[2]]), - internal.neighbors = NULL, - reduction = reduction.name, - k.anchor = k.anchor, - k.score = k.score, - dims = 1:ncol(obj.both[[reduction.name]]), - k.filter = NA, - verbose = verbose + } ) - all.anchors <- anchors - - all.anchors[, 1] <- all.anchors[, 1] + offsets[1] - all.anchors[, 2] <- all.anchors[, 2] + offsets[2] - # determine all anchors - - all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)]) - all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell) - command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) - reference <- reference - anchor.features <- rownames(obj.both) - if (anchor.type == "Integration") { - anchor.set <- new(Class = "IntegrationAnchorSet", - object.list = object.list, - reference.objects = reference %||% seq_along(object.list), - anchors = all.anchors, - offsets = offsets, - anchor.features = anchor.features, - command = command - ) - } else if (anchor.type == "Transfer") { - reference.index <- reference - reference <- object.list[[1]] - query <- object.list[[2]] - query <- RenameCells( - object = query, - new.names = paste0(Cells(x = query), "_", "query") - ) - reference <- RenameCells( - object = reference, - new.names = paste0(Cells(x = reference), "_", "reference") - ) - combined.ob <- suppressWarnings(expr = merge( - x = reference, - y = query, - merge.dr = reduction.name - )) - anchor.set <- new( - Class = "TransferAnchorSet", - object.list = list(combined.ob), - reference.cells = colnames(x = reference), - query.cells = colnames(x = query), - anchors = anchors, - anchor.features = anchor.features, - command = command +} + object.both <- merge(object.list[[1]], object.list[[2]], merge.dr = reduction.name) + objects.ncell <- sapply(X = object.list, FUN = function(x) dim(x = x)[2]) + offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)] + anchors <- FindAnchors(object.pair = object.both, + assay = DefaultAssay(object.both), + slot = 'data', + cells1 = colnames(object.list[[1]]), + cells2 = colnames(object.list[[2]]), + internal.neighbors = NULL, + reduction = reduction.name, + k.anchor = k.anchor, + k.score = k.score, + dims = 1:ncol(object.both[[reduction.name]]), + k.filter = NA, + verbose = verbose ) - } - return(anchor.set) + inte.anchors <- anchors + inte.anchors[, 1] <- inte.anchors[, 1] + offsets[1] + inte.anchors[, 2] <- inte.anchors[, 2] + offsets[2] + # determine all anchors + inte.anchors <- rbind(inte.anchors, inte.anchors[, c(2, 1, 3)]) + inte.anchors <- AddDatasetID(anchor.df = inte.anchors, offsets = offsets, obj.lengths = objects.ncell) + command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) + + anchor.features <- rownames(object.both) + if (anchor.type == "Integration") { + anchor.set <- new(Class = "IntegrationAnchorSet", + object.list = object.list, + reference.objects = reference %||% seq_along(object.list), + anchors = inte.anchors, + weight.reduction = object.both[[reduction.name]], + offsets = offsets, + anchor.features = anchor.features, + command = command + ) + } else if (anchor.type == "Transfer") { + reference.index <- reference + reference <- object.list[[reference.index]] + query <- object.list[[setdiff(reference.index, c(1,2))]] + query <- RenameCells( + object = query, + new.names = paste0(Cells(x = query), "_", "query") + ) + reference <- RenameCells( + object = reference, + new.names = paste0(Cells(x = reference), "_", "reference") + ) + combined.ob <- suppressWarnings(expr = merge( + x = reference, + y = query, + merge.dr = reduction.name + )) + anchor.set <- new( + Class = "TransferAnchorSet", + object.list = list(combined.ob), + reference.cells = colnames(x = reference), + query.cells = colnames(x = query), + anchors = anchors, + anchor.features = anchor.features, + command = command + ) + } + return(anchor.set) } @@ -5633,7 +5627,7 @@ BridgeCellsRepresentation <- function(object.list, dims.list, laplacian.reduction = NULL, laplacian.dims = NULL, - new.assay.name = "Bridge", + bridge.assay.name = "Bridge", return.all.assays = FALSE, l2.norm = TRUE, do.center = FALSE, @@ -5676,9 +5670,9 @@ BridgeCellsRepresentation <- function(object.list, X <- L2Norm(mat = X, MARGIN = 1) } colnames(x = X) <- paste0('bridge_', colnames(x = X)) - suppressWarnings(object.list[[x]][[new.assay.name]] <- CreateAssayObject(data = t(X))) - object.list[[x]][[new.assay.name]]@misc$SA.inv <- SA.inv - DefaultAssay(object.list[[x]]) <- new.assay.name + suppressWarnings(object.list[[x]][[bridge.assay.name]] <- CreateAssayObject(data = t(X))) + object.list[[x]][[bridge.assay.name]]@misc$SA.inv <- SA.inv + DefaultAssay(object.list[[x]]) <- bridge.assay.name VariableFeatures(object = object.list[[x]]) <- rownames(object.list[[x]]) object.list[[x]] <- ScaleData( object = object.list[[x]], @@ -5693,7 +5687,7 @@ BridgeCellsRepresentation <- function(object.list, object.list <- my.lapply( X = object.list, FUN = function(x) { - x <- DietSeurat(object = x, assay = new.assay.name, scale.data = TRUE) + x <- DietSeurat(object = x, assay = bridge.assay.name, scale.data = TRUE) return(x) } ) @@ -5791,6 +5785,117 @@ RunBridgeIntegration <- function(object.list, return(object.merge) } + +FindBridgeAnchor <- function(object.list = NULL, + bridge.object, + object.reduction.list, + bridge.reduction.list, + dims.list, + anchor.type = c("Integration", "Transfer")[1], + reference = NULL, + laplacian.reduction = "lap", + anchor.method = c("direct", "cca")[1], + dims.cca = 1:30, + laplacian.dims = NULL, + bridge.assay.name = "Bridge", + verbose = TRUE) { + if (!is.null(laplacian.reduction)) { + bridge.method <- "bridge graph" + } else { + bridge.method <- "bridge cells" + } + if (verbose) { + switch( + EXPR = bridge.method, + "bridge graph" = { + message('Transform cells to bridge graph laplacian space') + }, + "bridge cells" = { + message('Transform cells to bridge cells space') + } + ) + } + if (anchor.type == "Transfer") { + reference <- reference %||% c(1) + query <- setdiff(c(1,2), reference) + } + + bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") + + object.list <- BridgeCellsRepresentation( + object.list = object.list , + bridge.object = bridge.object, + object.reduction.list = object.reduction.list, + bridge.reduction.list = bridge.reduction.list, + dims.list = dims.list, + bridge.assay.name = bridge.assay.name, + laplacian.reduction = laplacian.reduction, + laplacian.dims = laplacian.dims, + verbose = verbose + ) + # assay to dimensional reduction + object.list <- lapply( + X = object.list, + FUN = function(x) { + x[[bridge.reduction.name]] <- CreateDimReducObject( + embeddings = t(GetAssayData( + object = x, + slot = "data", + assay = bridge.assay.name + )), + key = "L_", + assay = bridge.assay.name + ) + return(x) + } + ) + + if (anchor.method == "direct") { + anchor <- FindDirectAnchor( + object.list = object.list , + reference = reference, + slot = "data", + anchor.type = anchor.type, + reduction = bridge.reduction.name, + assay = bridge.assay.name + ) + }else if (anchor.method == "cca") { + anchor <- switch(EXPR = anchor.type, + "Integration" = { + anchor <- FindIntegrationAnchors( + object.list = object.list, + k.filter = NA, + reduction = "cca", + scale = FALSE, + dims = dims.cca, + verbose = verbose) + slot( + object = anchor, + name = "weight.reduction" + ) <- merge(object.list[[1]][[bridge.reduction.name]], + object.list[[2]][[bridge.reduction.name]] + ) + anchor + }, + "Transfer" = { + anchor <- FindTransferAnchors( + reference = object.list[[reference]], + query = object.list[[query]], + reduction = "cca", + scale = FALSE, + k.filter = NA, + dims = dims.cca, + verbose = verbose + ) + } + ) + + } + return(anchor) + +} + + # Helper function to transfer labels based on neighbors object # @param nn.object the query neighbors object # @param reference.object the reference seurat object diff --git a/R/objects.R b/R/objects.R index e20b99b85..f964fe030 100644 --- a/R/objects.R +++ b/R/objects.R @@ -30,6 +30,7 @@ setOldClass(Classes = 'package_version') #' anchor score, and the index of the original dataset in the object.list for cell1 and cell2 of #' the anchor. #' @slot offsets The offsets used to enable cell look up in downstream functions +#' @slot weight.reduction The weight dimensional reduction used to calculate weight matrix #' @slot anchor.features The features used when performing anchor finding. #' @slot neighbors List containing Neighbor objects for reuse later (e.g. mapping) #' @slot command Store log of parameters that were used @@ -49,6 +50,7 @@ AnchorSet <- setClass( query.cells = "vector", anchors = "ANY", offsets = "ANY", + weight.reduction = "DimReduc", anchor.features = "ANY", neighbors = "list", command = "ANY" From 340e1a42fc169b7da35f962172ca24e69dd45d7b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 16:04:42 -0500 Subject: [PATCH 020/979] transfer bridge anchor --- R/integration.R | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/R/integration.R b/R/integration.R index 83cdb21de..1fb565b0d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1930,7 +1930,6 @@ MapQuery <- function( projectumap.args = list(), verbose = TRUE ) { - # determine anchor type if (grepl(pattern = "pca", x = slot(object = anchorset, name = "command")$reduction)) { anchor.reduction <- "pcaproject" @@ -1972,12 +1971,27 @@ MapQuery <- function( reference.dims <- query.dims <- 1:ncol(x = ref.cca.embedding) } else if (grepl(pattern = "lsi", x = slot(object = anchorset, name = "command")$reduction)) { anchor.reduction <- "lsiproject" - } else if (grepl(pattern = "FindDirectAnchor", - x = slot(object = - slot(object = anchor_t, name = "command"), - name = "name"))) { - anchor.reduction <- slot(object = anchorset, name = "command")$reduction - }else { + } else if (grepl(pattern = "FindBridgeAnchor", x = slot(object = slot(object = anchorset, name = "command"), name = "name"))){ + anchor.reduction <- paste0( slot(object = anchorset, name = "command")$bridge.assay.name, ".reduc") + ref.reduction.emb <- Embeddings( + object = + slot( + object = anchorset, + name = "object.list" + )[[1]][[anchor.reduction]])[ + slot(object = anchorset, name = "reference.cells"),] + rownames(ref.reduction.emb) <- gsub( + pattern = "_reference", + replacement = "", + x = rownames(ref.reduction.emb) + ) + reference[[anchor.reduction]] <- CreateDimReducObject( + embeddings = ref.reduction.emb, + key = "L_", + assay = DefaultAssay(reference) + ) + } + else { stop("unkown type of anchors") } @@ -5583,7 +5597,7 @@ FindDirectAnchor <- function( } else if (anchor.type == "Transfer") { reference.index <- reference reference <- object.list[[reference.index]] - query <- object.list[[setdiff(reference.index, c(1,2))]] + query <- object.list[[setdiff(c(1,2), reference.index)]] query <- RenameCells( object = query, new.names = paste0(Cells(x = query), "_", "query") @@ -5794,7 +5808,7 @@ FindBridgeAnchor <- function(object.list = NULL, anchor.type = c("Integration", "Transfer")[1], reference = NULL, laplacian.reduction = "lap", - anchor.method = c("direct", "cca")[1], + reduction = c("direct", "cca")[1], dims.cca = 1:30, laplacian.dims = NULL, bridge.assay.name = "Bridge", @@ -5850,7 +5864,7 @@ FindBridgeAnchor <- function(object.list = NULL, } ) - if (anchor.method == "direct") { + if (reduction == "direct") { anchor <- FindDirectAnchor( object.list = object.list , reference = reference, @@ -5859,12 +5873,13 @@ FindBridgeAnchor <- function(object.list = NULL, reduction = bridge.reduction.name, assay = bridge.assay.name ) - }else if (anchor.method == "cca") { + } else if (reduction == "cca") { anchor <- switch(EXPR = anchor.type, "Integration" = { anchor <- FindIntegrationAnchors( object.list = object.list, k.filter = NA, + reference = reference, reduction = "cca", scale = FALSE, dims = dims.cca, @@ -5889,8 +5904,11 @@ FindBridgeAnchor <- function(object.list = NULL, ) } ) - } + slot(object = anchor, name = "command") <- LogSeuratCommand( + object = object.list[[1]], + return.command = TRUE + ) return(anchor) } From fa06b6455b8e88f366ca0b3dff9854efe038e10d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 16:31:52 -0500 Subject: [PATCH 021/979] check transfer reduction --- R/integration.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 1fb565b0d..eb1433fb5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1930,8 +1930,9 @@ MapQuery <- function( projectumap.args = list(), verbose = TRUE ) { + transfer.reduction <- slot(object = anchorset, name = "command")$reduction # determine anchor type - if (grepl(pattern = "pca", x = slot(object = anchorset, name = "command")$reduction)) { + if (grepl(pattern = "pca", x = transfer.reduction)) { anchor.reduction <- "pcaproject" # check if the anchorset can be used for mapping if (is.null(x = slot(object = anchorset, name = "command")$reference.reduction)) { @@ -1939,7 +1940,7 @@ MapQuery <- function( 'FindTransferAnchors, so the resulting AnchorSet object cannot be used ', 'in the MapQuery function.') } - } else if (grepl(pattern = "cca", x = slot(object = anchorset, name = "command")$reduction)) { + } else if (grepl(pattern = "cca", x = transfer.reduction)) { anchor.reduction <- "cca" ref.cca.embedding <- Embeddings( slot(object = anchorset, name = "object.list")[[1]][["cca"]] @@ -1969,10 +1970,14 @@ MapQuery <- function( ) reference.reduction <- new.reduction.name <- "cca" reference.dims <- query.dims <- 1:ncol(x = ref.cca.embedding) - } else if (grepl(pattern = "lsi", x = slot(object = anchorset, name = "command")$reduction)) { + } else if (grepl(pattern = "lsi", x = transfer.reduction)) { anchor.reduction <- "lsiproject" - } else if (grepl(pattern = "FindBridgeAnchor", x = slot(object = slot(object = anchorset, name = "command"), name = "name"))){ - anchor.reduction <- paste0( slot(object = anchorset, name = "command")$bridge.assay.name, ".reduc") + } else if (grepl(pattern = "direct", x = transfer.reduction)){ + anchor.reduction <- paste0( + slot(object = anchorset, + name = "command")$bridge.assay.name, + ".reduc" + ) ref.reduction.emb <- Embeddings( object = slot( From 842bd697ab2eef9876a4c9950fbf22e96862c9d5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 16:33:13 -0500 Subject: [PATCH 022/979] delete RunBridgeIntegration --- R/integration.R | 90 ------------------------------------------------- 1 file changed, 90 deletions(-) diff --git a/R/integration.R b/R/integration.R index eb1433fb5..ec8df3b29 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5715,96 +5715,6 @@ BridgeCellsRepresentation <- function(object.list, } -RunBridgeIntegration <- function(object.list, - bridge.object, - object.reduction.list, - bridge.reduction.list, - dims.list, - laplacian.reduction = "lap", - anchor.type = c("direct", "cca")[1], - laplacian.dims = NULL, - new.assay.name = "Bridge", - integrated.reduction.name = "bridge_dr", - verbose = TRUE -) { - if (!is.null(laplacian.reduction)) { - bridge.method <- "bridge graph" - } else { - bridge.method <- "bridge cells" - } - if (verbose) { - switch( - EXPR = bridge.method, - "bridge graph" = { - message('Transform cells to bridge graph laplacian space') - }, - "bridge cells" = { - message('Transform cells to bridge cells space') - } - ) - } - - object.list <- BridgeCellsRepresentation( - object.list = object.list, - bridge.object = bridge.object, - object.reduction.list = object.reduction.list, - bridge.reduction.list = bridge.reduction.list, - dims.list = dims.list, - new.assay.name = new.assay.name, - laplacian.reduction = laplacian.reduction, - laplacian.dims = laplacian.dims, - verbose = verbose - ) - if (verbose) { - message("Integrating Bridge space") - } - - bridge.reduction.name <- paste0(new.assay.name, ".reduc") - object.list <- lapply( - X = object.list, - FUN = function(x) { - x[[bridge.reduction.name]] <- CreateDimReducObject( - embeddings = t(GetAssayData( - object = x, - slot = "data", - assay = new.assay.name - )), - key = "L_", - assay = new.assay.name - ) - return(x) - } - ) - merge.emb <- merge( - object.list[[1]], - object.list[[2]], - merge.dr = paste0(new.assay.name, ".reduc") - ) - - if (anchor.type == "direct") { - anchor <- FindDirectAnchor( - object.list = object.list , - slot = "data", - reduction = bridge.reduction.name, - assay = new.assay.name - ) - } else if (anchor.type == "cca") { - anchor <- FindIntegrationAnchors(object.list = object.list, - k.filter = NA, - scale = FALSE, - verbose = verbose) - } - - object.merge <- IntegrateEmbeddings( - anchorset = anchor, - reductions = merge.emb[[paste0(new.assay.name, ".reduc")]], - new.reduction.name = integrated.reduction.name - ) - - return(object.merge) -} - - FindBridgeAnchor <- function(object.list = NULL, bridge.object, object.reduction.list, From a2f1bb1f5330195bf0ced67ca0aff80669e29105 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 16:57:32 -0500 Subject: [PATCH 023/979] add default reduction --- R/integration.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index ec8df3b29..86d66f229 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1567,6 +1567,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( reference.datasets <- slot(object = anchorset, name = 'reference.objects') object.list <- slot(object = anchorset, name = 'object.list') anchors <- slot(object = anchorset, name = 'anchors') + reductions <- reductions %||% slot(object = anchorset, name = 'weight.reduction') ValidateParams_IntegrateEmbeddings_IntegrationAnchors( anchorset = anchorset, object.list = object.list, @@ -5514,7 +5515,6 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { - select_nn <- nn.object@nn.idx ncol.nn <- ncol.nn %||% nrow(x = select_nn) col.cells <- col.cells %||% nn.object@cell.names @@ -5715,7 +5715,7 @@ BridgeCellsRepresentation <- function(object.list, } -FindBridgeAnchor <- function(object.list = NULL, +FindBridgeAnchor <- function(object.list, bridge.object, object.reduction.list, bridge.reduction.list, @@ -5723,9 +5723,9 @@ FindBridgeAnchor <- function(object.list = NULL, anchor.type = c("Integration", "Transfer")[1], reference = NULL, laplacian.reduction = "lap", + laplacian.dims = NULL, reduction = c("direct", "cca")[1], dims.cca = 1:30, - laplacian.dims = NULL, bridge.assay.name = "Bridge", verbose = TRUE) { if (!is.null(laplacian.reduction)) { @@ -5881,9 +5881,7 @@ TranferLablesNN <- function( ## RunLaplacian ### - - - + RunGraphLaplacian.Seurat <- function( object, graph, @@ -5891,7 +5889,6 @@ RunGraphLaplacian.Seurat <- function( reduction.key ="LAP_", n = 50 ) { - lap_dir <- RunGraphLaplacian(object = object[[graph]], n = n, reduction.key = reduction.key @@ -5909,8 +5906,7 @@ RunGraphLaplacian.default <- function(object, reduction.key ="LAP_", verbose = FALSE ) { - - D_half <- sqrt(rowSums(object)) + D_half <- sqrt(x = rowSums(x = object)) L <- -1 * (t(object / D_half) / D_half) diag(L) <- 1 + diag(L) L_eigen <- eigs_sym(L, k = n + 1, which = "SM") From bfd2ab600235ef9ca2d35fc3108d48b3030218a6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 17:49:17 -0500 Subject: [PATCH 024/979] add anchor type message --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index 86d66f229..27b981480 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5734,6 +5734,7 @@ FindBridgeAnchor <- function(object.list, bridge.method <- "bridge cells" } if (verbose) { + message("Finding ", anchor.type," anchors") switch( EXPR = bridge.method, "bridge graph" = { From e4774e09735d9e0ba4ec633ef6ed4824ebcc54a9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 19:44:33 -0500 Subject: [PATCH 025/979] docu FindBridgeAnchors --- NAMESPACE | 8 +++-- R/integration.R | 66 +++++++++++++++++++++++++++++++++++++----- man/AnchorSet-class.Rd | 2 ++ 3 files changed, 66 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 45fe59afa..e59bd61e7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,8 +60,6 @@ S3method(RenameCells,SlideSeq) S3method(RenameCells,VisiumV1) S3method(RunCCA,Seurat) S3method(RunCCA,default) -S3method(RunGraphLaplacian,Seurat) -S3method(RunGraphLaplacian,default) S3method(RunICA,Assay) S3method(RunICA,Seurat) S3method(RunICA,default) @@ -175,6 +173,7 @@ export(FeatureScatter) export(FetchData) export(FilterSlideSeq) export(FindAllMarkers) +export(FindBridgeAnchor) export(FindClusters) export(FindConservedMarkers) export(FindIntegrationAnchors) @@ -280,6 +279,7 @@ export(RidgePlot) export(RotatedAxis) export(RowMergeSparseMatrices) export(RunCCA) +export(RunGraphLaplacian) export(RunICA) export(RunLDA) export(RunMarkVario) @@ -367,16 +367,19 @@ importFrom(Matrix,as.matrix) importFrom(Matrix,colMeans) importFrom(Matrix,colSums) importFrom(Matrix,crossprod) +importFrom(Matrix,diag) importFrom(Matrix,readMM) importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) importFrom(Matrix,sparse.model.matrix) importFrom(Matrix,sparseMatrix) +importFrom(Matrix,t) importFrom(RANN,nn2) importFrom(RColorBrewer,brewer.pal) importFrom(RColorBrewer,brewer.pal.info) importFrom(ROCR,performance) importFrom(ROCR,prediction) +importFrom(RSpectra,eigs_sym) importFrom(Rcpp,evalCpp) importFrom(RcppAnnoy,AnnoyAngular) importFrom(RcppAnnoy,AnnoyEuclidean) @@ -621,7 +624,6 @@ importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") importFrom(rlang,as_label) -importFrom(RSpectra, eigs_sym) importFrom(rlang,invoke) importFrom(scales,brewer_pal) importFrom(scales,hue_pal) diff --git a/R/integration.R b/R/integration.R index 27b981480..917b2f7b2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5714,6 +5714,57 @@ BridgeCellsRepresentation <- function(object.list, return(object.list) } +#' Find bridge anchors between two modalities objects +#' +#' First, bridge object is used to reconstruct two single-modality profiles and +#' then project those cells into bridage graph laplacian space. +#' Next, find a set of anchors between two single-modality objects. These +#' anchors can later be used to integrate embeddings or transfer data from the reference to +#' query object using the \code{\link{MapQuery}} object. +#' +#' \itemize{ +#' \item{ Bridge cells reconstruction +#' } +#' \item{ Find anchors between objects. It can be either IntegrationAnchors or TransferAnchor. +#' } +#' } +#' +#' @param object.list A list of Seurat objects between which to +#' find anchors for downstream integration. +#' @param bridge.object A multimodal bridge seurat which connects two +#' single-modality objects +#' @param object.reduction.list A list of dimensional reductions from object.list used +#' to be reconstructed by bridge.obejct +#' @param bridge.reduction.list A list of dimensional reductions from bridge.object used +#' to reconstruct object.reduction.list +#' @param dims.list A list of dimensions to use for object.reduction.list and +#' bridge.reduction.list +#' @param anchor.type The type of anchors. Can +#' be one of: +#' \itemize{ +#' \item{Integration: Generate IntegrationAnchors for integration} +#' \item{Transfer: Generate TransferAnchors for transfering data} +#' } +#' @param reference A vector specifying the object/s to be used as a reference +#' during integration or transfer data. +#' @param laplacian.reduction Name of bridge graph laplacian dimensional reduction +#' @param laplacian.dims Dimensions used for bridge graph laplacian dimensional reduction +#' @param reduction Dimensional reduction to perform when finding anchors. Can +#' be one of: +#' \itemize{ +#' \item{cca: Canonical correlation analysis} +#' \item{direct: Use assay data as a dimensional reduction} +#' } +#' @param bridge.assay.name Assay name used for bridge object reconstruction value +#' @param verbose Print messages and progress +#' @param ... Additional parameters passed to \code{FindIntegrationAnchors} or +#' \code{FindTransferAnchors} +#' +#' +#' @return Returns an \code{\link{AnchorSet}} object that can be used as input to +#' \code{\link{IntegrateEmbeddings}}.or \code{\link{MapQuery}} +#' @export +#' FindBridgeAnchor <- function(object.list, bridge.object, @@ -5725,9 +5776,10 @@ FindBridgeAnchor <- function(object.list, laplacian.reduction = "lap", laplacian.dims = NULL, reduction = c("direct", "cca")[1], - dims.cca = 1:30, - bridge.assay.name = "Bridge", - verbose = TRUE) { + bridge.assay.name = "Bridge", + verbose = TRUE, + ... + ) { if (!is.null(laplacian.reduction)) { bridge.method <- "bridge graph" } else { @@ -5798,8 +5850,8 @@ FindBridgeAnchor <- function(object.list, reference = reference, reduction = "cca", scale = FALSE, - dims = dims.cca, - verbose = verbose) + verbose = verbose, + ...) slot( object = anchor, name = "weight.reduction" @@ -5815,8 +5867,8 @@ FindBridgeAnchor <- function(object.list, reduction = "cca", scale = FALSE, k.filter = NA, - dims = dims.cca, - verbose = verbose + verbose = verbose, + ... ) } ) diff --git a/man/AnchorSet-class.Rd b/man/AnchorSet-class.Rd index 25c7e4962..37c7687d0 100644 --- a/man/AnchorSet-class.Rd +++ b/man/AnchorSet-class.Rd @@ -28,6 +28,8 @@ the anchor.} \item{\code{offsets}}{The offsets used to enable cell look up in downstream functions} +\item{\code{weight.reduction}}{The weight dimensional reduction used to calculate weight matrix} + \item{\code{anchor.features}}{The features used when performing anchor finding.} \item{\code{neighbors}}{List containing Neighbor objects for reuse later (e.g. mapping)} From 7ee761fbe5e1de5f100df5012d0b0b62d347336d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 12 Dec 2021 20:11:50 -0500 Subject: [PATCH 026/979] docu RunGraphLaplacian --- NAMESPACE | 2 ++ R/generics.R | 12 +++++++----- R/integration.R | 49 +++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e59bd61e7..42e578419 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,6 +60,8 @@ S3method(RenameCells,SlideSeq) S3method(RenameCells,VisiumV1) S3method(RunCCA,Seurat) S3method(RunCCA,default) +S3method(RunGraphLaplacian,Seurat) +S3method(RunGraphLaplacian,default) S3method(RunICA,Assay) S3method(RunICA,Seurat) S3method(RunICA,default) diff --git a/R/generics.R b/R/generics.R index 1f432b350..4ad5ff13f 100644 --- a/R/generics.R +++ b/R/generics.R @@ -395,12 +395,14 @@ RunCCA <- function(object1, object2, ...) { #' Run Graph Laplacian Eigendecomposition #' -#' Run a graph dimensionality reductionxxxxx -#' xxxxxx -#' @param object An object -#' @param ... Arguments passed to other methods and IRLBA +#' Run a graph laplacian dimensionality reduction. It is used as a low dimensional +#' representation for a cell-cell graph. The input graph should be symmetric +#' +#' @param object A Seurat object +#' @param ... Arguments passed to RSpectra eigs_sym #' -#' @return Returns Seurat object with the SPCA calculation stored in the reductions slot +#' @return Returns Seurat object with the Graph laplacian eigenvector calculation +#' stored in the reductions slot #' @export #' #' @rdname RunGraphLaplacian diff --git a/R/integration.R b/R/integration.R index 917b2f7b2..dbca276cb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5727,9 +5727,9 @@ BridgeCellsRepresentation <- function(object.list, #' } #' \item{ Find anchors between objects. It can be either IntegrationAnchors or TransferAnchor. #' } -#' } +#' } #' -#' @param object.list A list of Seurat objects between which to +#' @param object.list A list of Seurat objects between which to #' find anchors for downstream integration. #' @param bridge.object A multimodal bridge seurat which connects two #' single-modality objects @@ -5932,37 +5932,66 @@ TranferLablesNN <- function( -## RunLaplacian -### - +#' @param reduction.name dimensional reduction name, lap by default +#' @param graph The name of graph +#' @rdname RunGraphLaplacian +#' @concept dimensional_reduction +#' @export +#' @method RunGraphLaplacian Seurat +#' RunGraphLaplacian.Seurat <- function( object, graph, reduction.name = "lap", reduction.key ="LAP_", - n = 50 + n = 50, + verbose = TRUE, + ... ) { lap_dir <- RunGraphLaplacian(object = object[[graph]], n = n, - reduction.key = reduction.key + reduction.key = reduction.key , + verbose = verbose, + ... ) object[[reduction.name]] <- lap_dir return(object) } - + + +#' @param n Total Number of Eigenvectors to compute and store (50 by default) +#' @param reduction.key dimensional reduction key, specifies the string before +#' the number for the dimension names. LAP by default +#' @param verbose Print message and process +#' +#' +#' @concept dimensional_reduction +#' @rdname RunGraphLaplacian +#' @export +#' #' @importFrom Matrix diag t rowSums #' @importFrom RSpectra eigs_sym RunGraphLaplacian.default <- function(object, n = 50, reduction.key ="LAP_", - verbose = FALSE + verbose = TRUE, + ... ) { + if (!all(t(x = object)@x == object@x)) { + step("Input graph is not symmetric") + } + if (verbose) { + message("Generating normalized laplacian graph") + } D_half <- sqrt(x = rowSums(x = object)) L <- -1 * (t(object / D_half) / D_half) diag(L) <- 1 + diag(L) - L_eigen <- eigs_sym(L, k = n + 1, which = "SM") + if (verbose) { + message("Performing eigendecomposition of the normalized laplacian graph") + } + L_eigen <- eigs_sym(L, k = n + 1, which = "SM", ...) new_order <- n:1 lap_output <- list(eigen_vector = Re(L_eigen$vectors[, new_order]), eigen_value = L_eigen$values[new_order] From 5c4607a290bf7e65a3e96161a3846595c6910640 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 13 Dec 2021 08:11:05 -0500 Subject: [PATCH 027/979] add k.anchor k.score --- R/integration.R | 163 +++++++++++++++++++++++++++--------------------- 1 file changed, 93 insertions(+), 70 deletions(-) diff --git a/R/integration.R b/R/integration.R index dbca276cb..2ba6c9b17 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5533,15 +5533,21 @@ NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { return( nn.matrix) } + +# Find Anchor direct from assay +# +# +# @return Returns an Integration or TranserAnchor set + FindDirectAnchor <- function( object.list, - reference = NULL, - anchor.type = c("Integration", "Transfer")[1], + reference = NULL, + anchor.type = c("Integration", "Transfer")[1], assay = "Bridge", - slot = "data", - reduction = NULL, - k.anchor = 20, - k.score = 50, + slot = "data", + reduction = NULL, + k.anchor = 20, + k.score = 50, verbose = TRUE ) { reduction.name <- reduction %||% paste0(assay, ".reduc") @@ -5554,12 +5560,12 @@ FindDirectAnchor <- function( slot = slot, assay = assay )), - key = "L_", - assay = assay + key = "L_", + assay = assay ) - } + } DefaultAssay(x) <- assay - x <- DietSeurat(x, assays = assay, dimreducs = reduction.name ) + x <- DietSeurat(x, assays = assay, dimreducs = reduction.name) return(x) } ) @@ -5567,17 +5573,20 @@ FindDirectAnchor <- function( object.both <- merge(object.list[[1]], object.list[[2]], merge.dr = reduction.name) objects.ncell <- sapply(X = object.list, FUN = function(x) dim(x = x)[2]) offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)] + if (verbose) { + message("Finding ", anchor.type," anchors from assay ", assay) + } anchors <- FindAnchors(object.pair = object.both, - assay = DefaultAssay(object.both), - slot = 'data', - cells1 = colnames(object.list[[1]]), + assay = DefaultAssay(object.both), + slot = 'data', + cells1 = colnames(object.list[[1]]), cells2 = colnames(object.list[[2]]), internal.neighbors = NULL, reduction = reduction.name, k.anchor = k.anchor, k.score = k.score, - dims = 1:ncol(object.both[[reduction.name]]), - k.filter = NA, + dims = 1:ncol(object.both[[reduction.name]]), + k.filter = NA, verbose = verbose ) inte.anchors <- anchors @@ -5585,22 +5594,25 @@ FindDirectAnchor <- function( inte.anchors[, 2] <- inte.anchors[, 2] + offsets[2] # determine all anchors inte.anchors <- rbind(inte.anchors, inte.anchors[, c(2, 1, 3)]) - inte.anchors <- AddDatasetID(anchor.df = inte.anchors, offsets = offsets, obj.lengths = objects.ncell) + inte.anchors <- AddDatasetID( + anchor.df = inte.anchors, + offsets = offsets, + obj.lengths = objects.ncell + ) command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE) - anchor.features <- rownames(object.both) if (anchor.type == "Integration") { anchor.set <- new(Class = "IntegrationAnchorSet", object.list = object.list, reference.objects = reference %||% seq_along(object.list), anchors = inte.anchors, - weight.reduction = object.both[[reduction.name]], + weight.reduction = object.both[[reduction.name]], offsets = offsets, anchor.features = anchor.features, command = command ) } else if (anchor.type == "Transfer") { - reference.index <- reference + reference.index <- reference reference <- object.list[[reference.index]] query <- object.list[[setdiff(c(1,2), reference.index)]] query <- RenameCells( @@ -5630,26 +5642,26 @@ FindDirectAnchor <- function( } -#' +#' Use bridge cells to represent single-modality object #' #' #' #' #' #' @importFrom MASS ginv -#‘ internel +#‘ BridgeCellsRepresentation <- function(object.list, - bridge.object, + bridge.object, object.reduction.list, bridge.reduction.list, dims.list, - laplacian.reduction = NULL, + laplacian.reduction = NULL, laplacian.dims = NULL, - bridge.assay.name = "Bridge", - return.all.assays = FALSE, - l2.norm = TRUE, - do.center = FALSE, + bridge.assay.name = "Bridge", + return.all.assays = FALSE, + l2.norm = TRUE, + do.center = FALSE, verbose = TRUE ) { laplacian.dims <- laplacian.dims %||% 1:ncol(bridge.object[[laplacian.reduction]]) @@ -5676,20 +5688,20 @@ BridgeCellsRepresentation <- function(object.list, object = object.list[[x]], reduction = object.reduction.list[[x]] )[, 1:length(x = dims.list[[x]])] %*% (SA.inv %*% lap.vector) - } else { X <- Embeddings( - object = object.list[[x]], + object = object.list[[x]], reduction = object.reduction.list[[x]] )[, 1:length(x = dims.list[[x]])] %*% SA.inv colnames(X) <- Cells(bridge.object) } - if (l2.norm) { X <- L2Norm(mat = X, MARGIN = 1) } colnames(x = X) <- paste0('bridge_', colnames(x = X)) - suppressWarnings(object.list[[x]][[bridge.assay.name]] <- CreateAssayObject(data = t(X))) + suppressWarnings( + object.list[[x]][[bridge.assay.name]] <- CreateAssayObject(data = t(X)) + ) object.list[[x]][[bridge.assay.name]]@misc$SA.inv <- SA.inv DefaultAssay(object.list[[x]]) <- bridge.assay.name VariableFeatures(object = object.list[[x]]) <- rownames(object.list[[x]]) @@ -5756,6 +5768,8 @@ BridgeCellsRepresentation <- function(object.list, #' \item{direct: Use assay data as a dimensional reduction} #' } #' @param bridge.assay.name Assay name used for bridge object reconstruction value +#' @param k.anchor How many neighbors (k) to use when picking anchors +#' @param k.score How many neighbors (k) to use when scoring anchors #' @param verbose Print messages and progress #' @param ... Additional parameters passed to \code{FindIntegrationAnchors} or #' \code{FindTransferAnchors} @@ -5767,16 +5781,18 @@ BridgeCellsRepresentation <- function(object.list, #' FindBridgeAnchor <- function(object.list, - bridge.object, + bridge.object, object.reduction.list, bridge.reduction.list, - dims.list, - anchor.type = c("Integration", "Transfer")[1], + dims.list, + anchor.type = c("Integration", "Transfer")[1], reference = NULL, laplacian.reduction = "lap", laplacian.dims = NULL, reduction = c("direct", "cca")[1], bridge.assay.name = "Bridge", + k.anchor = 20, + k.score = 50, verbose = TRUE, ... ) { @@ -5803,16 +5819,15 @@ FindBridgeAnchor <- function(object.list, } bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") - object.list <- BridgeCellsRepresentation( object.list = object.list , bridge.object = bridge.object, - object.reduction.list = object.reduction.list, - bridge.reduction.list = bridge.reduction.list, + object.reduction.list = object.reduction.list, + bridge.reduction.list = bridge.reduction.list, dims.list = dims.list, - bridge.assay.name = bridge.assay.name, - laplacian.reduction = laplacian.reduction, - laplacian.dims = laplacian.dims, + bridge.assay.name = bridge.assay.name, + laplacian.reduction = laplacian.reduction, + laplacian.dims = laplacian.dims, verbose = verbose ) # assay to dimensional reduction @@ -5825,32 +5840,36 @@ FindBridgeAnchor <- function(object.list, slot = "data", assay = bridge.assay.name )), - key = "L_", + key = "L_", assay = bridge.assay.name ) return(x) } ) - if (reduction == "direct") { anchor <- FindDirectAnchor( object.list = object.list , - reference = reference, - slot = "data", + reference = reference, + slot = "data", anchor.type = anchor.type, reduction = bridge.reduction.name, - assay = bridge.assay.name + assay = bridge.assay.name, + k.anchor = k.anchor, + k.score = k.score, + verbose = verbose ) } else if (reduction == "cca") { anchor <- switch(EXPR = anchor.type, "Integration" = { anchor <- FindIntegrationAnchors( - object.list = object.list, + object.list = object.list, k.filter = NA, - reference = reference, - reduction = "cca", + reference = reference, + reduction = "cca", scale = FALSE, - verbose = verbose, + k.anchor = k.anchor, + k.score = k.score, + verbose = verbose, ...) slot( object = anchor, @@ -5862,11 +5881,13 @@ FindBridgeAnchor <- function(object.list, }, "Transfer" = { anchor <- FindTransferAnchors( - reference = object.list[[reference]], + reference = object.list[[reference]], query = object.list[[query]], reduction = "cca", - scale = FALSE, + scale = FALSE, k.filter = NA, + k.anchor = k.anchor, + k.score = k.score, verbose = verbose, ... ) @@ -5874,7 +5895,7 @@ FindBridgeAnchor <- function(object.list, ) } slot(object = anchor, name = "command") <- LogSeuratCommand( - object = object.list[[1]], + object = object.list[[1]], return.command = TRUE ) return(anchor) @@ -5924,7 +5945,7 @@ TranferLablesNN <- function( query.label.score <- apply(X = query.label.mat, MARGIN = 1, FUN = max) output.list <- list(labels = query.label, - scores = query.label.score, + scores = query.label.score, prediction.mat = query.label.mat ) return(output.list) @@ -5940,21 +5961,20 @@ TranferLablesNN <- function( #' @method RunGraphLaplacian Seurat #' RunGraphLaplacian.Seurat <- function( - object, - graph, - reduction.name = "lap", - reduction.key ="LAP_", - n = 50, - verbose = TRUE, + object, + graph, + reduction.name = "lap", + reduction.key ="LAP_", + n = 50, + verbose = TRUE, ... ) { lap_dir <- RunGraphLaplacian(object = object[[graph]], - n = n, - reduction.key = reduction.key , - verbose = verbose, + n = n, + reduction.key = reduction.key , + verbose = verbose, ... ) - object[[reduction.name]] <- lap_dir return(object) } @@ -5973,13 +5993,15 @@ RunGraphLaplacian.Seurat <- function( #' #' @importFrom Matrix diag t rowSums #' @importFrom RSpectra eigs_sym -RunGraphLaplacian.default <- function(object, - n = 50, - reduction.key ="LAP_", - verbose = TRUE, +RunGraphLaplacian.default <- function(object, + n = 50, + reduction.key ="LAP_", + verbose = TRUE, ... ) { - if (!all(t(x = object)@x == object@x)) { + if (!all( + slot(object = t(x = object), name = "x") == slot(object = object, name = "x") + )) { step("Input graph is not symmetric") } if (verbose) { @@ -5991,9 +6013,10 @@ RunGraphLaplacian.default <- function(object, if (verbose) { message("Performing eigendecomposition of the normalized laplacian graph") } - L_eigen <- eigs_sym(L, k = n + 1, which = "SM", ...) + L_eigen <- eigs_sym(L, k = n + 1, which = "SM", ...) + #delete the first eigen vector new_order <- n:1 - lap_output <- list(eigen_vector = Re(L_eigen$vectors[, new_order]), + lap_output <- list(eigen_vector = Re(L_eigen$vectors[, new_order]), eigen_value = L_eigen$values[new_order] ) rownames(lap_output$eigen_vector) <- colnames(object) From 31851f537dafd8dbeba0e48c81b245a6ac511cd4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 13 Dec 2021 14:12:43 -0500 Subject: [PATCH 028/979] add leverage score --- R/integration.R | 172 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/R/integration.R b/R/integration.R index 2ba6c9b17..7123cb08c 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6029,4 +6029,176 @@ RunGraphLaplacian.default <- function(object, return(lap_dir) } + +#' @importFrom Matrix sparseMatrix + +CountSketch <- function(nrow, ncol, seed = 123) { + set.seed(seed = seed) + iv <- xv <- vector(mode = "numeric", length = ncol) + jv <- seq_len(length.out = ncol) + for (i in jv) { + iv[i] <- sample(x = seq_len(length.out = nrow), size = 1L) + xv[i] <- sample(x = c(-1L, 1L), size = 1L) + } + return(sparseMatrix( + i = iv, + j = jv, + x = xv + )) +} + + +LiProj <- function(nrow, ncol, eps = 0.1, seed = NA) { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + s <- ceiling(x = sqrt(x = ncol)) + prob <- c( + 1 / (2 * s), + 1 - (1 / s), + 1 / (2 * s) + ) + return(matrix( + data = sample( + x = seq.int(from = -1L, to = 1L), + size = nrow * ncol, + replace = TRUE, + prob = prob + ), + nrow = nrow + )) +} + + +LiProj <- function(nrow, ncol, eps = 0.1, seed = NA) { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + if (!is.null(x = eps)) { + if (eps > 1 || eps <= 0) { + stop("'eps' must be 0 < eps <= 1") + } + ncol <- floor(x = 4 * log(x = ncol) / ((eps ^ 2) / 2 - (eps ^ 3 / 3))) + } + s <- ceiling(x = sqrt(x = ncol)) + prob <- c( + 1 / (2 * s), + 1 - (1 / s), + 1 / (2 * s) + ) + return(matrix( + data = sample( + x = seq.int(from = -1L, to = 1L), + size = nrow * ncol, + replace = TRUE, + prob = prob + ), + nrow = nrow + )) +} + +JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + method <- method[1L] + method <- match.arg(arg = method) + if (!is.null(x = eps)) { + if (eps > 1 || eps <= 0) { + stop("'eps' must be 0 < eps <= 1") + } + ncol <- floor(x = 4 * log(x = ncol) / ((eps ^ 2) / 2 - (eps ^ 3 / 3))) + } + m <- switch( + EXPR = method, + "li" = { + s <- ceiling(x = sqrt(x = ncol)) + prob <- c( + 1 / (2 * s), + 1 - (1 / s), + 1 / (2 * s) + ) + matrix( + data = sample( + x = seq.int(from = -1L, to = 1L), + size = nrow * ncol, + replace = TRUE, + prob = prob + ), + nrow = nrow + ) + } + ) + return(m) +} + + +LeverageScore.default <- function( + object, + features = NULL, + nsketch = 5000L, + ndims = 200L, + sampling.method = c("CountSketch", "Gaussian"), + MARGIN = 2L, + verbose = TRUE, + ... +) { + MARGIN <- MARGIN %/% 1L + if (!MARGIN %in% seq.int(from = 1L, to = 2L)) { + stop("'MARGIN' must be either 1 or 2") + } + sampling.method <- sampling.method[1L] + sampling.method <- match.arg(arg = sampling.method) + if (isTRUE(x = verbose)) { + message(sampling.method, " sampling ", nsketch, " cells") + } + ncells <- dim(x = object)[[MARGIN]] + S <- switch( + EXPR = sampling.method, + "CountSketch" = CountSketch(nrow = nsketch, ncol = ncells), + "Gaussian" = matrix( + data = rnorm(n = nsketch * ncells, mean = 0L, sd = 1 / ncells ^ 2), + nrow = nsketch, + ncol = ncells + ) + ) + if (!is.null(x = features)) { + object <- if (MARGIN == 1L) { + object[, features, drop = FALSE] + } else { + object[features, , drop = FALSE] + } + } + if (MARGIN == 2L) { + tf <- tryCatch( + expr = methods::slot( + object = methods::selectMethod(f = "t", signature = class(x = object)), + name = ".Data" + ), + error = function(...) { + return(base::t) + } + ) + object <- tf(object) + } + sa <- S %*% object + qr.sa <- base::qr(x = sa) + R <- if (inherits(x = qr.sa, what = "sparseQR")) { + Matrix::qrR(qr = qr.sa) + } else { + base::qr.R(qr = qr.sa) + } + R.inv <- solve(a = R) + JL <- SeuratObject::as.sparse(x = JLEmbed( + nrow = ncol(x = R.inv), + ncol = ndims, + eps = 0.5 + )) + if (isTRUE(x = verbose)) { + message("Random projection") + } + Z <- object %*% (R.inv %*% JL) + return(rowSums(x = Z ^ 2)) +} + \ No newline at end of file From 3511ce412be58b04473b8a9e8392dce50a809ff3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 13 Dec 2021 15:14:25 -0500 Subject: [PATCH 029/979] leverage generic --- NAMESPACE | 4 ++ R/generics.R | 12 +++++ R/integration.R | 119 +++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 42e578419..0edefe1c8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,9 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) +S3method(LeverageScore,Assay) +S3method(LeverageScore,Seurat) +S3method(LeverageScore,default) S3method(MappingScore,AnchorSet) S3method(MappingScore,default) S3method(NormalizeData,Assay) @@ -221,6 +224,7 @@ export(L2CCA) export(L2Dim) export(LabelClusters) export(LabelPoints) +export(LeverageScore) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) diff --git a/R/generics.R b/R/generics.R index 4ad5ff13f..9597f3c7d 100644 --- a/R/generics.R +++ b/R/generics.R @@ -302,6 +302,18 @@ IntegrateEmbeddings <- function(anchorset, ...) { UseMethod(generic = "IntegrateEmbeddings", object = anchorset) } + +#' Leverage score +#' xxxx +#' xxxx +#' @param object An object +#' @export LeverageScore +#' +LeverageScore <- function(object, ...) { + UseMethod(generic = 'LeverageScore', object = object) +} + + #' Metric for evaluating mapping success #' #' This metric was designed to help identify query cells that aren't well diff --git a/R/integration.R b/R/integration.R index 7123cb08c..206948a68 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6132,17 +6132,25 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { return(m) } - +#' xxxxx +#' +#' +#' @rdname LeverageScore +#' @export LeverageScore.default <- function( object, features = NULL, nsketch = 5000L, - ndims = 200L, + ndims = NULL, sampling.method = c("CountSketch", "Gaussian"), MARGIN = 2L, + eps = 0.5, + seed = 123, verbose = TRUE, ... ) { + features <- features %||% rownames(object) + ndims <- ndims %||% ncol(x = object) MARGIN <- MARGIN %/% 1L if (!MARGIN %in% seq.int(from = 1L, to = 2L)) { stop("'MARGIN' must be either 1 or 2") @@ -6155,12 +6163,15 @@ LeverageScore.default <- function( ncells <- dim(x = object)[[MARGIN]] S <- switch( EXPR = sampling.method, - "CountSketch" = CountSketch(nrow = nsketch, ncol = ncells), - "Gaussian" = matrix( - data = rnorm(n = nsketch * ncells, mean = 0L, sd = 1 / ncells ^ 2), - nrow = nsketch, - ncol = ncells - ) + "CountSketch" = CountSketch(nrow = nsketch, ncol = ncells, seed = seed), + "Gaussian" = { + set.seed(seed) + matrix( + data = rnorm(n = nsketch * ncells, mean = 0L, sd = 1 / (ncells ^ 2)), + nrow = nsketch, + ncol = ncells + ) + } ) if (!is.null(x = features)) { object <- if (MARGIN == 1L) { @@ -6181,6 +6192,7 @@ LeverageScore.default <- function( ) object <- tf(object) } + # row of object is cell, col of matrix is feature sa <- S %*% object qr.sa <- base::qr(x = sa) R <- if (inherits(x = qr.sa, what = "sparseQR")) { @@ -6189,16 +6201,99 @@ LeverageScore.default <- function( base::qr.R(qr = qr.sa) } R.inv <- solve(a = R) + if (isTRUE(x = verbose)) { + message("Random projection") + } JL <- SeuratObject::as.sparse(x = JLEmbed( nrow = ncol(x = R.inv), ncol = ndims, - eps = 0.5 + eps = eps, + seed = seed )) - if (isTRUE(x = verbose)) { - message("Random projection") - } Z <- object %*% (R.inv %*% JL) return(rowSums(x = Z ^ 2)) } +#' ssssxxxxx +#' +#' @rdname LeverageScore +#' @export +#' @method LeverageScore Assay +#' +#' +LeverageScore.Assay <- function(object, + features = NULL, + nsketch = 5000L, + ndims = NULL, + sampling.method = c("CountSketch", "Gaussian")[1], + slot = "data", + seed = 123, + eps = 0.5, + verbose = TRUE, + ...) { + features <- features %||% VariableFeatures(object = object) + ndims <- ndims%||%ncol(object) + data <- GetAssayData(object, slot = slot)[features,] + score <- LeverageScore( + object = data, + features = features, + nsketch = nsketch, + ndims = ndims, + sampling.method = sampling.method, + seed = seed, + eps = eps, + verbose = verbose, + ... + ) +return(score) +} + +#' ssssxxxxx +#' +#' @rdname LeverageScore +#' @export +#' @method LeverageScore Seurat +#' +LeverageScore.Seurat <- function(object, + features = NULL, + assay = NULL, + nsketch = 5000L, + ndims = NULL, + var.name = "leverage.score", + sampling.method = c("CountSketch", "Gaussian")[1], + slot = "data", + eps = 0.5, + seed = 123, + verbose = TRUE, + ... +) { + assay <- assay%||% DefaultAssay(object) + features <- features %||% VariableFeatures(object) + ndims <- ndims %||% ncol(object) + + if (is.null(features)) { + stop("No variable features are set. Please run FindVariableFeatures.") + } + if (var.name %in% colnames(object[[]])) { + var.name.exist <- var.name + var.name <- rev(make.unique(colnames(object[[]]), var.name.exist))[1] + warning(var.name.exist, " is already existed in the meta.data. ", + var.name, " will store leverage score value") + } + object[[var.name]] <- LeverageScore( + object = GetAssay(object = object, assay = assay), + features = features, + nsketch = nsketch, + ndims = ndims, + sampling.method = sampling.method, + seed = seed, + slot = slot, + eps = eps, + verbose = verbose, + ... + ) + return(object) +} + + \ No newline at end of file From b5a1fe9f6c1c3240edd26d3575e5b92496ba19ae Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 13 Dec 2021 16:23:08 -0500 Subject: [PATCH 030/979] add leverage score sampling --- R/integration.R | 87 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index 206948a68..c46781f79 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6144,13 +6144,25 @@ LeverageScore.default <- function( ndims = NULL, sampling.method = c("CountSketch", "Gaussian"), MARGIN = 2L, - eps = 0.5, + eps = 0.5, seed = 123, verbose = TRUE, ... ) { - features <- features %||% rownames(object) + features <- features %||% rownames(x = object) + if (length(x = features) > 5000) { + stop("when the number of feature is larger than 5000, this implementation will be too slow") + } + if (length(x = features) > ncol(x = object)/1.1) { + stop("the number of features is too close to the number cells in the object") + } ndims <- ndims %||% ncol(x = object) + if (nsketch < 1.1*length(x = features)) { + warning("nsketch is too close to the number of features", + "nsketch is reset to ", round(1.1*length(x = features))) + nsketch <- 1.1*length(x = features) + } + nsketch <- min(nsketch, ndims) MARGIN <- MARGIN %/% 1L if (!MARGIN %in% seq.int(from = 1L, to = 2L)) { stop("'MARGIN' must be either 1 or 2") @@ -6192,6 +6204,9 @@ LeverageScore.default <- function( ) object <- tf(object) } + if (verbose) { + message("Performing QR decomposition of the sketch matrix") + } # row of object is cell, col of matrix is feature sa <- S %*% object qr.sa <- base::qr(x = sa) @@ -6226,13 +6241,13 @@ LeverageScore.Assay <- function(object, nsketch = 5000L, ndims = NULL, sampling.method = c("CountSketch", "Gaussian")[1], - slot = "data", - seed = 123, - eps = 0.5, - verbose = TRUE, + slot = "data", + seed = 123, + eps = 0.5, + verbose = TRUE, ...) { features <- features %||% VariableFeatures(object = object) - ndims <- ndims%||%ncol(object) + ndims <- ndims%||%ncol(x = object) data <- GetAssayData(object, slot = slot)[features,] score <- LeverageScore( object = data, @@ -6267,19 +6282,14 @@ LeverageScore.Seurat <- function(object, verbose = TRUE, ... ) { - assay <- assay%||% DefaultAssay(object) - features <- features %||% VariableFeatures(object) - ndims <- ndims %||% ncol(object) + assay <- assay %||% DefaultAssay(object) + features <- features %||% VariableFeatures(object = object[[assay]]) + ndims <- ndims %||% ncol(x = object) if (is.null(features)) { stop("No variable features are set. Please run FindVariableFeatures.") } - if (var.name %in% colnames(object[[]])) { - var.name.exist <- var.name - var.name <- rev(make.unique(colnames(object[[]]), var.name.exist))[1] - warning(var.name.exist, " is already existed in the meta.data. ", - var.name, " will store leverage score value") - } + var.name <- CheckMetaVarName(object = object, var.name = var.name) object[[var.name]] <- LeverageScore( object = GetAssay(object = object, assay = assay), features = features, @@ -6294,6 +6304,51 @@ LeverageScore.Seurat <- function(object, ) return(object) } +CheckMetaVarName <- function(object, var.name) { + if (var.name %in% colnames(x = object[[]])) { + var.name.exist <- var.name + var.name <- rev( + x = make.unique( + names = c(colnames(object[[]]), var.name.exist) + ) + )[1] + warning(var.name.exist, " is already existed in the meta.data. ", + var.name, " will store leverage score value") + } + return(var.name) +} +#' +#' +#' +#' +#' @return Returns a sub-sampled seurat object +LeverageScoreSampling <- function( + object, + assay = NULL, + features = NULL, + var.name = "leverage.score", + seed = 123, + num.cells = 5000, + ...) { + var.name <- CheckMetaVarName(object = object, var.name = var.name) + object <- LeverageScore( + object = object, + assay = assay, + features = features, + var.name = var.name, + seed = seed, + ... + ) + num.cells <- min(num.cells, ncol(x = object)) + set.seed(seed) + sampled.cells <- sample(x = Cells(x = object), + size = num.cells, + prob = object[[var.name]][,1] + ) + object.sampled <- subset(x = object, cells = sampled.cells) + return(object.sampled) +} + \ No newline at end of file From 953521e847474c43c9d48d7c22634527fa741448 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 13 Dec 2021 17:07:25 -0500 Subject: [PATCH 031/979] export LeverageScoreSampling --- NAMESPACE | 1 + R/integration.R | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0edefe1c8..d1452fa4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -225,6 +225,7 @@ export(L2Dim) export(LabelClusters) export(LabelPoints) export(LeverageScore) +export(LeverageScoreSampling) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) diff --git a/R/integration.R b/R/integration.R index c46781f79..24e70076b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6319,11 +6319,13 @@ CheckMetaVarName <- function(object, var.name) { } +#' LeverageScoreSampling #' #' -#' -#' +#' #' @return Returns a sub-sampled seurat object +#' @export +#' LeverageScoreSampling <- function( object, assay = NULL, From 368a3d552fecbf2135c6d2dde5992a21f98f3760 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Dec 2021 00:08:34 -0500 Subject: [PATCH 032/979] docu LeverageScoreSampling --- R/integration.R | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 24e70076b..ad0b2d2b3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6134,7 +6134,7 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { #' xxxxx #' -#' +#' #' @rdname LeverageScore #' @export LeverageScore.default <- function( @@ -6304,6 +6304,9 @@ LeverageScore.Seurat <- function(object, ) return(object) } + +# Check if the var.name already existed in the meta.data +# CheckMetaVarName <- function(object, var.name) { if (var.name %in% colnames(x = object[[]])) { var.name.exist <- var.name @@ -6319,20 +6322,26 @@ CheckMetaVarName <- function(object, var.name) { } -#' LeverageScoreSampling -#' -#' +#' Subset objects based on Leverage score +#' +#' @param object A seurat object +#' @param num.cells Number of sampled cells +#' @param assay Assay used to calculate leverage score +#' @param features Features used to calculate leverage score +#' @param var.name Variable name stored leverage score in the meta.data +#' @param seed Set a random seed.By default, sets the seed to 123 +#' @param ... Arguments passed to LeverageScore #' #' @return Returns a sub-sampled seurat object #' @export #' LeverageScoreSampling <- function( object, + num.cells = 5000, assay = NULL, features = NULL, var.name = "leverage.score", seed = 123, - num.cells = 5000, ...) { var.name <- CheckMetaVarName(object = object, var.name = var.name) object <- LeverageScore( From f6a2a4a66d006368124982cfe110a3fc238b16b6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Dec 2021 00:24:30 -0500 Subject: [PATCH 033/979] add docu LeverageScore.default --- NAMESPACE | 1 + R/integration.R | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d1452fa4e..d9740510f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -375,6 +375,7 @@ importFrom(Matrix,colMeans) importFrom(Matrix,colSums) importFrom(Matrix,crossprod) importFrom(Matrix,diag) +importFrom(Matrix,qrR) importFrom(Matrix,readMM) importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) diff --git a/R/integration.R b/R/integration.R index ad0b2d2b3..035ab7fd3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6030,6 +6030,8 @@ RunGraphLaplacian.default <- function(object, } + + #' @importFrom Matrix sparseMatrix CountSketch <- function(nrow, ncol, seed = 123) { @@ -6132,9 +6134,18 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { return(m) } -#' xxxxx +#' @param features Features used to calculate leverage score +#' @param nsketch Number of rows in the random sketch matrix +#' @param ndims Number of dimensions in the JL embeddings +#' @param sampling.method Sampling method for generating random matrix +#' @param MARGIN Margin +#' @param eps error tolerance for JL embeddings +#' @param seed Set a random seed +#' @param verbose Print message and process +#' @param ... #' -#' +#' @importFrom Matrix qrR +#' @importFrom SeuratObject as.sparse #' @rdname LeverageScore #' @export LeverageScore.default <- function( @@ -6211,7 +6222,7 @@ LeverageScore.default <- function( sa <- S %*% object qr.sa <- base::qr(x = sa) R <- if (inherits(x = qr.sa, what = "sparseQR")) { - Matrix::qrR(qr = qr.sa) + qrR(qr = qr.sa) } else { base::qr.R(qr = qr.sa) } @@ -6219,7 +6230,7 @@ LeverageScore.default <- function( if (isTRUE(x = verbose)) { message("Random projection") } - JL <- SeuratObject::as.sparse(x = JLEmbed( + JL <- as.sparse(x = JLEmbed( nrow = ncol(x = R.inv), ncol = ndims, eps = eps, From 298b6429957db0fc905fd4b56d414ebfd9d060ad Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 15 Dec 2021 10:19:00 -0500 Subject: [PATCH 034/979] update leverage score inverse --- R/integration.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 035ab7fd3..5fc30c1bb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6220,13 +6220,20 @@ LeverageScore.default <- function( } # row of object is cell, col of matrix is feature sa <- S %*% object + + qr.sa <- base::qr(x = sa) R <- if (inherits(x = qr.sa, what = "sparseQR")) { qrR(qr = qr.sa) } else { base::qr.R(qr = qr.sa) } - R.inv <- solve(a = R) + + # triangular matrix inverse + + R.inv <- as.sparse(backsolve(r = R , x = diag(ncol(R)))) + + if (isTRUE(x = verbose)) { message("Random projection") } From 4e6fe21e5951ffe015bec604894ddcd5a321bebf Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 08:40:25 -0500 Subject: [PATCH 035/979] over write leverage score --- R/integration.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5fc30c1bb..650d580dc 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6282,7 +6282,7 @@ return(score) } #' ssssxxxxx -#' +#' @param over.write #' @rdname LeverageScore #' @export #' @method LeverageScore Seurat @@ -6297,6 +6297,7 @@ LeverageScore.Seurat <- function(object, slot = "data", eps = 0.5, seed = 123, + over.write = FALSE, verbose = TRUE, ... ) { @@ -6307,7 +6308,9 @@ LeverageScore.Seurat <- function(object, if (is.null(features)) { stop("No variable features are set. Please run FindVariableFeatures.") } - var.name <- CheckMetaVarName(object = object, var.name = var.name) + if (!over.write) { + var.name <- CheckMetaVarName(object = object, var.name = var.name) + } object[[var.name]] <- LeverageScore( object = GetAssay(object = object, assay = assay), features = features, @@ -6347,6 +6350,7 @@ CheckMetaVarName <- function(object, var.name) { #' @param assay Assay used to calculate leverage score #' @param features Features used to calculate leverage score #' @param var.name Variable name stored leverage score in the meta.data +#' @param over.write If over write the variable with leverage score #' @param seed Set a random seed.By default, sets the seed to 123 #' @param ... Arguments passed to LeverageScore #' @@ -6359,14 +6363,18 @@ LeverageScoreSampling <- function( assay = NULL, features = NULL, var.name = "leverage.score", + over.write = FALSE, seed = 123, ...) { - var.name <- CheckMetaVarName(object = object, var.name = var.name) + if (!over.write) { + var.name <- CheckMetaVarName(object = object, var.name = var.name) + } object <- LeverageScore( object = object, assay = assay, features = features, var.name = var.name, + over.write = over.write, seed = seed, ... ) From ab4a0eb637ca332608fc9f04af306d88fafc8fd7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 09:16:42 -0500 Subject: [PATCH 036/979] add hnsw to NNHelper --- NAMESPACE | 2 ++ R/clustering.R | 4 ++++ R/integration.R | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 9818ff0b9..5d7b41653 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -406,6 +406,8 @@ importFrom(RcppAnnoy,AnnoyAngular) importFrom(RcppAnnoy,AnnoyEuclidean) importFrom(RcppAnnoy,AnnoyHamming) importFrom(RcppAnnoy,AnnoyManhattan) +importFrom(RcppHNSW,hnsw_build) +importFrom(RcppHNSW,hnsw_search) importFrom(Rtsne,Rtsne) importFrom(SeuratObject,"DefaultAssay<-") importFrom(SeuratObject,"Idents<-") diff --git a/R/clustering.R b/R/clustering.R index de0cc96c5..d608146be 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -1610,6 +1610,10 @@ NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) { args <- args[intersect(x = names(x = args), y = names(x = formals(fun = AnnoyNN)))] do.call(what = 'AnnoyNN', args = args) }, + "hnsw" = { + args <- args[intersect(x = names(x = args), y = names(x = formals(fun = HnswNN)))] + do.call(what = 'HnswNN', args = args) + }, stop("Invalid method. Please choose one of 'rann', 'annoy'") ) ) diff --git a/R/integration.R b/R/integration.R index 9feca76c0..f5a174cb9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6390,4 +6390,46 @@ LeverageScoreSampling <- function( return(object.sampled) } + +# Run annoy +# +# @param data Data to build the index with +# @param query A set of data to be queried against data +# @param metric Distance metric; can be one of "euclidean", "cosine", "manhattan", +# "hamming" +# @param k Number of neighbors +# @param ef_construction A larger value means a better quality index, but increases build time. +# @param ef Higher values lead to improved recall at the expense of longer search time. +# @param n_threads Maximum number of threads to use. +# @param index optional index object, will be recomputed if not provided +#' @importFrom RcppHNSW hnsw_build hnsw_search +# +HnswNN <- function(data, + query = data, + metric = "euclidean", + k, + ef_construction = 200, + ef = 10, + index = NULL, + n_threads = 0 +) { + idx <- index %||% hnsw_build( + X = data, + distance = metric, + ef = ef_construction, + n_threads = n_threads + ) + nn <- hnsw_search( + X = query, + ann = idx, + k = k, + ef = ef, + n_threads = n_threads + ) + names(nn) <- c("nn.idx", "nn.dists") + nn$idx <- idx + nn$alg.info <- list(metric = metric, ndim = ncol(x = data)) + return(nn) +} + \ No newline at end of file From 72acf60ce22957f9f10764474ada073ca82fe3d2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 18:00:30 -0500 Subject: [PATCH 037/979] add sketch integration --- R/integration.R | 188 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index f5a174cb9..f4a705e91 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6284,7 +6284,6 @@ return(score) } #' ssssxxxxx -#' @param over.write #' @rdname LeverageScore #' @export #' @method LeverageScore Seurat @@ -6432,4 +6431,191 @@ HnswNN <- function(data, return(nn) } + + +IntegrationReferenceIndex <- function(object) { + + if (is.null( object@tools$Integration@sample.tree )) { + reference.index <- object@commands$FindIntegrationAnchors$reference + if ( length(reference.index) > 1) { + stop('the number of the reference is bigger than 1') + } + } else { + reference.index <- SampleIntegrationOrder(tree = object@tools$Integration@sample.tree)[1] + } + return(reference.index) + +} + + +IntegrateSketchEmbeddings <- function(object.list, + sketch.list, + sketch.inte, + features = NULL, + assay = 'RNA', + sketch.reduction = 'pca', + sketch.reduction.inte = 'integrated_dr', + reduction.name ='pca.correct', + reduction.key = 'PCcorrect_', + X.method = c('embeddings', 'data' , 'sketch')[3], + sketch.ratio = 0.8, + num.core = 1, + merged.object = NULL, + reference.exist = TRUE, + return.seurat = TRUE, + verbose = TRUE) { + if (reference.exist) { + reference.index <- IntegrationReferenceIndex(object = sketch.inte) + } else { + reference.index <- NULL + } + features <- features %||% rownames(x = sketch.inte[[sketch.reduction]]@feature.loadings) + query.index <- setdiff(x = 1:length(object.list), y = reference.index) + features <- Reduce(f = intersect, + x = c(list(features), + lapply(object.list, function(x) rownames(x)) ) + ) + + if (verbose) { + message("Center and scale based on sketch cells") + } + # mean and sd + if (X.method == 'embeddings') { + scale.set <- 1:length(object.list) + } else { + scale.set <- reference.index + } + mean_sd.i <- SparseMeanSd(object = sketch.inte) + emb.list <- list() + for (i in scale.set) { + DefaultAssay(sketch.list[[i]]) <- DefaultAssay(object.list[[i]]) <- assay + emb.list[[i]] <- ProjectDataEmbeddings(object = object.list[[i]], + assay = assay, + feature.loadings = sketch.inte[[sketch.reduction]]@feature.loadings, + ref.mean = mean_sd.i$mean, + ref.sd = mean_sd.i$sd ) + } + if (verbose) { + message("Correcting embeddings") + } + + SketchIndex <- function(q) { + q.cells <- Cells(sketch.list[[q]]) + if ( X.method == 'embeddings') { + sketch.transform <- MASS::ginv(inte.sub[[sketch.reduction]]@cell.embeddings[q.cells ,]) %*% + inte.sub[[sketch.reduction.inte]]@cell.embeddings[q.cells ,] + emb <- emb.list[[q]] %*% sketch.transform + + } else if (X.method == 'data') { + exp.mat <- t(as.matrix(GetAssayData(inte.sub[[assay]], slot = 'data')[features,q.cells])) + sketch.transform <- MASS::ginv(exp.mat ) %*% + inte.sub[[sketch.reduction.inte]]@cell.embeddings[q.cells ,] + emb <- as.matrix(x = t(GetAssayData(object = object.list[[q]], slot = 'data')[features,]) %*% sketch.transform) + + } else if ( X.method == 'sketch') { + R <- t(CountSketch(nrow = round(sketch.ratio * length(features)), ncol = length(features))) + exp.mat <- as.matrix(t(GetAssayData(inte.sub[[assay]], slot = 'data')[features,q.cells]) %*% R) + sketch.transform <- MASS::ginv(X = exp.mat) %*% + inte.sub[[sketch.reduction.inte]]@cell.embeddings[q.cells ,] + emb <- as.matrix((t(GetAssayData(object = object.list[[q]], slot = 'data')[features,]) %*% R) %*% sketch.transform) + } + return(emb) + } + emb.list.query <- parallel::mclapply( X = query.index, + FUN = SketchIndex, + mc.cores = num.core + ) + + emb.m <- Reduce(f = rbind, x = c( emb.list[reference.index], emb.list.query)) + correct.dr <- CreateDimReducObject(embeddings = emb.m, + loadings = sketch.inte[[sketch.reduction]]@feature.loadings[features,], + key = reduction.key, + assay = assay) + + if (!return.seurat) { + return(correct.dr) + } else { + if (is.null(merged.object)) { + if (verbose) { + message("Merging all objects") + } + merged.object <- merge(x = object.list[[1]], y = object.list[2:length(object.list)]) + } + merged.object[[reduction.name]] <- correct.dr + return(merged.object) + } +} + + + +ProjectDataEmbeddings <- function(object, + assay = 'RNA', + feature.loadings, + ref.mean, + ref.sd, + block.size = 5000, + scale.max = 10, + verbose = TRUE ){ + features <- Reduce(f = intersect, + x = list(names(ref.mean), + rownames(object[[assay]]), + rownames(feature.loadings) + ) + ) + if (verbose) { + message( paste0(length(features)," features are used")) + } + + mat <- GetAssayData(object = object[[assay]], slot = 'data')[features,] + + ref.mean <-ref.mean[features] + ref.sd <- ref.sd[features] + + + block.size = min(block.size, ncol(object)) + + cell.index <- rep(x = 1:ceiling(ncol(mat)/block.size), + each = block.size )[1:ncol(mat)] + cells.list <- split(x = 1:ncol(mat), f = cell.index) + + if (verbose) { + message("ScaleData and Project to feature loadings") + } + + + emb.list <- lapply(X = cells.list, + FUN = function(x) { + mat.x <- as.matrix(mat[,x]) + mat.x <- ( mat.x- ref.mean)/ref.sd + mat.x[mat.x > 10] <- 10 + cell.emb.x <- t(mat.x[features,]) %*% feature.loadings[features,] + return (cell.emb.x) + }) + + all.emb <- Reduce(rbind, emb.list) + return(all.emb) +} + + +SparseMeanSd <- function(object, + assay = NULL, + slot = 'data', + eps = 1e-8 +){ + assay <- assay %||% DefaultAssay(object = object) + mat <- GetAssayData(object = object[[assay]], slot = slot) + if (class(mat)[1] !='dgCMatrix'){ + stop('Matrix is not sparse') + } + + mat.mean <- sparseMatrixStats::rowMeans2(x = mat, na.rm = TRUE ) + mat.sd <- sparseMatrixStats::rowSds( x = mat, center = mat.mean) + names(mat.mean) <- names(mat.sd) <- rownames( mat) + mat.sd <- MinMax( data = mat.sd, min = eps, max = max(mat.sd)) + output <- list(mean = mat.mean, sd = mat.sd) + return(output) +} + + + \ No newline at end of file From 4c9975ad1bdd8ed4ff85d6811c0c842b4a3745b5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 20:38:17 -0500 Subject: [PATCH 038/979] reformat integrate sketch --- R/integration.R | 193 +++++++++++++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 75 deletions(-) diff --git a/R/integration.R b/R/integration.R index f4a705e91..f24446181 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6390,7 +6390,7 @@ LeverageScoreSampling <- function( } -# Run annoy +# Run hnsw # # @param data Data to build the index with # @param query A set of data to be queried against data @@ -6434,23 +6434,27 @@ HnswNN <- function(data, IntegrationReferenceIndex <- function(object) { - - if (is.null( object@tools$Integration@sample.tree )) { + if (is.null(object@tools$Integration@sample.tree)) { reference.index <- object@commands$FindIntegrationAnchors$reference - if ( length(reference.index) > 1) { + if (length(x = reference.index) > 1) { stop('the number of the reference is bigger than 1') } } else { reference.index <- SampleIntegrationOrder(tree = object@tools$Integration@sample.tree)[1] } return(reference.index) - } + +#' +#' @importFrom MASS ginv +#' @importFrom Matrix t +#' @export + IntegrateSketchEmbeddings <- function(object.list, sketch.list, - sketch.inte, + sketch.object, features = NULL, assay = 'RNA', sketch.reduction = 'pca', @@ -6459,23 +6463,21 @@ IntegrateSketchEmbeddings <- function(object.list, reduction.key = 'PCcorrect_', X.method = c('embeddings', 'data' , 'sketch')[3], sketch.ratio = 0.8, - num.core = 1, merged.object = NULL, - reference.exist = TRUE, - return.seurat = TRUE, + reference.index = NULL, verbose = TRUE) { - if (reference.exist) { - reference.index <- IntegrationReferenceIndex(object = sketch.inte) - } else { - reference.index <- NULL - } - features <- features %||% rownames(x = sketch.inte[[sketch.reduction]]@feature.loadings) + reference.index <- reference.index %||% IntegrationReferenceIndex(object = sketch.object) + features <- features %||% rownames(x = Loadings(sketch.object[[sketch.reduction]])) query.index <- setdiff(x = 1:length(object.list), y = reference.index) features <- Reduce(f = intersect, x = c(list(features), - lapply(object.list, function(x) rownames(x)) ) + lapply(X = object.list, function(x) rownames(x))) + ) + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply ) - if (verbose) { message("Center and scale based on sketch cells") } @@ -6485,69 +6487,111 @@ IntegrateSketchEmbeddings <- function(object.list, } else { scale.set <- reference.index } - mean_sd.i <- SparseMeanSd(object = sketch.inte) - emb.list <- list() - for (i in scale.set) { - DefaultAssay(sketch.list[[i]]) <- DefaultAssay(object.list[[i]]) <- assay - emb.list[[i]] <- ProjectDataEmbeddings(object = object.list[[i]], - assay = assay, - feature.loadings = sketch.inte[[sketch.reduction]]@feature.loadings, - ref.mean = mean_sd.i$mean, - ref.sd = mean_sd.i$sd ) - } + mean_sd.i <- SparseMeanSd(object = sketch.object) + emb.list <- my.lapply( + X = scale.set, + FUN = function(i) { + DefaultAssay(sketch.list[[i]]) <- DefaultAssay(object.list[[i]]) <- assay + emb.i <- ProjectDataEmbeddings( + object = object.list[[i]], + assay = assay, + feature.loadings = Loadings(sketch.object[[sketch.reduction]]), + ref.mean = mean_sd.i$mean, + ref.sd = mean_sd.i$sd + ) + return(emb.i) + } + ) if (verbose) { message("Correcting embeddings") } - - SketchIndex <- function(q) { - q.cells <- Cells(sketch.list[[q]]) - if ( X.method == 'embeddings') { - sketch.transform <- MASS::ginv(inte.sub[[sketch.reduction]]@cell.embeddings[q.cells ,]) %*% - inte.sub[[sketch.reduction.inte]]@cell.embeddings[q.cells ,] - emb <- emb.list[[q]] %*% sketch.transform - - } else if (X.method == 'data') { - exp.mat <- t(as.matrix(GetAssayData(inte.sub[[assay]], slot = 'data')[features,q.cells])) - sketch.transform <- MASS::ginv(exp.mat ) %*% - inte.sub[[sketch.reduction.inte]]@cell.embeddings[q.cells ,] - emb <- as.matrix(x = t(GetAssayData(object = object.list[[q]], slot = 'data')[features,]) %*% sketch.transform) - - } else if ( X.method == 'sketch') { - R <- t(CountSketch(nrow = round(sketch.ratio * length(features)), ncol = length(features))) - exp.mat <- as.matrix(t(GetAssayData(inte.sub[[assay]], slot = 'data')[features,q.cells]) %*% R) - sketch.transform <- MASS::ginv(X = exp.mat) %*% - inte.sub[[sketch.reduction.inte]]@cell.embeddings[q.cells ,] - emb <- as.matrix((t(GetAssayData(object = object.list[[q]], slot = 'data')[features,]) %*% R) %*% sketch.transform) + + + + emb.list.query <- my.lapply( + X = query.index, + FUN = + function(q) { + q.cells <- Cells(x = sketch.list[[q]]) + emb <- switch( + EXPR = X.method, + 'embeddings'= { + sketch.transform <- ginv( + X = Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,]) %*% + Embeddings(object = inte.sub[[sketch.reduction.inte]])[q.cells ,] + emb <- emb.list[[q]] %*% sketch.transform + emb + }, + 'data' = { + exp.mat <- t( + x = as.matrix( + x = GetAssayData( + inte.sub[[assay]], + slot = 'data' + )[features,q.cells] + ) + ) + sketch.transform <- ginv(X = exp.mat) %*% + Embeddings(object = inte.sub[[sketch.reduction.inte]])[q.cells ,] + emb <- as.matrix( + x = t( + x = GetAssayData( + object = object.list[[q]], + slot = 'data')[features,] + ) %*% + sketch.transform + ) + emb + }, + 'sketch' = { + R <- t( + x = CountSketch( + nrow = round(sketch.ratio * length(x = features)), ncol = length(x = features) + ) + ) + exp.mat <- as.matrix( + x = t( + x = GetAssayData( + inte.sub[[assay]], + slot = 'data')[features,q.cells] + ) %*% + R + ) + sketch.transform <- ginv(X = exp.mat) %*% + Embeddings(object = inte.sub[[sketch.reduction.inte]])[q.cells ,] + emb <- as.matrix( + x = ( + t( + x = GetAssayData( + object = object.list[[q]], + slot = 'data')[features,] + ) %*% + R) %*% + sketch.transform + ) + emb + } + ) + return(emb) } - return(emb) - } - emb.list.query <- parallel::mclapply( X = query.index, - FUN = SketchIndex, - mc.cores = num.core - ) - - emb.m <- Reduce(f = rbind, x = c( emb.list[reference.index], emb.list.query)) - correct.dr <- CreateDimReducObject(embeddings = emb.m, - loadings = sketch.inte[[sketch.reduction]]@feature.loadings[features,], - key = reduction.key, - assay = assay) - - if (!return.seurat) { - return(correct.dr) - } else { - if (is.null(merged.object)) { - if (verbose) { - message("Merging all objects") - } - merged.object <- merge(x = object.list[[1]], y = object.list[2:length(object.list)]) + ) + emb.m <- Reduce(f = rbind, x = c(emb.list[reference.index], emb.list.query)) + correct.dr <- CreateDimReducObject( + embeddings = emb.m, + loadings = Loadings(sketch.object[[sketch.reduction]])[features,], + key = reduction.key, + assay = assay + ) + if (is.null(x = merged.object)) { + if (verbose) { + message("Merging all objects") } - merged.object[[reduction.name]] <- correct.dr - return(merged.object) + merged.object <- merge(x = object.list[[1]], y = object.list[2:length(object.list)]) } + merged.object[[reduction.name]] <- correct.dr + return(merged.object) } - - ProjectDataEmbeddings <- function(object, assay = 'RNA', feature.loadings, @@ -6607,9 +6651,8 @@ SparseMeanSd <- function(object, if (class(mat)[1] !='dgCMatrix'){ stop('Matrix is not sparse') } - - mat.mean <- sparseMatrixStats::rowMeans2(x = mat, na.rm = TRUE ) - mat.sd <- sparseMatrixStats::rowSds( x = mat, center = mat.mean) + mat.mean <- RowMeanSparse(mat) + mat.sd <- sqrt(RowVarSparse(mat)) names(mat.mean) <- names(mat.sd) <- rownames( mat) mat.sd <- MinMax( data = mat.sd, min = eps, max = max(mat.sd)) output <- list(mean = mat.mean, sd = mat.sd) From a6808ca520a80343d5da5ff7a5dd0aaa36969901 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 20:43:15 -0500 Subject: [PATCH 039/979] export sketch inte --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 5d7b41653..616385100 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -218,6 +218,7 @@ export(Index) export(Indices) export(IntegrateData) export(IntegrateEmbeddings) +export(IntegrateSketchEmbeddings) export(Intensity) export(IsGlobal) export(JS) From eef571422634b316159cfe5a38956eb71551b8f6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 22:38:43 -0500 Subject: [PATCH 040/979] center not my.lapply --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index f24446181..aef868152 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6488,7 +6488,7 @@ IntegrateSketchEmbeddings <- function(object.list, scale.set <- reference.index } mean_sd.i <- SparseMeanSd(object = sketch.object) - emb.list <- my.lapply( + emb.list <- lapply( X = scale.set, FUN = function(i) { DefaultAssay(sketch.list[[i]]) <- DefaultAssay(object.list[[i]]) <- assay From ae69e2ce249de820de6c7b2e179a32810b3427b6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 23:23:57 -0500 Subject: [PATCH 041/979] sparse data PCA --- R/integration.R | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/R/integration.R b/R/integration.R index aef868152..d23340083 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6661,4 +6661,52 @@ SparseMeanSd <- function(object, +# Run PCA on sparse matrix +# +#' +#' @export +RunPCA.Sparse <- function( + object, + features = NULL, + reduction.key = "PCsp_", + reduction.name = "pca.sparse", + npcs = 50, + do.scale = TRUE, + verbose = TRUE +) { + features <- features %||% VariableFeatures(object) + data <- GetAssayData(object = object, slot = "data")[features,] + n <- npcs + args <- list(A = Matrix::t(data), nv = n) + args$center <- Seurat:::RowMeanSparse(data) + feature.var <- Seurat:::RowVarSparse(data) + args$totalvar <- sum(feature.var) + if (do.scale) { + args$scale <- sqrt(feature.var) + args$scale <- Seurat:::MinMax(args$scale, min = 1e-8, max = max(args$scale)) + } else { + args$scale <- FALSE + } + if (verbose) { + message("Running PCA") + } + pca.irlba <- rlang::exec(.fn = irlba::irlba, !!!args) + sdev <- pca.irlba$d/sqrt(max(1, ncol(data) - 1)) + feture.loadings <- pca.irlba$v + rownames(feture.loadings) <- rownames(data) + embeddings <- sweep(x = pca.irlba$u, MARGIN = 2, STATS = pca.irlba$d, FUN = "*") + rownames(embeddings) <- colnames(data) + colnames(feture.loadings) <- colnames(embeddings) <- paste0(reduction.key, 1:npcs) + object[[reduction.name]] <- CreateDimReducObject( + embeddings = embeddings, + loadings = feture.loadings, + stdev = sdev, + key = reduction.key, + assay = DefaultAssay(object), + misc = list(d = pca.irlba$d) + ) + return(object) +} + + \ No newline at end of file From 84f0bbc40e1e9ebc6d79e969f9a0c4d044e1ecb1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Dec 2021 23:51:30 -0500 Subject: [PATCH 042/979] export RunPCA_Sparse --- NAMESPACE | 2 ++ R/integration.R | 17 ++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 616385100..839021289 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -299,6 +299,7 @@ export(RunMarkVario) export(RunMixscape) export(RunMoransI) export(RunPCA) +export(RunPCA_Sparse) export(RunSLSI) export(RunSPCA) export(RunTSNE) @@ -652,6 +653,7 @@ importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") importFrom(rlang,as_label) +importFrom(rlang,exec) importFrom(rlang,invoke) importFrom(scales,brewer_pal) importFrom(scales,hue_pal) diff --git a/R/integration.R b/R/integration.R index d23340083..cca1b3d92 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6663,9 +6663,12 @@ SparseMeanSd <- function(object, # Run PCA on sparse matrix # -#' +#' @importFrom Matrix t +#' @importFrom rlang exec +#' @importFrom irlba irlba +#' #' @export -RunPCA.Sparse <- function( +RunPCA_Sparse <- function( object, features = NULL, reduction.key = "PCsp_", @@ -6677,20 +6680,20 @@ RunPCA.Sparse <- function( features <- features %||% VariableFeatures(object) data <- GetAssayData(object = object, slot = "data")[features,] n <- npcs - args <- list(A = Matrix::t(data), nv = n) - args$center <- Seurat:::RowMeanSparse(data) - feature.var <- Seurat:::RowVarSparse(data) + args <- list(A = t(data), nv = n) + args$center <- RowMeanSparse(data) + feature.var <- RowVarSparse(data) args$totalvar <- sum(feature.var) if (do.scale) { args$scale <- sqrt(feature.var) - args$scale <- Seurat:::MinMax(args$scale, min = 1e-8, max = max(args$scale)) + args$scale <- MinMax(args$scale, min = 1e-8, max = max(args$scale)) } else { args$scale <- FALSE } if (verbose) { message("Running PCA") } - pca.irlba <- rlang::exec(.fn = irlba::irlba, !!!args) + pca.irlba <- exec(.fn = irlba, !!!args) sdev <- pca.irlba$d/sqrt(max(1, ncol(data) - 1)) feture.loadings <- pca.irlba$v rownames(feture.loadings) <- rownames(data) From 55ea23f7e020516dd86e05b14ee890f72962352d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 19 Dec 2021 10:14:11 -0500 Subject: [PATCH 043/979] fix wrong reference.index --- R/integration.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index aef868152..b63ef64e7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6506,8 +6506,6 @@ IntegrateSketchEmbeddings <- function(object.list, message("Correcting embeddings") } - - emb.list.query <- my.lapply( X = query.index, FUN = @@ -6575,7 +6573,11 @@ IntegrateSketchEmbeddings <- function(object.list, return(emb) } ) - emb.m <- Reduce(f = rbind, x = c(emb.list[reference.index], emb.list.query)) + if (X.method == 'embeddings') { + emb.m <- Reduce(f = rbind, x = c(emb.list[reference.index], emb.list.query)) + } else { + emb.m <- Reduce(f = rbind, x = c(emb.list[1], emb.list.query)) + } correct.dr <- CreateDimReducObject( embeddings = emb.m, loadings = Loadings(sketch.object[[sketch.reduction]])[features,], From ca1906eaa8153d6d0d206a0683fef080e0f7c4eb Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 19 Dec 2021 15:11:03 -0500 Subject: [PATCH 044/979] add sample tree --- R/integration.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/integration.R b/R/integration.R index 052912d3c..684d5eec4 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1679,6 +1679,17 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( slot = "anchors", new.data = anchors ) + sample.tree <- GetIntegrationData( + object = reference.integrated, + integration.name = "Integration", + slot = "sample.tree" + ) + unintegrated <- SetIntegrationData( + object = unintegrated, + integration.name = "Integration", + slot = "sample.tree", + new.data = sample.tree + ) unintegrated[["FindIntegrationAnchors"]] <- slot(object = anchorset, name = "command") suppressWarnings(unintegrated <- LogSeuratCommand(object = unintegrated)) return(unintegrated) From 75a80dd5ff7b8863ad787c9e7ffa7df113dd41d0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 19 Dec 2021 19:30:09 -0500 Subject: [PATCH 045/979] check sample tree --- R/integration.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 684d5eec4..ee22758ee 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1679,11 +1679,13 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( slot = "anchors", new.data = anchors ) - sample.tree <- GetIntegrationData( - object = reference.integrated, - integration.name = "Integration", - slot = "sample.tree" - ) + if (!is.null(x = Tool(object = reference.integrated, slot = "Integration"))) { + sample.tree <- GetIntegrationData( + object = reference.integrated, + integration.name = "Integration", + slot = "sample.tree" + ) + } unintegrated <- SetIntegrationData( object = unintegrated, integration.name = "Integration", From c1660c06fd1dca246248e689bf3e9e3eb56c71f4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 20 Dec 2021 14:32:49 -0500 Subject: [PATCH 046/979] reformat sketch inte --- R/integration.R | 122 +++++++++++++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 44 deletions(-) diff --git a/R/integration.R b/R/integration.R index ee22758ee..62e345c36 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1626,6 +1626,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( reference.dr <- CreateDimReducObject( embeddings = as.matrix(x = t(GetAssayData(reference.integrated[[new.reduction.name.safe]]))), assay = intdr.assay, + loadings = Loadings(object = reductions), key = paste0(new.reduction.name.safe, "_") ) DefaultAssay(object = reference.integrated) <- int.assay @@ -1671,6 +1672,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( unintegrated[[new.reduction.name]] <- CreateDimReducObject( embeddings = as.matrix(x = t(x = integrated.data)), assay = intdr.assay, + loadings = Loadings(object = reductions), key = paste0(new.reduction.name.safe, "_") ) unintegrated <- SetIntegrationData( @@ -6235,8 +6237,6 @@ LeverageScore.default <- function( } # row of object is cell, col of matrix is feature sa <- S %*% object - - qr.sa <- base::qr(x = sa) R <- if (inherits(x = qr.sa, what = "sparseQR")) { qrR(qr = qr.sa) @@ -6458,9 +6458,34 @@ IntegrationReferenceIndex <- function(object) { return(reference.index) } - - -#' +#' Integrate embeddings from batch-corrected sketch cell embeddings +#' First construct a sketch-cell representation for all cells and +#' then use this and the batch-corrected embeddings of sketched cells to +#' construct the batch-corrected embeddings for all cells +#' @param object.list A list of Seurat objects with all cells +#' @param sketch.list A list of Seurat objects with sketched cells +#' @param sketch.object A sketched Seurat objects with integraetd embeddings +#' @param assay Assay name for raw expression +#' @param sketch.reduction Dimensional reduction name for batch-corrected embeddings +#' in the sketched object +#' @param reduction.name dimensional reduction name, pca.correct by default +#' @param reduction.key dimensional reduction key, specifies the string before +#' the number for the dimension names. PCcorrect_ by default +#' @param dictionary.method Methods to construct sketch-cell representation +#' for all cells. sketch by default. Can be one of: +#' \itemize{ +#' \item{sketch: Use random sketched data slot} +#' \item{data: Use data slot} +#' \item{embeddings: Use uncorrected dimensional reduction in the sketched object} +#' } +#' @param sketch.ratio Sketch ratio of data slot when dictionary.method is set to sketch +#' @param sketch.reduction.raw Uncorrected dimensional reduction name in the sketched object +#' when dictionary.method is set to embeddings +#' @param merged.object A merged seurat object containing all cells +#' @param reference.index Index for the integration reference +#' @param verbose Print progress and message +#' +#' #' @importFrom MASS ginv #' @importFrom Matrix t #' @export @@ -6470,18 +6495,18 @@ IntegrateSketchEmbeddings <- function(object.list, sketch.object, features = NULL, assay = 'RNA', - sketch.reduction = 'pca', - sketch.reduction.inte = 'integrated_dr', + sketch.reduction = 'integrated_dr', reduction.name ='pca.correct', reduction.key = 'PCcorrect_', - X.method = c('embeddings', 'data' , 'sketch')[3], + dictionary.method = c('sketch', 'data','embeddings')[1], sketch.ratio = 0.8, + sketch.reduction.raw = NULL, merged.object = NULL, reference.index = NULL, verbose = TRUE) { reference.index <- reference.index %||% IntegrationReferenceIndex(object = sketch.object) - features <- features %||% rownames(x = Loadings(sketch.object[[sketch.reduction]])) - query.index <- setdiff(x = 1:length(object.list), y = reference.index) + features <- rownames(x = Loadings(object = sketch.object[[sketch.reduction]])) + query.index <- setdiff(x = 1:length(x = object.list), y = reference.index) features <- Reduce(f = intersect, x = c(list(features), lapply(X = object.list, function(x) rownames(x))) @@ -6495,8 +6520,11 @@ IntegrateSketchEmbeddings <- function(object.list, message("Center and scale based on sketch cells") } # mean and sd - if (X.method == 'embeddings') { + if (dictionary.method == 'embeddings') { scale.set <- 1:length(object.list) + if (is.null(sketch.reduction.raw)) { + stop("When dictionary.method is embeddings, sketch.reduction.raw needs to be specified") + } } else { scale.set <- reference.index } @@ -6518,18 +6546,17 @@ IntegrateSketchEmbeddings <- function(object.list, if (verbose) { message("Correcting embeddings") } - emb.list.query <- my.lapply( X = query.index, FUN = function(q) { q.cells <- Cells(x = sketch.list[[q]]) emb <- switch( - EXPR = X.method, + EXPR = dictionary.method, 'embeddings'= { sketch.transform <- ginv( - X = Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,]) %*% - Embeddings(object = inte.sub[[sketch.reduction.inte]])[q.cells ,] + X = Embeddings(object = inte.sub[[sketch.reduction.raw]])[q.cells ,]) %*% + Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,] emb <- emb.list[[q]] %*% sketch.transform emb }, @@ -6543,7 +6570,7 @@ IntegrateSketchEmbeddings <- function(object.list, ) ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = inte.sub[[sketch.reduction.inte]])[q.cells ,] + Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,] emb <- as.matrix( x = t( x = GetAssayData( @@ -6569,7 +6596,7 @@ IntegrateSketchEmbeddings <- function(object.list, R ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = inte.sub[[sketch.reduction.inte]])[q.cells ,] + Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,] emb <- as.matrix( x = ( t( @@ -6586,13 +6613,13 @@ IntegrateSketchEmbeddings <- function(object.list, return(emb) } ) - if (X.method == 'embeddings') { + if (dictionary.method == 'embeddings') { emb.m <- Reduce(f = rbind, x = c(emb.list[reference.index], emb.list.query)) } else { emb.m <- Reduce(f = rbind, x = c(emb.list[1], emb.list.query)) } correct.dr <- CreateDimReducObject( - embeddings = emb.m, + embeddings = as.matrix(emb.m), loadings = Loadings(sketch.object[[sketch.reduction]])[features,], key = reduction.key, assay = assay @@ -6612,7 +6639,7 @@ ProjectDataEmbeddings <- function(object, feature.loadings, ref.mean, ref.sd, - block.size = 5000, + block.size = NULL, scale.max = 10, verbose = TRUE ){ features <- Reduce(f = intersect, @@ -6621,37 +6648,44 @@ ProjectDataEmbeddings <- function(object, rownames(feature.loadings) ) ) + feature.loadings <- feature.loadings[features,] if (verbose) { message( paste0(length(features)," features are used")) } - mat <- GetAssayData(object = object[[assay]], slot = 'data')[features,] - + if (!inherits(x = mat, what = "dgCMatrix")) { + mat <- as.sparse(mat) + } ref.mean <-ref.mean[features] ref.sd <- ref.sd[features] - - - block.size = min(block.size, ncol(object)) - - cell.index <- rep(x = 1:ceiling(ncol(mat)/block.size), - each = block.size )[1:ncol(mat)] - cells.list <- split(x = 1:ncol(mat), f = cell.index) - if (verbose) { message("ScaleData and Project to feature loadings") } - - - emb.list <- lapply(X = cells.list, - FUN = function(x) { - mat.x <- as.matrix(mat[,x]) - mat.x <- ( mat.x- ref.mean)/ref.sd - mat.x[mat.x > 10] <- 10 - cell.emb.x <- t(mat.x[features,]) %*% feature.loadings[features,] - return (cell.emb.x) - }) - - all.emb <- Reduce(rbind, emb.list) + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + if (!is.null(block.size)) { + block.size = min(block.size, ncol(object)) + cell.index <- rep(x = 1:ceiling(ncol(mat)/block.size), + each = block.size )[1:ncol(mat)] + cells.list <- split(x = 1:ncol(mat), f = cell.index) + emb.list <- my.lapply(X = cells.list, + FUN = function(x) { + mat.x <- as.matrix(mat[,x]) + mat.x <- (mat.x - ref.mean) / ref.sd + mat.x[mat.x > scale.max] <- scale.max + cell.emb.x <- t(mat.x ) %*% feature.loadings + return (cell.emb.x) + } + ) + all.emb <- Reduce(rbind, emb.list) + } else { + mat <- (mat - ref.mean) / ref.sd + mat[mat > scale.max] <- scale.max + all.emb <- t(mat) %*% feature.loadings + } return(all.emb) } @@ -6668,8 +6702,8 @@ SparseMeanSd <- function(object, } mat.mean <- RowMeanSparse(mat) mat.sd <- sqrt(RowVarSparse(mat)) - names(mat.mean) <- names(mat.sd) <- rownames( mat) - mat.sd <- MinMax( data = mat.sd, min = eps, max = max(mat.sd)) + names(mat.mean) <- names(mat.sd) <- rownames(mat) + mat.sd <- MinMax(data = mat.sd, min = eps, max = max(mat.sd)) output <- list(mean = mat.mean, sd = mat.sd) return(output) } From 4fc69be37e7f7f3b84bb9eccdc6e2d0671ee41f1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 26 Dec 2021 19:41:08 -0500 Subject: [PATCH 047/979] update SparseMeanSd --- R/integration.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 62e345c36..da4952b00 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6653,10 +6653,7 @@ ProjectDataEmbeddings <- function(object, message( paste0(length(features)," features are used")) } mat <- GetAssayData(object = object[[assay]], slot = 'data')[features,] - if (!inherits(x = mat, what = "dgCMatrix")) { - mat <- as.sparse(mat) - } - ref.mean <-ref.mean[features] + ref.mean <- ref.mean[features] ref.sd <- ref.sd[features] if (verbose) { message("ScaleData and Project to feature loadings") @@ -6682,6 +6679,9 @@ ProjectDataEmbeddings <- function(object, ) all.emb <- Reduce(rbind, emb.list) } else { + if (inherits(x = mat, what = "dgCMatrix")) { + mat <- as.matrix(mat) + } mat <- (mat - ref.mean) / ref.sd mat[mat > scale.max] <- scale.max all.emb <- t(mat) %*% feature.loadings @@ -6693,10 +6693,12 @@ ProjectDataEmbeddings <- function(object, SparseMeanSd <- function(object, assay = NULL, slot = 'data', + features = NULL, eps = 1e-8 ){ + features <- features %||% rownames(object[[assay]]) assay <- assay %||% DefaultAssay(object = object) - mat <- GetAssayData(object = object[[assay]], slot = slot) + mat <- GetAssayData(object = object[[assay]], slot = slot)[features,] if (class(mat)[1] !='dgCMatrix'){ stop('Matrix is not sparse') } From eb4669a6f6a16576d9633b8b1db000c5ad51db73 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Dec 2021 15:02:03 -0500 Subject: [PATCH 048/979] update SparseMeanSd --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index da4952b00..84c764191 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6696,6 +6696,7 @@ SparseMeanSd <- function(object, features = NULL, eps = 1e-8 ){ + assay <- assay%||% DefaultAssay(object) features <- features %||% rownames(object[[assay]]) assay <- assay %||% DefaultAssay(object = object) mat <- GetAssayData(object = object[[assay]], slot = slot)[features,] From 618f39755577fdbd9d457987be5bf46e87afa51a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 31 Dec 2021 15:04:11 -0500 Subject: [PATCH 049/979] transfer time --- R/integration.R | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index da4952b00..7f553275d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5533,8 +5533,8 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { select_nn <- nn.object@nn.idx - ncol.nn <- ncol.nn %||% nrow(x = select_nn) col.cells <- col.cells %||% nn.object@cell.names + ncol.nn <- ncol.nn %||% length(col.cells) k.nn <- ncol(select_nn) j <- as.numeric(x = t(x = select_nn )) i <- ((1:length(x = j)) - 1) %/% k.nn + 1 @@ -5929,7 +5929,7 @@ FindBridgeAnchor <- function(object.list, #' @importFrom fastDummies dummy_cols #' @importFrom Matrix rowMeans #' -TranferLablesNN <- function( +TransferLablesNN <- function( nn.object, reference.object, group.by = NULL @@ -5969,6 +5969,37 @@ TranferLablesNN <- function( } +TransferExpressionNN<- function( + nn.object, + reference.object, + var.name = NULL +){ + + nn.matrix <- NNtoGraph(nn.object = nn.object, + col.cells = Cells(reference.object) + ) + reference.exp.matrix <- FetchData(object = reference.object, vars = var.name) + # remove NA + reference.exp.matrix <- reference.exp.matrix[complete.cases(reference.exp.matrix), ,drop= F] + nn.matrix <- nn.matrix[, rownames(reference.exp.matrix)] + + # remove NO neighbor query + nn.sum <- RowSumSparse(mat = nn.matrix) + nn.matrix <- nn.matrix[nn.sum > 2, ] + nn.sum <- nn.sum[nn.sum>2] + + # transfer data + reference.exp.matrix <- as.matrix(reference.exp.matrix) + query.exp.mat <- nn.matrix %*% reference.exp.matrix + query.exp.mat <- sweep(x = query.exp.mat, MARGIN = 1, STATS = nn.sum, FUN = "/") + + # set output for all query cells + query.exp.all <- data.frame(row.names = Cells(nn.object)) + query.exp.all[rownames(query.exp.mat),1] <- query.exp.mat[,1] + colnames(query.exp.all) <- var.name + return(query.exp.all) +} + #' @param reduction.name dimensional reduction name, lap by default #' @param graph The name of graph @@ -6763,4 +6794,5 @@ RunPCA_Sparse <- function( } + \ No newline at end of file From 86b747b21fc0352e3d1a3d03ca0eaee931c04263 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 2 Jan 2022 15:49:49 -0500 Subject: [PATCH 050/979] smooth labels by cluster --- R/integration.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/integration.R b/R/integration.R index dcafdf23b..a5903f3a5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6794,6 +6794,16 @@ RunPCA_Sparse <- function( return(object) } +SmoothLabels <- function(labels, clusters ) { + cluster.set <- unique(clusters) + smooth.labels <- labels + for (c in cluster.set) { + cell.c <- which(clusters == c) + smooth.labels[cell.c] <- names(sort(table(labels[cell.c]), decreasing = T)[1]) + } + return(smooth.labels) +} + \ No newline at end of file From 735ff6263eb1503afee7383596531a1697587709 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 17 Jan 2022 10:32:38 -0500 Subject: [PATCH 051/979] store weights mapquery --- DESCRIPTION | 2 +- R/integration.R | 7 +++++-- man/MapQuery.Rd | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ca4569e1f..94f8abaff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -95,7 +95,7 @@ Collate: 'tree.R' 'utilities.R' 'zzz.R' -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Encoding: UTF-8 Suggests: ape, diff --git a/R/integration.R b/R/integration.R index dcafdf23b..661723953 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1910,6 +1910,7 @@ LocalStruct <- function( #' @inheritParams IntegrateEmbeddings #' @inheritParams TransferData #' @inheritParams ProjectUMAP +#' @param store.weights Determine if the weight matrix is stored. #' @param transferdata.args A named list of additional arguments to #' \code{\link{TransferData}} #' @param integrateembeddings.args A named list of additional arguments to @@ -1940,6 +1941,7 @@ MapQuery <- function( reference.reduction = NULL, reference.dims = NULL, query.dims = NULL, + store.weights = FALSE, reduction.model = NULL, transferdata.args = list(), integrateembeddings.args = list(), @@ -2040,8 +2042,9 @@ MapQuery <- function( integrateembeddings.args <- integrateembeddings.args[names(x = integrateembeddings.args) %in% names(x = formals(fun = IntegrateEmbeddings.TransferAnchorSet))] integrateembeddings.args$reductions <- integrateembeddings.args$reductions %||% anchor.reduction integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction - - slot(object = query, name = "tools")$TransferData <- NULL + if (!store.weights) { + slot(object = query, name = "tools")$TransferData <- NULL + } reuse.weights.matrix <- FALSE if (!is.null(x = refdata)) { query <- invoke( diff --git a/man/MapQuery.Rd b/man/MapQuery.Rd index 7ca0a68ce..f1b9928ae 100644 --- a/man/MapQuery.Rd +++ b/man/MapQuery.Rd @@ -13,6 +13,7 @@ MapQuery( reference.reduction = NULL, reference.dims = NULL, query.dims = NULL, + store.weights = FALSE, reduction.model = NULL, transferdata.args = list(), integrateembeddings.args = list(), @@ -48,6 +49,8 @@ neighbor finding} \item{query.dims}{Dimensions (columns) to use from query} +\item{store.weights}{Determine if the weight matrix is stored.} + \item{reduction.model}{\code{DimReduc} object that contains the umap model} \item{transferdata.args}{A named list of additional arguments to From a26922ff6f6a362e61b5bb8472ca51664437431c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 17 Jan 2022 11:05:35 -0500 Subject: [PATCH 052/979] fix store weights --- R/integration.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 4b6eac404..223ae9cc2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2042,9 +2042,7 @@ MapQuery <- function( integrateembeddings.args <- integrateembeddings.args[names(x = integrateembeddings.args) %in% names(x = formals(fun = IntegrateEmbeddings.TransferAnchorSet))] integrateembeddings.args$reductions <- integrateembeddings.args$reductions %||% anchor.reduction integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction - if (!store.weights) { - slot(object = query, name = "tools")$TransferData <- NULL - } + slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE if (!is.null(x = refdata)) { query <- invoke( @@ -2078,7 +2076,9 @@ MapQuery <- function( ) ) } + if (!store.weights) { slot(object = query, name = "tools")$TransferData <- NULL + } if (!is.null(x = reduction.model)) { reference.dims <- reference.dims %||% slot(object = anchorset, name = "command")$dims query.dims <- query.dims %||% 1:ncol(x = query[[new.reduction.name]]) From 8c112bc40b0f8ddc9e5d29fb5d1e4954cfd08990 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 17 Jan 2022 12:27:05 -0500 Subject: [PATCH 053/979] new slot store weights --- R/integration.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 223ae9cc2..1b3f39ed2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2076,9 +2076,10 @@ MapQuery <- function( ) ) } - if (!store.weights) { - slot(object = query, name = "tools")$TransferData <- NULL + if (store.weights) { + slot(object = query, name = "tools")$MapQuery <- slot(object = query, name = "tools")$TransferData } + slot(object = query, name = "tools")$TransferData <- NULL if (!is.null(x = reduction.model)) { reference.dims <- reference.dims %||% slot(object = anchorset, name = "command")$dims query.dims <- query.dims %||% 1:ncol(x = query[[new.reduction.name]]) From dcaed72c01476fc193c410638546f223567a1dbe Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 26 Jan 2022 00:59:34 -0500 Subject: [PATCH 054/979] add bridge weights in TransferData --- R/integration.R | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 1b3f39ed2..5dfba10fc 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1910,7 +1910,7 @@ LocalStruct <- function( #' @inheritParams IntegrateEmbeddings #' @inheritParams TransferData #' @inheritParams ProjectUMAP -#' @param store.weights Determine if the weight matrix is stored. +#' @param store.weights Determine if the weight and anchor matrices are stored. #' @param transferdata.args A named list of additional arguments to #' \code{\link{TransferData}} #' @param integrateembeddings.args A named list of additional arguments to @@ -2061,6 +2061,7 @@ MapQuery <- function( transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { reuse.weights.matrix <- TRUE } + } if (anchor.reduction != "cca"){ query <- invoke( @@ -2078,6 +2079,7 @@ MapQuery <- function( } if (store.weights) { slot(object = query, name = "tools")$MapQuery <- slot(object = query, name = "tools")$TransferData + slot(object = query, name = "tools")$MapQuery$anchor <- anchorset@anchors } slot(object = query, name = "tools")$TransferData <- NULL if (!is.null(x = reduction.model)) { @@ -3020,6 +3022,8 @@ TransferData <- function( integration.name = "integrated", slot = 'weights' ) + + anchors <- as.data.frame(x = anchors) query.cells <- unname(obj = sapply( X = query.cells, @@ -3047,6 +3051,26 @@ TransferData <- function( prediction.scores <- t(x = weights) %*% prediction.mat colnames(x = prediction.scores) <- possible.ids rownames(x = prediction.scores) <- query.cells + if ("bridge.sets" %in% names(anchorset@weight.reduction@misc)) { + bridge.weight <- anchorset@weight.reduction@misc$bridge.sets + bridge.prediction.matrix <- as.sparse( + x = dummy_cols( + refdata[[rd]][ bridge.weight$bridge.ref_anchor ] + )[, -1] + ) + colnames(bridge.prediction.matrix) <- gsub( + pattern = ".data_", + replacement = "", + x = colnames(bridge.prediction.matrix) + ) + bridge.prediction.matrix <- bridge.prediction.matrix[ , possible.ids, drop = FALSE] + + bridge.prediction.scores <- t(bridge.weight$query.weights) %*% + (t(bridge.weight$bridge.weights) %*% + bridge.prediction.matrix)[bridge.weight$query.ref_anchor,] + prediction.scores <- (prediction.scores + bridge.prediction.scores)/2 + prediction.scores <- as.matrix(prediction.scores) + } prediction.ids <- possible.ids[apply(X = prediction.scores, MARGIN = 1, FUN = which.max)] prediction.ids <- as.character(prediction.ids) prediction.max <- apply(X = prediction.scores, MARGIN = 1, FUN = max) @@ -5915,6 +5939,16 @@ FindBridgeAnchor <- function(object.list, } ) } + + if (anchor.type == "Transfer") { + + slot(object = anchor, name = "weight.reduction")@misc$bridge.sets <- list( + bridge.weights = bridge.object@tools$MapQuery$weights.matrix, + bridge.ref_anchor = bridge.object@tools$MapQuery$anchor[,1], + query.weights = object.list[[query]]@tools$MapQuery$weights.matrix, + query.ref_anchor = object.list[[query]]@tools$MapQuery$anchor[,1] + ) + } slot(object = anchor, name = "command") <- LogSeuratCommand( object = object.list[[1]], return.command = TRUE From e6b119f65f88bcd5937f3c838d14a92373d050a8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 26 Jan 2022 09:51:11 -0500 Subject: [PATCH 055/979] add matrix t --- R/integration.R | 2 +- man/MapQuery.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5dfba10fc..c19fda3b8 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2861,7 +2861,7 @@ SelectIntegrationFeatures <- function( #' #' @references Stuart T, Butler A, et al. Comprehensive Integration of #' Single-Cell Data. Cell. 2019;177:1888-1902 \doi{10.1016/j.cell.2019.05.031} -#' +#' @importFrom Matrix t #' @export #' @concept integration #' @examples diff --git a/man/MapQuery.Rd b/man/MapQuery.Rd index f1b9928ae..1b9ee1441 100644 --- a/man/MapQuery.Rd +++ b/man/MapQuery.Rd @@ -49,7 +49,7 @@ neighbor finding} \item{query.dims}{Dimensions (columns) to use from query} -\item{store.weights}{Determine if the weight matrix is stored.} +\item{store.weights}{Determine if the weight and anchor matrices are stored.} \item{reduction.model}{\code{DimReduc} object that contains the umap model} From b48c75d28de584c0c6f98dff0c3020b3f6709b42 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 26 Jan 2022 10:38:06 -0500 Subject: [PATCH 056/979] only return weights transferdata --- R/integration.R | 169 +++++++++++++++++++++++++------------------- man/TransferData.Rd | 3 + 2 files changed, 98 insertions(+), 74 deletions(-) diff --git a/R/integration.R b/R/integration.R index c19fda3b8..38d4575c0 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2044,7 +2044,7 @@ MapQuery <- function( integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE - if (!is.null(x = refdata)) { + query <- invoke( .fn = TransferData, .args = c(list( @@ -2053,6 +2053,7 @@ MapQuery <- function( query = query, refdata = refdata, store.weights = TRUE, + only.weights = is.null(x = refdata), verbose = verbose ), transferdata.args ) @@ -2061,8 +2062,7 @@ MapQuery <- function( transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { reuse.weights.matrix <- TRUE } - - } + if (anchor.reduction != "cca"){ query <- invoke( .fn = IntegrateEmbeddings, @@ -2842,6 +2842,7 @@ SelectIntegrationFeatures <- function( #' or "counts" #' @param prediction.assay Return an \code{Assay} object with the prediction #' scores for each class stored in the \code{data} slot. +#' @param only.weights Only return weights matrix #' @param store.weights Optionally store the weights matrix used for predictions #' in the returned query object. #' @@ -2906,6 +2907,7 @@ TransferData <- function( verbose = TRUE, slot = "data", prediction.assay = FALSE, + only.weights = FALSE, store.weights = TRUE ) { combined.ob <- slot(object = anchorset, name = "object.list")[[1]] @@ -2930,6 +2932,7 @@ TransferData <- function( eps = eps, n.trees = n.trees, verbose = verbose, + only.weights = only.weights, slot = slot, prediction.assay = prediction.assay, label.transfer = label.transfer @@ -3022,8 +3025,14 @@ TransferData <- function( integration.name = "integrated", slot = 'weights' ) - - + if (only.weights) { + if (is.null(x = query)) { + return(weights) + } else { + slot(object = query, name = "tools")[["TransferData"]] <- list(weights.matrix = weights) + return(query) + } + } anchors <- as.data.frame(x = anchors) query.cells <- unname(obj = sapply( X = query.cells, @@ -5213,89 +5222,101 @@ ValidateParams_TransferData <- function( n.trees, verbose, slot, + only.weights, prediction.assay, label.transfer ) { - if (!inherits(x = refdata, what = "list")) { - refdata <- list(id = refdata) - } - for (i in 1:length(x = refdata)) { - if (inherits(x = refdata[[i]], what = c("character", "factor"))) { - # check is it's in the reference object - if (length(x = refdata[[i]]) == 1) { - if (is.null(x = reference)) { - warning("If providing a single string to refdata element number ", i, - ", please provide the reference object. Skipping element ", i, - ".", call. = FALSE, immediate. = TRUE) + ## check refdata + if (is.null(refdata)) { + if (!only.weights) { + stop("refdata is NULL and only.weights is FALSE") + } + } else { + if (!inherits(x = refdata, what = "list")) { + refdata <- list(id = refdata) + } + for (i in 1:length(x = refdata)) { + if (inherits(x = refdata[[i]], what = c("character", "factor"))) { + # check is it's in the reference object + if (length(x = refdata[[i]]) == 1) { + if (is.null(x = reference)) { + warning("If providing a single string to refdata element number ", i, + ", please provide the reference object. Skipping element ", i, + ".", call. = FALSE, immediate. = TRUE) + refdata[[i]] <- FALSE + next + } + if (refdata[[i]] %in% Assays(object = reference)) { + refdata[[i]] <- GetAssayData(object = reference, assay = refdata[[i]]) + colnames(x = refdata[[i]]) <- paste0(colnames(x = refdata[[i]]), "_reference") + label.transfer[[i]] <- FALSE + next + } else if (refdata[[i]] %in% colnames(x = reference[[]])) { + refdata[[i]] <- reference[[refdata[[i]]]][, 1] + } else { + warning("Element number ", i, " provided to refdata does not exist in ", + "the provided reference object.", call. = FALSE, immediate. = TRUE) + refdata[[i]] <- FALSE + next + } + } else if (length(x = refdata[[i]]) != length(x = reference.cells)) { + warning("Please provide a vector that is the same length as the number ", + "of reference cells used in anchor finding.\n", + "Length of vector provided: ", length(x = refdata[[i]]), "\n", + "Length of vector required: ", length(x = reference.cells), + "\nSkipping element ", i, ".", call. = FALSE, immediate. = TRUE) refdata[[i]] <- FALSE - next } - if (refdata[[i]] %in% Assays(object = reference)) { - refdata[[i]] <- GetAssayData(object = reference, assay = refdata[[i]]) - colnames(x = refdata[[i]]) <- paste0(colnames(x = refdata[[i]]), "_reference") - label.transfer[[i]] <- FALSE - next - } else if (refdata[[i]] %in% colnames(x = reference[[]])) { - refdata[[i]] <- reference[[refdata[[i]]]][, 1] - } else { - warning("Element number ", i, " provided to refdata does not exist in ", - "the provided reference object.", call. = FALSE, immediate. = TRUE) + label.transfer[[i]] <- TRUE + } else if (inherits(x = refdata[[i]], what = c("dgCMatrix", "matrix"))) { + if (ncol(x = refdata[[i]]) != length(x = reference.cells)) { + warning("Please provide a matrix that has the same number of columns as ", + "the number of reference cells used in anchor finding.\n", + "Number of columns in provided matrix : ", ncol(x = refdata[[i]]), "\n", + "Number of columns required : ", length(x = reference.cells), + "\nSkipping element ", i, ".", call. = FALSE, immediate. = TRUE) refdata[[i]] <- FALSE - next + } else { + colnames(x = refdata[[i]]) <- paste0(colnames(x = refdata[[i]]), "_reference") + if (any(!colnames(x = refdata[[i]]) == reference.cells)) { + if (any(!colnames(x = refdata[[i]]) %in% reference.cells) || any(!reference.cells %in% colnames(x = refdata[[i]]))) { + warning("Some (or all) of the column names of the provided refdata ", + "don't match the reference cells used in anchor finding ", + "\nSkipping element", i, ".", call. = FALSE, immediate. = TRUE) + refdata[[i]] <- FALSE + } else { + refdata[[i]] <- refdata[[i]][, reference.cells] + } + } + } + if (!slot %in% c("counts", "data")) { + stop("Please specify slot as either 'counts' or 'data'.") } - } else if (length(x = refdata[[i]]) != length(x = reference.cells)) { - warning("Please provide a vector that is the same length as the number ", - "of reference cells used in anchor finding.\n", - "Length of vector provided: ", length(x = refdata[[i]]), "\n", - "Length of vector required: ", length(x = reference.cells), - "\nSkipping element ", i, ".", call. = FALSE, immediate. = TRUE) + label.transfer[[i]] <- FALSE + } else { + warning("Please provide either a vector (character or factor) for label ", + "transfer or a matrix for feature transfer. \nType provided: ", + class(x = refdata[[i]])) refdata[[i]] <- FALSE } - label.transfer[[i]] <- TRUE - } else if (inherits(x = refdata[[i]], what = c("dgCMatrix", "matrix"))) { - if (ncol(x = refdata[[i]]) != length(x = reference.cells)) { - warning("Please provide a matrix that has the same number of columns as ", - "the number of reference cells used in anchor finding.\n", - "Number of columns in provided matrix : ", ncol(x = refdata[[i]]), "\n", - "Number of columns required : ", length(x = reference.cells), - "\nSkipping element ", i, ".", call. = FALSE, immediate. = TRUE) - refdata[[i]] <- FALSE - } else { - colnames(x = refdata[[i]]) <- paste0(colnames(x = refdata[[i]]), "_reference") - if (any(!colnames(x = refdata[[i]]) == reference.cells)) { - if (any(!colnames(x = refdata[[i]]) %in% reference.cells) || any(!reference.cells %in% colnames(x = refdata[[i]]))) { - warning("Some (or all) of the column names of the provided refdata ", - "don't match the reference cells used in anchor finding ", - "\nSkipping element", i, ".", call. = FALSE, immediate. = TRUE) - refdata[[i]] <- FALSE - } else { - refdata[[i]] <- refdata[[i]][, reference.cells] - } + if (names(x = refdata)[i] == "") { + possible.names <- make.unique(names = c(names(x = refdata), paste0("e", i))) + names(x = refdata)[i] <- possible.names[length(x = possible.names)] + if (verbose) { + message("refdata element ", i, " is not named. Setting name as ", names(x = refdata)[i]) } } - if (!slot %in% c("counts", "data")) { - stop("Please specify slot as either 'counts' or 'data'.") - } - label.transfer[[i]] <- FALSE - } else { - warning("Please provide either a vector (character or factor) for label ", - "transfer or a matrix for feature transfer. \nType provided: ", - class(x = refdata[[i]])) - refdata[[i]] <- FALSE } - if (names(x = refdata)[i] == "") { - possible.names <- make.unique(names = c(names(x = refdata), paste0("e", i))) - names(x = refdata)[i] <- possible.names[length(x = possible.names)] - if (verbose) { - message("refdata element ", i, " is not named. Setting name as ", names(x = refdata)[i]) - } + ModifyParam(param = "label.transfer", value = label.transfer) + if (all(unlist(x = lapply(X = refdata, FUN = isFALSE)))) { + stop("None of the provided refdata elements are valid.", call. = FALSE) } + ModifyParam(param = "refdata", value = refdata) } - ModifyParam(param = "label.transfer", value = label.transfer) - if (all(unlist(x = lapply(X = refdata, FUN = isFALSE)))) { - stop("None of the provided refdata elements are valid.", call. = FALSE) - } - ModifyParam(param = "refdata", value = refdata) + + + + object.reduction <- Reductions(object = slot(object = anchorset, name = "object.list")[[1]]) valid.weight.reduction <- c("pcaproject", "pca", "cca", "rpca.ref","lsiproject", "lsi", object.reduction) if (!inherits(x = weight.reduction, "DimReduc")) { diff --git a/man/TransferData.Rd b/man/TransferData.Rd index 6bbe17fcb..f7ec22b26 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -19,6 +19,7 @@ TransferData( verbose = TRUE, slot = "data", prediction.assay = FALSE, + only.weights = FALSE, store.weights = TRUE ) } @@ -78,6 +79,8 @@ or "counts"} \item{prediction.assay}{Return an \code{Assay} object with the prediction scores for each class stored in the \code{data} slot.} +\item{only.weights}{Only return weights matrix} + \item{store.weights}{Optionally store the weights matrix used for predictions in the returned query object.} } From a3119933db44e0c6e4d2dacd8b52c9649ffb000f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 27 Jan 2022 11:13:04 -0500 Subject: [PATCH 057/979] reformat mapquery weights --- R/integration.R | 63 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/R/integration.R b/R/integration.R index 38d4575c0..0e496ba54 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2077,9 +2077,16 @@ MapQuery <- function( ) ) } + slot(object = query, name = "tools")$MapQuery <- NULL if (store.weights) { - slot(object = query, name = "tools")$MapQuery <- slot(object = query, name = "tools")$TransferData - slot(object = query, name = "tools")$MapQuery$anchor <- anchorset@anchors + slot(object = query, name = "tools")$MapQuery <- slot( + object = query, + name = "tools" + )$TransferData + slot(object = query, name = "tools")$MapQuery$anchor <- slot( + object = anchorset, + name = "anchors" + ) } slot(object = query, name = "tools")$TransferData <- NULL if (!is.null(x = reduction.model)) { @@ -3071,14 +3078,26 @@ TransferData <- function( pattern = ".data_", replacement = "", x = colnames(bridge.prediction.matrix) - ) - bridge.prediction.matrix <- bridge.prediction.matrix[ , possible.ids, drop = FALSE] - + ) + extra.id <- setdiff(possible.ids, colnames(bridge.prediction.matrix)) + if (length(extra.id) > 0) { + extra.prediction <- as.sparse( + matrix(data = 0, + nrow = nrow(bridge.prediction.matrix), + ncol = length(extra.id)) + ) + colnames(extra.prediction) <- extra.id + bridge.prediction.matrix <- cbind( + bridge.prediction.matrix, + extra.prediction + ) + } + bridge.prediction.matrix <- bridge.prediction.matrix[,possible.ids, drop = FALSE] bridge.prediction.scores <- t(bridge.weight$query.weights) %*% (t(bridge.weight$bridge.weights) %*% bridge.prediction.matrix)[bridge.weight$query.ref_anchor,] prediction.scores <- (prediction.scores + bridge.prediction.scores)/2 - prediction.scores <- as.matrix(prediction.scores) + prediction.scores <- as.matrix(x = prediction.scores) } prediction.ids <- possible.ids[apply(X = prediction.scores, MARGIN = 1, FUN = which.max)] prediction.ids <- as.character(prediction.ids) @@ -5862,6 +5881,8 @@ FindBridgeAnchor <- function(object.list, verbose = TRUE, ... ) { + + if (!is.null(laplacian.reduction)) { bridge.method <- "bridge graph" } else { @@ -5879,9 +5900,18 @@ FindBridgeAnchor <- function(object.list, } ) } + if (anchor.type == "Transfer") { reference <- reference %||% c(1) query <- setdiff(c(1,2), reference) + ## check weight matrix + if (is.null(bridge.object@tools$MapQuery)) { + stop("No weights stored between reference and bridge obejcts.", + "Please set store.weights to TRUE in MapQuery") + } else if (is.null(object.list[[query]]@tools$MapQuery)) { + stop("No weights stored between query and bridge obejcts.", + "Please set store.weights to TRUE in MapQuery") + } } bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") @@ -5962,12 +5992,20 @@ FindBridgeAnchor <- function(object.list, } if (anchor.type == "Transfer") { - - slot(object = anchor, name = "weight.reduction")@misc$bridge.sets <- list( - bridge.weights = bridge.object@tools$MapQuery$weights.matrix, - bridge.ref_anchor = bridge.object@tools$MapQuery$anchor[,1], - query.weights = object.list[[query]]@tools$MapQuery$weights.matrix, - query.ref_anchor = object.list[[query]]@tools$MapQuery$anchor[,1] + slot( object = anchor,name = "weight.reduction" + )@misc$bridge.sets <- list( + bridge.weights = slot(object = bridge.object, + name = "tools" + )$MapQuery$weights.matrix, + bridge.ref_anchor = slot(object = bridge.object, + name = "tools" + )$MapQuery$anchor[,1], + query.weights = slot(object = object.list[[query]], + name = "tools" + )$MapQuery$weights.matrix, + query.ref_anchor = slot(object = object.list[[query]], + name = "tools" + )$MapQuery$anchor[,1] ) } slot(object = anchor, name = "command") <- LogSeuratCommand( @@ -5975,7 +6013,6 @@ FindBridgeAnchor <- function(object.list, return.command = TRUE ) return(anchor) - } From c7620c0916bf435ac6ea8b34574f9d9ac093a873 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 28 Jan 2022 15:11:25 -0500 Subject: [PATCH 058/979] check cell name --- R/integration.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/R/integration.R b/R/integration.R index 0e496ba54..47ad8f9c9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6638,6 +6638,11 @@ IntegrateSketchEmbeddings <- function(object.list, x = c(list(features), lapply(X = object.list, function(x) rownames(x))) ) + # check cell names + cells.sketch.list <- unlist(lapply(X = sketch.list, function(x) Cells(x) )) + if (length(x = setdiff(x = cells.sketch.list, y = Cells(sketch.object))) != 0) { + stop("Cells name in object.list are not unique. Rename is needed.") + } my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, @@ -6682,8 +6687,8 @@ IntegrateSketchEmbeddings <- function(object.list, EXPR = dictionary.method, 'embeddings'= { sketch.transform <- ginv( - X = Embeddings(object = inte.sub[[sketch.reduction.raw]])[q.cells ,]) %*% - Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,] + X = Embeddings(object = sketch.object[[sketch.reduction.raw]])[q.cells ,]) %*% + Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] emb <- emb.list[[q]] %*% sketch.transform emb }, @@ -6691,13 +6696,13 @@ IntegrateSketchEmbeddings <- function(object.list, exp.mat <- t( x = as.matrix( x = GetAssayData( - inte.sub[[assay]], + sketch.object[[assay]], slot = 'data' )[features,q.cells] ) ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,] + Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] emb <- as.matrix( x = t( x = GetAssayData( @@ -6717,13 +6722,13 @@ IntegrateSketchEmbeddings <- function(object.list, exp.mat <- as.matrix( x = t( x = GetAssayData( - inte.sub[[assay]], + sketch.object[[assay]], slot = 'data')[features,q.cells] ) %*% R ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = inte.sub[[sketch.reduction]])[q.cells ,] + Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] emb <- as.matrix( x = ( t( From 8ad12be0cadf533ae06b22b8eb25e5290e58db2a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 29 Jan 2022 21:50:25 -0500 Subject: [PATCH 059/979] chang error info --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 47ad8f9c9..332a6e353 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6641,7 +6641,7 @@ IntegrateSketchEmbeddings <- function(object.list, # check cell names cells.sketch.list <- unlist(lapply(X = sketch.list, function(x) Cells(x) )) if (length(x = setdiff(x = cells.sketch.list, y = Cells(sketch.object))) != 0) { - stop("Cells name in object.list are not unique. Rename is needed.") + stop("Cells name in object.list are the same with Cells in sketch.object.") } my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, From febf2d36028bfb5f79fa10ef5b9614b439d510e9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 7 Feb 2022 20:36:32 -0500 Subject: [PATCH 060/979] bridge weights option --- R/integration.R | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/R/integration.R b/R/integration.R index 0e496ba54..f0a41ab37 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5904,13 +5904,16 @@ FindBridgeAnchor <- function(object.list, if (anchor.type == "Transfer") { reference <- reference %||% c(1) query <- setdiff(c(1,2), reference) - ## check weight matrix + stored.bridge.weights <- FALSE + # check weight matrix if (is.null(bridge.object@tools$MapQuery)) { - stop("No weights stored between reference and bridge obejcts.", + warning("No weights stored between reference and bridge obejcts.", "Please set store.weights to TRUE in MapQuery") } else if (is.null(object.list[[query]]@tools$MapQuery)) { - stop("No weights stored between query and bridge obejcts.", + warning("No weights stored between query and bridge obejcts.", "Please set store.weights to TRUE in MapQuery") + } else { + stored.bridge.weights <- TRUE } } @@ -5991,23 +5994,24 @@ FindBridgeAnchor <- function(object.list, ) } - if (anchor.type == "Transfer") { - slot( object = anchor,name = "weight.reduction" - )@misc$bridge.sets <- list( - bridge.weights = slot(object = bridge.object, - name = "tools" - )$MapQuery$weights.matrix, - bridge.ref_anchor = slot(object = bridge.object, + if (stored.bridge.weights) { + slot( object = anchor,name = "weight.reduction" + )@misc$bridge.sets <- list( + bridge.weights = slot(object = bridge.object, name = "tools" - )$MapQuery$anchor[,1], - query.weights = slot(object = object.list[[query]], - name = "tools" - )$MapQuery$weights.matrix, - query.ref_anchor = slot(object = object.list[[query]], - name = "tools" - )$MapQuery$anchor[,1] - ) - } + )$MapQuery$weights.matrix, + bridge.ref_anchor = slot(object = bridge.object, + name = "tools" + )$MapQuery$anchor[,1], + query.weights = slot(object = object.list[[query]], + name = "tools" + )$MapQuery$weights.matrix, + query.ref_anchor = slot(object = object.list[[query]], + name = "tools" + )$MapQuery$anchor[,1] + ) + } + slot(object = anchor, name = "command") <- LogSeuratCommand( object = object.list[[1]], return.command = TRUE From c3bbbb91fddf5a55c645509de1e0a3c5f01ecdf1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 7 Feb 2022 20:39:39 -0500 Subject: [PATCH 061/979] add ProjectDimReduc --- R/integration.R | 112 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/R/integration.R b/R/integration.R index fc32711ca..186e6fe7a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6910,5 +6910,117 @@ SmoothLabels <- function(labels, clusters ) { } + +ProjectDimReduc <- function(query, + reference, + mode = c('pcaproject', 'lsiproject'), + reference.reduction, + query.assay = NULL, + reference.assay = NULL, + features = NULL, + do.scale = TRUE, + reduction.name = NULL, + reduction.key= NULL, + combine = FALSE, + verbose = TRUE +) { + + query.assay <- query.assay %||% DefaultAssay(object = query) + reference.assay <- reference.assay %||% DefaultAssay(object = reference) + DefaultAssay(query) <- query.assay + DefaultAssay(reference) <- reference.assay + reduction.name <- reduction.name %||% reference.reduction + reduction.key <- reduction.key %||% Key(reference[[reference.reduction]]) + + if (reduction.name %in% Reductions(query)) { + warning(reduction.name, + ' already exists in the query object. It will be overwritten.' + ) + } + features <- features %||% rownames(x = Loadings(object = reference[[reference.reduction]])) + features <- intersect(x = features, y = rownames(x = query)) + + if (mode == 'lsiproject') { + if (verbose) { + message('LSI projection to ', reference.reduction) + } + projected.embeddings <- ProjectSVD( + reduction = reference[[reference.reduction]], + data = GetAssayData(object = query, assay = query.assay, slot = "data"), + mode = "lsi", + do.center = FALSE, + do.scale = FALSE, + features = features, + use.original.stats = FALSE, + verbose = verbose + ) + } else if (mode == 'pcaproject') { + if (inherits(query[[query.assay]], what = 'SCTAssay')) { + if (verbose) { + message('PCA projection to ', reference.reduction, ' in SCT assay') + } + query <- suppressWarnings( + expr = GetResidual(object = query, + assay = query.assay, + features = features, + verbose = FALSE) + ) + query.mat <- GetAssayData(object = query, slot = 'scale.data')[features,] + + projected.embeddings <- t( + crossprod(x = Loadings( + object = reference[[reference.reduction]])[features, ], + y = query.mat + ) + ) + } else { + if (verbose) { + message('PCA projection to ', reference.reduction) + } + projected.embeddings <- ProjectCellEmbeddings( + reference = reference, + reduction = reference.reduction, + query = query, + scale = do.scale, + dims = 1:ncol(reference[[reference.reduction]]), + feature.mean = NULL, + verbose = verbose + ) + } + } + + query[[reduction.name]] <- CreateDimReducObject( + embeddings = projected.embeddings, + loadings = Loadings(reference[[reference.reduction]])[features,], + assay = query.assay, + key = reduction.key, + misc = Misc(reference[[reference.reduction]]) + ) + + if (combine) { + query <- DietSeurat(object = query, + dimreducs = reduction.name, + features = features, + assays = query.assay + ) + reference <- DietSeurat(object = reference, + dimreducs = reference.reduction, + features = features, + assays = reference.assay) + + suppressWarnings( + combine.obj <- merge(query, reference, + merge.dr = c(reduction.name, reference.reduction) + ) + ) + Idents(combine.obj) <- c(rep(x = 'query', times = ncol(query)), + rep(x = 'reference', times = ncol(reference))) + return(combine.obj) + } else { + return(query) + } +} + + \ No newline at end of file From dd0c4a573f56870cc4e1d0f5182649f63e47e392 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 7 Feb 2022 22:19:26 -0500 Subject: [PATCH 062/979] docu ProjectDimReduc --- NAMESPACE | 1 + R/integration.R | 51 +++++++++++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 839021289..2b9c849f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -271,6 +271,7 @@ export(PrepLDA) export(PrepSCTIntegration) export(Project) export(ProjectDim) +export(ProjectDimReduc) export(ProjectUMAP) export(PurpleAndYellow) export(Radius) diff --git a/R/integration.R b/R/integration.R index 186e6fe7a..44cf0991b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6910,36 +6910,55 @@ SmoothLabels <- function(labels, clusters ) { } - + +#' Project query data to reference dimensional reduction +#' +#' @param query Query object +#' @param reference Reference object +#' @param mode Projection mode name for projection +#' \itemize{ +#' \item{pcaproject: PCA projection} +#' \item{lsiproject: LSI projection} +#' } +#' @param reference.reduction Name of dimensional reduction in the reference object +#' @param combine Determine if query and reference objects are combined +#' @param query.assay Assay used for query object +#' @param reference.assay Assay used for reference object +#' @param features Features used for projection +#' @param do.scale Determine if scale expression matrix in the pcaproject mode +#' @param reduction.name dimensional reduction name, reference.reduction is used by default +#' @param reduction.key dimensional reduction key, the key in reference.reduction +#' is used by default +#' @param verbose Print progress and message +#' +#' @return Returns a query-only or query-reference combined seurat object +#' @export ProjectDimReduc <- function(query, reference, mode = c('pcaproject', 'lsiproject'), - reference.reduction, + reference.reduction, + combine = FALSE, query.assay = NULL, reference.assay = NULL, features = NULL, do.scale = TRUE, reduction.name = NULL, reduction.key= NULL, - combine = FALSE, verbose = TRUE ) { - query.assay <- query.assay %||% DefaultAssay(object = query) reference.assay <- reference.assay %||% DefaultAssay(object = reference) - DefaultAssay(query) <- query.assay - DefaultAssay(reference) <- reference.assay + DefaultAssay(object = query) <- query.assay + DefaultAssay(object = reference) <- reference.assay reduction.name <- reduction.name %||% reference.reduction - reduction.key <- reduction.key %||% Key(reference[[reference.reduction]]) - - if (reduction.name %in% Reductions(query)) { + reduction.key <- reduction.key %||% Key(object = reference[[reference.reduction]]) + if (reduction.name %in% Reductions(object = query)) { warning(reduction.name, ' already exists in the query object. It will be overwritten.' ) } features <- features %||% rownames(x = Loadings(object = reference[[reference.reduction]])) features <- intersect(x = features, y = rownames(x = query)) - if (mode == 'lsiproject') { if (verbose) { message('LSI projection to ', reference.reduction) @@ -6988,7 +7007,6 @@ ProjectDimReduc <- function(query, ) } } - query[[reduction.name]] <- CreateDimReducObject( embeddings = projected.embeddings, loadings = Loadings(reference[[reference.reduction]])[features,], @@ -6996,7 +7014,6 @@ ProjectDimReduc <- function(query, key = reduction.key, misc = Misc(reference[[reference.reduction]]) ) - if (combine) { query <- DietSeurat(object = query, dimreducs = reduction.name, @@ -7007,20 +7024,16 @@ ProjectDimReduc <- function(query, dimreducs = reference.reduction, features = features, assays = reference.assay) - suppressWarnings( combine.obj <- merge(query, reference, merge.dr = c(reduction.name, reference.reduction) ) ) - Idents(combine.obj) <- c(rep(x = 'query', times = ncol(query)), - rep(x = 'reference', times = ncol(reference))) + Idents(combine.obj) <- c(rep(x = 'query', times = ncol(query)), + rep(x = 'reference', times = ncol(reference)) + ) return(combine.obj) } else { return(query) } } - - - - \ No newline at end of file From f1b2e8e537edf5c9af31286161a2ba3ffec5242c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 7 Feb 2022 22:26:49 -0500 Subject: [PATCH 063/979] add package --- DESCRIPTION | 2 ++ man/RunSLSI.Rd | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4d26cbe00..5901d2b30 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Depends: Imports: cluster, cowplot, + fastDummies, fitdistrplus, future, future.apply, @@ -59,6 +60,7 @@ Imports: RColorBrewer, Rcpp (>= 1.0.7), RcppAnnoy (>= 0.0.18), + RcppHNSW, reticulate, rlang, ROCR, diff --git a/man/RunSLSI.Rd b/man/RunSLSI.Rd index c89ed6886..5b7a05ad6 100644 --- a/man/RunSLSI.Rd +++ b/man/RunSLSI.Rd @@ -74,7 +74,7 @@ reductions slot } \description{ Run a supervised LSI (SLSI) dimensionality reduction supervised by a -cell-cell kernel. SLSI is used to capture a linear transformation of peaks +cell-cell kernel. SLSI is used to capture a linear transformation of peaks that maximizes its dependency to the given cell-cell kernel. } \concept{dimensional_reduction} From 191fdec83893eab81ca7e8b2a9fa7c52aa74a0d1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 7 Feb 2022 23:54:24 -0500 Subject: [PATCH 064/979] revert develop --- DESCRIPTION | 2 -- man/RunSLSI.Rd | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5901d2b30..4d26cbe00 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,6 @@ Depends: Imports: cluster, cowplot, - fastDummies, fitdistrplus, future, future.apply, @@ -60,7 +59,6 @@ Imports: RColorBrewer, Rcpp (>= 1.0.7), RcppAnnoy (>= 0.0.18), - RcppHNSW, reticulate, rlang, ROCR, diff --git a/man/RunSLSI.Rd b/man/RunSLSI.Rd index 5b7a05ad6..c89ed6886 100644 --- a/man/RunSLSI.Rd +++ b/man/RunSLSI.Rd @@ -74,7 +74,7 @@ reductions slot } \description{ Run a supervised LSI (SLSI) dimensionality reduction supervised by a -cell-cell kernel. SLSI is used to capture a linear transformation of peaks +cell-cell kernel. SLSI is used to capture a linear transformation of peaks that maximizes its dependency to the given cell-cell kernel. } \concept{dimensional_reduction} From a2c205cc428997e1f9412847dcd3f18bfeef47d6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Feb 2022 00:11:31 -0500 Subject: [PATCH 065/979] add package install --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 4d26cbe00..5901d2b30 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Depends: Imports: cluster, cowplot, + fastDummies, fitdistrplus, future, future.apply, @@ -59,6 +60,7 @@ Imports: RColorBrewer, Rcpp (>= 1.0.7), RcppAnnoy (>= 0.0.18), + RcppHNSW, reticulate, rlang, ROCR, From e1feebc8252022f8044d4e86899b4511a9ae0542 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Feb 2022 10:50:59 -0500 Subject: [PATCH 066/979] export NNtoGraph --- NAMESPACE | 1 + R/integration.R | 53 +++++++++++++++++++++++++++++++++++-------------- man/RunSLSI.Rd | 2 +- 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 28a5b2417..ad05efc72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -253,6 +253,7 @@ export(MixingMetric) export(MixscapeHeatmap) export(MixscapeLDA) export(NNPlot) +export(NNtoGraph) export(Neighbors) export(NoAxes) export(NoGrid) diff --git a/R/integration.R b/R/integration.R index cff4002dc..653e83309 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5596,26 +5596,49 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } +#' Convert Neighbor class to an asymmetrical Graph class +#' @param nn.object A neighbor class object +#' @param col.cells Cells names of the neighbors, cell names in nn.object is used by default +#' @param weighted Determine if use distance in the Graph +#' +#' +#' @export +#' @importFrom Matrix sparseMatrix +#' @return Returns a Graph object -NNtoGraph <- function(nn.object, ncol.nn = NULL, col.cells = NULL) { - select_nn <- nn.object@nn.idx - col.cells <- col.cells %||% nn.object@cell.names - ncol.nn <- ncol.nn %||% length(col.cells) - k.nn <- ncol(select_nn) - j <- as.numeric(x = t(x = select_nn )) +NNtoGraph <- function( + nn.object, + col.cells = NULL, + weighted = FALSE) { + select_nn <- Indices(object = nn.object) + col.cells <- col.cells %||% Cells(x = nn.object) + ncol.nn <- length(x = col.cells) + k.nn <- ncol(x = select_nn) + j <- as.numeric(x = t(x = select_nn)) i <- ((1:length(x = j)) - 1) %/% k.nn + 1 - nn.matrix <- sparseMatrix( - i = i, - j = j, - x = 1, - dims = c(nrow(x = select_nn), ncol.nn) - ) - - rownames(x = nn.matrix) <- nn.object@cell.names + if (weighted) { + select_nn_dist <- Distances(object = nn.object) + dist.element <- as.numeric(x = t(x = select_nn_dist)) + nn.matrix <- Matrix::sparseMatrix( + i = i, + j = j, + x = dist.element, + dims = c(nrow(x = select_nn), ncol.nn) + ) + } else { + nn.matrix <- sparseMatrix( + i = i, + j = j, + x = 1, + dims = c(nrow(x = select_nn), ncol.nn) + ) + } + rownames(x = nn.matrix) <- Cells(x = nn.object) colnames(x = nn.matrix) <- col.cells - return( nn.matrix) + nn.matrix <- as.Graph(x = nn.matrix) + return(nn.matrix) } diff --git a/man/RunSLSI.Rd b/man/RunSLSI.Rd index c89ed6886..5b7a05ad6 100644 --- a/man/RunSLSI.Rd +++ b/man/RunSLSI.Rd @@ -74,7 +74,7 @@ reductions slot } \description{ Run a supervised LSI (SLSI) dimensionality reduction supervised by a -cell-cell kernel. SLSI is used to capture a linear transformation of peaks +cell-cell kernel. SLSI is used to capture a linear transformation of peaks that maximizes its dependency to the given cell-cell kernel. } \concept{dimensional_reduction} From ccc719046b8ffe8e6eff464e96b7953b6a2cabde Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Feb 2022 14:25:34 -0500 Subject: [PATCH 067/979] fix dims bug --- R/integration.R | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 653e83309..61af5e9bf 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2076,6 +2076,10 @@ MapQuery <- function( ), integrateembeddings.args ) ) + Misc( + object = query[[new.reduction.name]], + slot = 'ref.dims' + ) <- slot(object = anchorset, name = "command")$dims } slot(object = query, name = "tools")$MapQuery <- NULL if (store.weights) { @@ -5781,6 +5785,24 @@ BridgeCellsRepresentation <- function(object.list, if (verbose) { message("Constructing Bridge-cells representation") } + dims.list <- list() + for (i in 1:length(object.reduction.list)) { + ref.dims <- list( + object= Misc(object.list[[i]][[object.reduction.list[[i]]]], slot = 'ref.dims'), + bridge = Misc( bridge.object[[bridge.reduction.list[[i]]]], slot = 'ref.dims') + ) + all.dims <- list( + object = 1:ncol(object.list[[i]][[object.reduction.list[[i]]]]), + bridge = 1:ncol( bridge.object[[bridge.reduction.list[[i]] ]]) + ) + projected.dims.index <- which(sapply(ref.dims, function(x) !is.null(x))) + reference.dims.index <- setdiff(c(1:2), projected.dims.index) + dims.list[[i]] <- list() + dims.list[[i]][[reference.dims.index]] <- ref.dims[[projected.dims.index ]] + dims.list[[i]][[projected.dims.index]] <- all.dims[[projected.dims.index]] + names(dims.list[[i]]) <- c('object', 'bridge') + } + object.list <- my.lapply( X = 1:length(x = object.list), FUN = function(x) { @@ -5788,19 +5810,19 @@ BridgeCellsRepresentation <- function(object.list, X = Embeddings( object = bridge.object, reduction = bridge.reduction.list[[x]] - )[ ,dims.list[[x]]] + )[ ,dims.list[[x]]$bridge] ) if (!is.null(laplacian.reduction)) { lap.vector <- Embeddings(bridge.object[[laplacian.reduction]])[,laplacian.dims] X <- Embeddings( object = object.list[[x]], reduction = object.reduction.list[[x]] - )[, 1:length(x = dims.list[[x]])] %*% (SA.inv %*% lap.vector) + )[, dims.list[[x]]$object] %*% (SA.inv %*% lap.vector) } else { X <- Embeddings( object = object.list[[x]], reduction = object.reduction.list[[x]] - )[, 1:length(x = dims.list[[x]])] %*% SA.inv + )[, dims.list[[x]]$object] %*% SA.inv colnames(X) <- Cells(bridge.object) } if (l2.norm) { From a7be02dbbbe88cba3c94068d4c28028e207e85ab Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Feb 2022 14:31:05 -0500 Subject: [PATCH 068/979] delete dims.list --- R/integration.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 61af5e9bf..d09222ae9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5767,7 +5767,6 @@ BridgeCellsRepresentation <- function(object.list, bridge.object, object.reduction.list, bridge.reduction.list, - dims.list, laplacian.reduction = NULL, laplacian.dims = NULL, bridge.assay.name = "Bridge", @@ -5879,8 +5878,6 @@ BridgeCellsRepresentation <- function(object.list, #' to be reconstructed by bridge.obejct #' @param bridge.reduction.list A list of dimensional reductions from bridge.object used #' to reconstruct object.reduction.list -#' @param dims.list A list of dimensions to use for object.reduction.list and -#' bridge.reduction.list #' @param anchor.type The type of anchors. Can #' be one of: #' \itemize{ @@ -5914,7 +5911,6 @@ FindBridgeAnchor <- function(object.list, bridge.object, object.reduction.list, bridge.reduction.list, - dims.list, anchor.type = c("Integration", "Transfer")[1], reference = NULL, laplacian.reduction = "lap", From 5f638b12d129d82db0f6331acc4dcdeddd764d84 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Feb 2022 14:53:54 -0500 Subject: [PATCH 069/979] allow no ref.dims --- R/integration.R | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/R/integration.R b/R/integration.R index d09222ae9..cebadec52 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5795,11 +5795,27 @@ BridgeCellsRepresentation <- function(object.list, bridge = 1:ncol( bridge.object[[bridge.reduction.list[[i]] ]]) ) projected.dims.index <- which(sapply(ref.dims, function(x) !is.null(x))) - reference.dims.index <- setdiff(c(1:2), projected.dims.index) - dims.list[[i]] <- list() - dims.list[[i]][[reference.dims.index]] <- ref.dims[[projected.dims.index ]] - dims.list[[i]][[projected.dims.index]] <- all.dims[[projected.dims.index]] - names(dims.list[[i]]) <- c('object', 'bridge') + if (length(projected.dims.index) == 0) { + warning('No reference dims found in the dimensional reduction,', + ' all dims in the dimensional reduction will be used.') + if (all.dims[[1]] == all.dims[[2]]) { + dims.list[[i]] <- all.dims + } else { + stop( 'The number of dimensions in the object.list ', + object.reduction.list[[i]], + ' (', length(all.dims[[1]]), ') ', + ' and the number of dimensions in the bridge object ', + bridge.reduction.list[[i]], + ' (', length(all.dims[[2]]), ') ', + ' is different.') + } + } else { + reference.dims.index <- setdiff(c(1:2), projected.dims.index) + dims.list[[i]] <- list() + dims.list[[i]][[reference.dims.index]] <- ref.dims[[projected.dims.index ]] + dims.list[[i]][[projected.dims.index]] <- all.dims[[projected.dims.index]] + names(dims.list[[i]]) <- c('object', 'bridge') + } } object.list <- my.lapply( @@ -5964,7 +5980,6 @@ FindBridgeAnchor <- function(object.list, bridge.object = bridge.object, object.reduction.list = object.reduction.list, bridge.reduction.list = bridge.reduction.list, - dims.list = dims.list, bridge.assay.name = bridge.assay.name, laplacian.reduction = laplacian.reduction, laplacian.dims = laplacian.dims, From b0e79acf733370baf4317eb8708c61571f9f5f9c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Feb 2022 22:13:02 -0500 Subject: [PATCH 070/979] fix integration anchor store weights --- R/integration.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index cebadec52..d0824145a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6049,7 +6049,7 @@ FindBridgeAnchor <- function(object.list, } ) } - + if (anchor.type == "Transfer") { if (stored.bridge.weights) { slot( object = anchor,name = "weight.reduction" )@misc$bridge.sets <- list( @@ -6067,6 +6067,7 @@ FindBridgeAnchor <- function(object.list, )$MapQuery$anchor[,1] ) } + } slot(object = anchor, name = "command") <- LogSeuratCommand( object = object.list[[1]], From 8352c1e411ed4fe880aef5d208b93edcfb856d50 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 10 Feb 2022 17:33:23 -0500 Subject: [PATCH 071/979] change function name --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index d0824145a..709ff0681 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5651,7 +5651,7 @@ NNtoGraph <- function( # # @return Returns an Integration or TranserAnchor set -FindDirectAnchor <- function( +FindAssayAnchor <- function( object.list, reference = NULL, anchor.type = c("Integration", "Transfer")[1], @@ -6002,7 +6002,7 @@ FindBridgeAnchor <- function(object.list, } ) if (reduction == "direct") { - anchor <- FindDirectAnchor( + anchor <- FindAssayAnchor( object.list = object.list , reference = reference, slot = "data", From 8aabd87776e3852bbde00f7b9155da678bdb7878 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Sun, 13 Feb 2022 16:13:27 -0500 Subject: [PATCH 072/979] Remove sct version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5901d2b30..1bc165e0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,7 @@ Imports: Rtsne, scales, scattermore (>= 0.7), - sctransform (>= 0.3.3), + sctransform, SeuratObject (>= 4.0.4), shiny, spatstat.core, From d34dea809c5d69b20d020f1d595e83da751a594e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 16 Feb 2022 00:09:13 -0500 Subject: [PATCH 073/979] update docu --- NAMESPACE | 2 +- R/generics.R | 14 +++++++++++--- R/integration.R | 25 +++++++++++++++++-------- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ad05efc72..4d0ce6feb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -140,6 +140,7 @@ export(BarcodeInflectionsPlot) export(BlackAndWhite) export(BlueAndRed) export(BoldTitle) +export(BridgeCellsRepresentation) export(BuildClusterTree) export(CalcPerturbSig) export(CalculateBarcodeInflections) @@ -303,7 +304,6 @@ export(RunMarkVario) export(RunMixscape) export(RunMoransI) export(RunPCA) -export(RunPCA_Sparse) export(RunSLSI) export(RunSPCA) export(RunTSNE) diff --git a/R/generics.R b/R/generics.R index e7d9efb6e..38f9d4f07 100644 --- a/R/generics.R +++ b/R/generics.R @@ -303,11 +303,19 @@ IntegrateEmbeddings <- function(anchorset, ...) { } -#' Leverage score -#' xxxx -#' xxxx +#' Calculate Leverage score for all cells +#' +#' Leverage score can be used to sample representative cells from scRNA data. +#' The more abundant population will be assigned less leverage score. +#' Leverage-score can guarantee that both abundant and rare populations will be sampled. +#' We used variable features in the data slot to calculate leverage score for all cells. +#' #' @param object An object #' @export LeverageScore +#' @return Returns a seurat object with additional column storing leverage score +#' #' @references Clarkson KL, Woodruff DP. +#' Low Rank Approximation and Regression in Input Sparsity Time. +#' Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. \url{https://https://arxiv.org/abs/1207.6365}; #' LeverageScore <- function(object, ...) { UseMethod(generic = 'LeverageScore', object = object) diff --git a/R/integration.R b/R/integration.R index 709ff0681..5e35ab92d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5755,13 +5755,17 @@ FindAssayAnchor <- function( #' Use bridge cells to represent single-modality object -#' -#' -#' +#' +#' @inheritParams FindBridgeAnchor +#' @param return.all.assays if return all assays in the object.list. +#' Only bridge assay is returned by default. #' #' #' @importFrom MASS ginv -#‘ +#' @return Returns a object list in which each object has a bridge cell derived assay +#' @export +#' + BridgeCellsRepresentation <- function(object.list, bridge.object, @@ -6484,7 +6488,11 @@ LeverageScore.Assay <- function(object, return(score) } -#' ssssxxxxx + + +#' @inheritParams LeverageScoreSampling +#' @param slot The slot used for leverage score calculation. data slot is used by default +#' #' @rdname LeverageScore #' @export #' @method LeverageScore Seurat @@ -6652,7 +6660,8 @@ IntegrationReferenceIndex <- function(object) { #' construct the batch-corrected embeddings for all cells #' @param object.list A list of Seurat objects with all cells #' @param sketch.list A list of Seurat objects with sketched cells -#' @param sketch.object A sketched Seurat objects with integraetd embeddings +#' @param sketch.object A sketched Seurat objects with integrated embeddings +#' @param features Features used for sketch integration #' @param assay Assay name for raw expression #' @param sketch.reduction Dimensional reduction name for batch-corrected embeddings #' in the sketched object @@ -6911,8 +6920,8 @@ SparseMeanSd <- function(object, #' @importFrom Matrix t #' @importFrom rlang exec #' @importFrom irlba irlba -#' -#' @export +# +# RunPCA_Sparse <- function( object, features = NULL, From 7c1fd5254a770081f31c97b430561cf751fdf20b Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 16 Feb 2022 16:20:20 -0500 Subject: [PATCH 074/979] Add RSpectra to imports Specify version of sctransform --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1bc165e0d..f67b8d643 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -64,10 +64,11 @@ Imports: reticulate, rlang, ROCR, + RSpectra, Rtsne, scales, scattermore (>= 0.7), - sctransform, + sctransform (>= 0.3.3), SeuratObject (>= 4.0.4), shiny, spatstat.core, From ca503ac406805e00ed8cd655c9125152f4d5cfe2 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 16 Feb 2022 16:20:51 -0500 Subject: [PATCH 075/979] Update docs; style --- R/generics.R | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/R/generics.R b/R/generics.R index 38f9d4f07..9478bdf67 100644 --- a/R/generics.R +++ b/R/generics.R @@ -304,19 +304,24 @@ IntegrateEmbeddings <- function(anchorset, ...) { #' Calculate Leverage score for all cells -#' -#' Leverage score can be used to sample representative cells from scRNA data. -#' The more abundant population will be assigned less leverage score. -#' Leverage-score can guarantee that both abundant and rare populations will be sampled. -#' We used variable features in the data slot to calculate leverage score for all cells. -#' +#' +#' Leverage score can be used to sample representative cells from scRNA data. +#' The more abundant population will be assigned less leverage score. +#' Leverage-score can guarantee that both abundant and rare populations will +#' be sampled. We used variable features in the data slot to calculate leverage +#' score for all cells. +#' #' @param object An object -#' @export LeverageScore +#' #' @return Returns a seurat object with additional column storing leverage score -#' #' @references Clarkson KL, Woodruff DP. +#' +#' @export LeverageScore +#' +#' @references Clarkson KL, Woodruff DP. #' Low Rank Approximation and Regression in Input Sparsity Time. -#' Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. \url{https://https://arxiv.org/abs/1207.6365}; -#' +#' Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. +#' \url{https://https://arxiv.org/abs/1207.6365}; +#' LeverageScore <- function(object, ...) { UseMethod(generic = 'LeverageScore', object = object) } @@ -415,20 +420,21 @@ RunCCA <- function(object1, object2, ...) { #' Run Graph Laplacian Eigendecomposition #' -#' Run a graph laplacian dimensionality reduction. It is used as a low dimensional -#' representation for a cell-cell graph. The input graph should be symmetric -#' +#' Run a graph laplacian dimensionality reduction. It is used as a low +#' dimensional representation for a cell-cell graph. The input graph +#' should be symmetric +#' #' @param object A Seurat object -#' @param ... Arguments passed to RSpectra eigs_sym +#' @param ... Arguments passed to +#' \code{\link[RSpectra:eigs_sym]{RSpectra::eigs_sym}} #' -#' @return Returns Seurat object with the Graph laplacian eigenvector calculation -#' stored in the reductions slot -#' @export +#' @return Returns Seurat object with the Graph laplacian eigenvector +#' calculation stored in the reductions slot #' #' @rdname RunGraphLaplacian #' @export RunGraphLaplacian #' - + RunGraphLaplacian <- function(object, ...) { UseMethod(generic = 'RunGraphLaplacian', object = object) } From 5da373ae0cd4710081cb7fdf54051acba4327a23 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 16 Feb 2022 16:35:57 -0500 Subject: [PATCH 076/979] Update style --- R/integration.R | 381 ++++++++++++++++++++++++------------------------ 1 file changed, 194 insertions(+), 187 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5e35ab92d..173a2a2e3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1567,7 +1567,10 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( reference.datasets <- slot(object = anchorset, name = 'reference.objects') object.list <- slot(object = anchorset, name = 'object.list') anchors <- slot(object = anchorset, name = 'anchors') - reductions <- reductions %||% slot(object = anchorset, name = 'weight.reduction') + reductions <- reductions %||% slot( + object = anchorset, + name = 'weight.reduction' + ) ValidateParams_IntegrateEmbeddings_IntegrationAnchors( anchorset = anchorset, object.list = object.list, @@ -1624,7 +1627,9 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( ) if (length(x = reference.datasets) == length(x = object.list)) { reference.dr <- CreateDimReducObject( - embeddings = as.matrix(x = t(GetAssayData(reference.integrated[[new.reduction.name.safe]]))), + embeddings = as.matrix(x = t(GetAssayData( + object = reference.integrated[[new.reduction.name.safe]] + ))), assay = intdr.assay, loadings = Loadings(object = reductions), key = paste0(new.reduction.name.safe, "_") @@ -1910,15 +1915,15 @@ LocalStruct <- function( #' @inheritParams IntegrateEmbeddings #' @inheritParams TransferData #' @inheritParams ProjectUMAP -#' @param store.weights Determine if the weight and anchor matrices are stored. +#' @param store.weights Determine if the weight and anchor matrices are stored. #' @param transferdata.args A named list of additional arguments to #' \code{\link{TransferData}} #' @param integrateembeddings.args A named list of additional arguments to #' \code{\link{IntegrateEmbeddings}} #' @param projectumap.args A named list of additional arguments to #' \code{\link{ProjectUMAP}} -#' @return Returns a modified query Seurat object containing: #' +#' @return Returns a modified query Seurat object containing:#' #' \itemize{ #' \item{New Assays corresponding to the features transferred and/or their #' corresponding prediction scores from \code{\link{TransferData}}} @@ -1941,7 +1946,7 @@ MapQuery <- function( reference.reduction = NULL, reference.dims = NULL, query.dims = NULL, - store.weights = FALSE, + store.weights = FALSE, reduction.model = NULL, transferdata.args = list(), integrateembeddings.args = list(), @@ -1990,41 +1995,38 @@ MapQuery <- function( reference.dims <- query.dims <- 1:ncol(x = ref.cca.embedding) } else if (grepl(pattern = "lsi", x = transfer.reduction)) { anchor.reduction <- "lsiproject" - } else if (grepl(pattern = "direct", x = transfer.reduction)){ + } else if (grepl(pattern = "direct", x = transfer.reduction)) { anchor.reduction <- paste0( - slot(object = anchorset, - name = "command")$bridge.assay.name, + slot(object = anchorset, + name = "command")$bridge.assay.name, ".reduc" ) ref.reduction.emb <- Embeddings( - object = + object = slot( - object = anchorset, + object = anchorset, name = "object.list" )[[1]][[anchor.reduction]])[ slot(object = anchorset, name = "reference.cells"),] rownames(ref.reduction.emb) <- gsub( pattern = "_reference", replacement = "", - x = rownames(ref.reduction.emb) + x = rownames(ref.reduction.emb) ) reference[[anchor.reduction]] <- CreateDimReducObject( embeddings = ref.reduction.emb, - key = "L_", + key = "L_", assay = DefaultAssay(reference) ) - } + } else { stop("unkown type of anchors") } - - reference.reduction <- reference.reduction %||% slot(object = anchorset, name = "command")$reference.reduction %||% anchor.reduction new.reduction.name <- new.reduction.name %||% paste0("ref.", reference.reduction) - # checking TransferData parameters td.badargs <- names(x = transferdata.args)[!names(x = transferdata.args) %in% names(x = formals(fun = TransferData))] if (length(x = td.badargs) > 0) { @@ -2044,26 +2046,24 @@ MapQuery <- function( integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE - - query <- invoke( - .fn = TransferData, - .args = c(list( - anchorset = anchorset, - reference = reference, - query = query, - refdata = refdata, - store.weights = TRUE, - only.weights = is.null(x = refdata), - verbose = verbose - ), transferdata.args - ) + query <- invoke( + .fn = TransferData, + .args = c(list( + anchorset = anchorset, + reference = reference, + query = query, + refdata = refdata, + store.weights = TRUE, + only.weights = is.null(x = refdata), + verbose = verbose + ), transferdata.args ) - if (inherits(x = transferdata.args$weight.reduction , "character") && - transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { - reuse.weights.matrix <- TRUE - } - - if (anchor.reduction != "cca"){ + ) + if (inherits(x = transferdata.args$weight.reduction , "character") && + transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { + reuse.weights.matrix <- TRUE + } + if (anchor.reduction != "cca") { query <- invoke( .fn = IntegrateEmbeddings, .args = c(list( @@ -2081,12 +2081,12 @@ MapQuery <- function( slot = 'ref.dims' ) <- slot(object = anchorset, name = "command")$dims } - slot(object = query, name = "tools")$MapQuery <- NULL + slot(object = query, name = "tools")$MapQuery <- NULL if (store.weights) { slot(object = query, name = "tools")$MapQuery <- slot( - object = query, + object = query, name = "tools" - )$TransferData + )$TransferData slot(object = query, name = "tools")$MapQuery$anchor <- slot( object = anchorset, name = "anchors" @@ -2873,9 +2873,13 @@ SelectIntegrationFeatures <- function( #' #' @references Stuart T, Butler A, et al. Comprehensive Integration of #' Single-Cell Data. Cell. 2019;177:1888-1902 \doi{10.1016/j.cell.2019.05.031} +#' #' @importFrom Matrix t +#' #' @export +#' #' @concept integration +#' #' @examples #' \dontrun{ #' # to install the SeuratData package see https://github.com/satijalab/seurat-data @@ -2918,7 +2922,7 @@ TransferData <- function( verbose = TRUE, slot = "data", prediction.assay = FALSE, - only.weights = FALSE, + only.weights = FALSE, store.weights = TRUE ) { combined.ob <- slot(object = anchorset, name = "object.list")[[1]] @@ -2943,7 +2947,7 @@ TransferData <- function( eps = eps, n.trees = n.trees, verbose = verbose, - only.weights = only.weights, + only.weights = only.weights, slot = slot, prediction.assay = prediction.assay, label.transfer = label.transfer @@ -2952,7 +2956,7 @@ TransferData <- function( if (verbose) { message("Running PCA on query dataset") } - + features <- slot(object = anchorset, name = "anchor.features") query.ob <- query query.ob <- ScaleData(object = query.ob, features = features, verbose = FALSE) @@ -3043,7 +3047,7 @@ TransferData <- function( slot(object = query, name = "tools")[["TransferData"]] <- list(weights.matrix = weights) return(query) } - } + } anchors <- as.data.frame(x = anchors) query.cells <- unname(obj = sapply( X = query.cells, @@ -3061,8 +3065,12 @@ TransferData <- function( anchors$id1 <- refdata[[rd]][anchors[, "cell1"]] reference.ids <- factor(x = anchors$id1, levels = unique(x = refdata[[rd]])) possible.ids <- levels(x = reference.ids) - prediction.mat <- matrix(nrow = nrow(x = anchors), ncol = length(x = possible.ids), data = 0) - for(i in 1:length(x = possible.ids)) { + prediction.mat <- matrix( + nrow = nrow(x = anchors), + ncol = length(x = possible.ids), + data = 0 + ) + for (i in 1:length(x = possible.ids)) { prediction.mat[which(reference.ids == possible.ids[i]), i] = 1 } if (verbose) { @@ -3077,19 +3085,19 @@ TransferData <- function( x = dummy_cols( refdata[[rd]][ bridge.weight$bridge.ref_anchor ] )[, -1] - ) + ) colnames(bridge.prediction.matrix) <- gsub( pattern = ".data_", replacement = "", x = colnames(bridge.prediction.matrix) - ) + ) extra.id <- setdiff(possible.ids, colnames(bridge.prediction.matrix)) if (length(extra.id) > 0) { - extra.prediction <- as.sparse( - matrix(data = 0, - nrow = nrow(bridge.prediction.matrix), - ncol = length(extra.id)) - ) + extra.prediction <- as.sparse(x = matrix( + data = 0, + nrow = nrow(bridge.prediction.matrix), + ncol = length(extra.id) + )) colnames(extra.prediction) <- extra.id bridge.prediction.matrix <- cbind( bridge.prediction.matrix, @@ -3098,7 +3106,7 @@ TransferData <- function( } bridge.prediction.matrix <- bridge.prediction.matrix[,possible.ids, drop = FALSE] bridge.prediction.scores <- t(bridge.weight$query.weights) %*% - (t(bridge.weight$bridge.weights) %*% + (t(bridge.weight$bridge.weights) %*% bridge.prediction.matrix)[bridge.weight$query.ref_anchor,] prediction.scores <- (prediction.scores + bridge.prediction.scores)/2 prediction.scores <- as.matrix(x = prediction.scores) @@ -3106,7 +3114,7 @@ TransferData <- function( prediction.ids <- possible.ids[apply(X = prediction.scores, MARGIN = 1, FUN = which.max)] prediction.ids <- as.character(prediction.ids) prediction.max <- apply(X = prediction.scores, MARGIN = 1, FUN = max) - if (is.null(x = query)){ + if (is.null(x = query)) { prediction.scores <- cbind(prediction.scores, max = prediction.max) } predictions <- data.frame( @@ -5245,7 +5253,7 @@ ValidateParams_TransferData <- function( n.trees, verbose, slot, - only.weights, + only.weights, prediction.assay, label.transfer ) { @@ -5253,7 +5261,7 @@ ValidateParams_TransferData <- function( if (is.null(refdata)) { if (!only.weights) { stop("refdata is NULL and only.weights is FALSE") - } + } } else { if (!inherits(x = refdata, what = "list")) { refdata <- list(id = refdata) @@ -5336,10 +5344,10 @@ ValidateParams_TransferData <- function( } ModifyParam(param = "refdata", value = refdata) } - - - - + + + + object.reduction <- Reductions(object = slot(object = anchorset, name = "object.list")[[1]]) valid.weight.reduction <- c("pcaproject", "pca", "cca", "rpca.ref","lsiproject", "lsi", object.reduction) if (!inherits(x = weight.reduction, "DimReduc")) { @@ -5604,22 +5612,22 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( #' @param nn.object A neighbor class object #' @param col.cells Cells names of the neighbors, cell names in nn.object is used by default #' @param weighted Determine if use distance in the Graph -#' -#' -#' @export -#' @importFrom Matrix sparseMatrix +#' #' @return Returns a Graph object - - - +#' +#' @importFrom Matrix sparseMatrix +#' +#' @export +#' NNtoGraph <- function( - nn.object, - col.cells = NULL, - weighted = FALSE) { + nn.object, + col.cells = NULL, + weighted = FALSE +) { select_nn <- Indices(object = nn.object) col.cells <- col.cells %||% Cells(x = nn.object) ncol.nn <- length(x = col.cells) - k.nn <- ncol(x = select_nn) + k.nn <- ncol(x = select_nn) j <- as.numeric(x = t(x = select_nn)) i <- ((1:length(x = j)) - 1) %/% k.nn + 1 if (weighted) { @@ -5650,7 +5658,6 @@ NNtoGraph <- function( # # # @return Returns an Integration or TranserAnchor set - FindAssayAnchor <- function( object.list, reference = NULL, @@ -5755,16 +5762,16 @@ FindAssayAnchor <- function( #' Use bridge cells to represent single-modality object -#' +#' #' @inheritParams FindBridgeAnchor -#' @param return.all.assays if return all assays in the object.list. +#' @param return.all.assays if return all assays in the object.list. #' Only bridge assay is returned by default. #' #' #' @importFrom MASS ginv #' @return Returns a object list in which each object has a bridge cell derived assay #' @export -#' +#' BridgeCellsRepresentation <- function(object.list, @@ -5772,7 +5779,7 @@ BridgeCellsRepresentation <- function(object.list, object.reduction.list, bridge.reduction.list, laplacian.reduction = NULL, - laplacian.dims = NULL, + laplacian.dims = NULL, bridge.assay.name = "Bridge", return.all.assays = FALSE, l2.norm = TRUE, @@ -5790,7 +5797,7 @@ BridgeCellsRepresentation <- function(object.list, } dims.list <- list() for (i in 1:length(object.reduction.list)) { - ref.dims <- list( + ref.dims <- list( object= Misc(object.list[[i]][[object.reduction.list[[i]]]], slot = 'ref.dims'), bridge = Misc( bridge.object[[bridge.reduction.list[[i]]]], slot = 'ref.dims') ) @@ -5803,15 +5810,15 @@ BridgeCellsRepresentation <- function(object.list, warning('No reference dims found in the dimensional reduction,', ' all dims in the dimensional reduction will be used.') if (all.dims[[1]] == all.dims[[2]]) { - dims.list[[i]] <- all.dims + dims.list[[i]] <- all.dims } else { - stop( 'The number of dimensions in the object.list ', + stop( 'The number of dimensions in the object.list ', object.reduction.list[[i]], ' (', length(all.dims[[1]]), ') ', ' and the number of dimensions in the bridge object ', - bridge.reduction.list[[i]], + bridge.reduction.list[[i]], ' (', length(all.dims[[2]]), ') ', - ' is different.') + ' is different.') } } else { reference.dims.index <- setdiff(c(1:2), projected.dims.index) @@ -5827,14 +5834,14 @@ BridgeCellsRepresentation <- function(object.list, FUN = function(x) { SA.inv <- ginv( X = Embeddings( - object = bridge.object, + object = bridge.object, reduction = bridge.reduction.list[[x]] )[ ,dims.list[[x]]$bridge] ) if (!is.null(laplacian.reduction)) { lap.vector <- Embeddings(bridge.object[[laplacian.reduction]])[,laplacian.dims] X <- Embeddings( - object = object.list[[x]], + object = object.list[[x]], reduction = object.reduction.list[[x]] )[, dims.list[[x]]$object] %*% (SA.inv %*% lap.vector) } else { @@ -5877,7 +5884,7 @@ BridgeCellsRepresentation <- function(object.list, #' Find bridge anchors between two modalities objects #' -#' First, bridge object is used to reconstruct two single-modality profiles and +#' First, bridge object is used to reconstruct two single-modality profiles and #' then project those cells into bridage graph laplacian space. #' Next, find a set of anchors between two single-modality objects. These #' anchors can later be used to integrate embeddings or transfer data from the reference to @@ -5889,7 +5896,7 @@ BridgeCellsRepresentation <- function(object.list, #' \item{ Find anchors between objects. It can be either IntegrationAnchors or TransferAnchor. #' } #' } -#' +#' #' @param object.list A list of Seurat objects between which to #' find anchors for downstream integration. #' @param bridge.object A multimodal bridge seurat which connects two @@ -5920,12 +5927,12 @@ BridgeCellsRepresentation <- function(object.list, #' @param verbose Print messages and progress #' @param ... Additional parameters passed to \code{FindIntegrationAnchors} or #' \code{FindTransferAnchors} -#' -#' +#' +#' #' @return Returns an \code{\link{AnchorSet}} object that can be used as input to #' \code{\link{IntegrateEmbeddings}}.or \code{\link{MapQuery}} #' @export -#' +#' FindBridgeAnchor <- function(object.list, bridge.object, @@ -5933,17 +5940,17 @@ FindBridgeAnchor <- function(object.list, bridge.reduction.list, anchor.type = c("Integration", "Transfer")[1], reference = NULL, - laplacian.reduction = "lap", - laplacian.dims = NULL, - reduction = c("direct", "cca")[1], + laplacian.reduction = "lap", + laplacian.dims = NULL, + reduction = c("direct", "cca")[1], bridge.assay.name = "Bridge", k.anchor = 20, k.score = 50, verbose = TRUE, ... ) { - - + + if (!is.null(laplacian.reduction)) { bridge.method <- "bridge graph" } else { @@ -5955,7 +5962,7 @@ FindBridgeAnchor <- function(object.list, EXPR = bridge.method, "bridge graph" = { message('Transform cells to bridge graph laplacian space') - }, + }, "bridge cells" = { message('Transform cells to bridge cells space') } @@ -5976,7 +5983,7 @@ FindBridgeAnchor <- function(object.list, } else { stored.bridge.weights <- TRUE } - } + } bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") object.list <- BridgeCellsRepresentation( @@ -5991,7 +5998,7 @@ FindBridgeAnchor <- function(object.list, ) # assay to dimensional reduction object.list <- lapply( - X = object.list, + X = object.list, FUN = function(x) { x[[bridge.reduction.name]] <- CreateDimReducObject( embeddings = t(GetAssayData( @@ -6000,7 +6007,7 @@ FindBridgeAnchor <- function(object.list, assay = bridge.assay.name )), key = "L_", - assay = bridge.assay.name + assay = bridge.assay.name ) return(x) } @@ -6018,7 +6025,7 @@ FindBridgeAnchor <- function(object.list, verbose = verbose ) } else if (reduction == "cca") { - anchor <- switch(EXPR = anchor.type, + anchor <- switch(EXPR = anchor.type, "Integration" = { anchor <- FindIntegrationAnchors( object.list = object.list, @@ -6037,11 +6044,11 @@ FindBridgeAnchor <- function(object.list, object.list[[2]][[bridge.reduction.name]] ) anchor - }, + }, "Transfer" = { anchor <- FindTransferAnchors( reference = object.list[[reference]], - query = object.list[[query]], + query = object.list[[query]], reduction = "cca", scale = FALSE, k.filter = NA, @@ -6059,20 +6066,20 @@ FindBridgeAnchor <- function(object.list, )@misc$bridge.sets <- list( bridge.weights = slot(object = bridge.object, name = "tools" - )$MapQuery$weights.matrix, + )$MapQuery$weights.matrix, bridge.ref_anchor = slot(object = bridge.object, name = "tools" - )$MapQuery$anchor[,1], + )$MapQuery$anchor[,1], query.weights = slot(object = object.list[[query]], name = "tools" - )$MapQuery$weights.matrix, + )$MapQuery$weights.matrix, query.ref_anchor = slot(object = object.list[[query]], name = "tools" )$MapQuery$anchor[,1] ) } } - + slot(object = anchor, name = "command") <- LogSeuratCommand( object = object.list[[1]], return.command = TRUE @@ -6109,7 +6116,7 @@ TransferLablesNN <- function( x = dummy_cols( reference.object[[group.by]] )[, -1] - ) + ) colnames(reference.labels.matrix) <- gsub( pattern = paste0(group.by, "_"), replacement = "", @@ -6121,7 +6128,7 @@ TransferLablesNN <- function( prediction.max <- apply(X = query.label.mat, MARGIN = 1, FUN = which.max) query.label <- colnames(x = query.label.mat)[prediction.max] query.label.score <- apply(X = query.label.mat, MARGIN = 1, FUN = max) - + output.list <- list(labels = query.label, scores = query.label.score, prediction.mat = query.label.mat @@ -6135,25 +6142,25 @@ TransferExpressionNN<- function( reference.object, var.name = NULL ){ - - nn.matrix <- NNtoGraph(nn.object = nn.object, + + nn.matrix <- NNtoGraph(nn.object = nn.object, col.cells = Cells(reference.object) ) reference.exp.matrix <- FetchData(object = reference.object, vars = var.name) # remove NA reference.exp.matrix <- reference.exp.matrix[complete.cases(reference.exp.matrix), ,drop= F] nn.matrix <- nn.matrix[, rownames(reference.exp.matrix)] - + # remove NO neighbor query - nn.sum <- RowSumSparse(mat = nn.matrix) + nn.sum <- RowSumSparse(mat = nn.matrix) nn.matrix <- nn.matrix[nn.sum > 2, ] nn.sum <- nn.sum[nn.sum>2] - + # transfer data reference.exp.matrix <- as.matrix(reference.exp.matrix) query.exp.mat <- nn.matrix %*% reference.exp.matrix query.exp.mat <- sweep(x = query.exp.mat, MARGIN = 1, STATS = nn.sum, FUN = "/") - + # set output for all query cells query.exp.all <- data.frame(row.names = Cells(nn.object)) query.exp.all[rownames(query.exp.mat),1] <- query.exp.mat[,1] @@ -6168,7 +6175,7 @@ TransferExpressionNN<- function( #' @concept dimensional_reduction #' @export #' @method RunGraphLaplacian Seurat -#' +#' RunGraphLaplacian.Seurat <- function( object, graph, @@ -6199,7 +6206,7 @@ RunGraphLaplacian.Seurat <- function( #' @concept dimensional_reduction #' @rdname RunGraphLaplacian #' @export -#' +#' #' @importFrom Matrix diag t rowSums #' @importFrom RSpectra eigs_sym RunGraphLaplacian.default <- function(object, @@ -6347,12 +6354,12 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { #' @param nsketch Number of rows in the random sketch matrix #' @param ndims Number of dimensions in the JL embeddings #' @param sampling.method Sampling method for generating random matrix -#' @param MARGIN Margin +#' @param MARGIN Margin #' @param eps error tolerance for JL embeddings #' @param seed Set a random seed #' @param verbose Print message and process #' @param ... -#' +#' #' @importFrom Matrix qrR #' @importFrom SeuratObject as.sparse #' @rdname LeverageScore @@ -6435,12 +6442,12 @@ LeverageScore.default <- function( } else { base::qr.R(qr = qr.sa) } - + # triangular matrix inverse - + R.inv <- as.sparse(backsolve(r = R , x = diag(ncol(R)))) - - + + if (isTRUE(x = verbose)) { message("Random projection") } @@ -6455,12 +6462,12 @@ LeverageScore.default <- function( } #' ssssxxxxx -#' +#' #' @rdname LeverageScore #' @export #' @method LeverageScore Assay -#' -#' +#' +#' LeverageScore.Assay <- function(object, features = NULL, nsketch = 5000L, @@ -6475,28 +6482,28 @@ LeverageScore.Assay <- function(object, ndims <- ndims%||%ncol(x = object) data <- GetAssayData(object, slot = slot)[features,] score <- LeverageScore( - object = data, - features = features, - nsketch = nsketch, - ndims = ndims, + object = data, + features = features, + nsketch = nsketch, + ndims = ndims, sampling.method = sampling.method, - seed = seed, + seed = seed, eps = eps, - verbose = verbose, + verbose = verbose, ... ) return(score) } - - + + #' @inheritParams LeverageScoreSampling #' @param slot The slot used for leverage score calculation. data slot is used by default -#' +#' #' @rdname LeverageScore #' @export #' @method LeverageScore Seurat -#' +#' LeverageScore.Seurat <- function(object, features = NULL, assay = NULL, @@ -6507,14 +6514,14 @@ LeverageScore.Seurat <- function(object, slot = "data", eps = 0.5, seed = 123, - over.write = FALSE, + over.write = FALSE, verbose = TRUE, ... ) { assay <- assay %||% DefaultAssay(object) features <- features %||% VariableFeatures(object = object[[assay]]) ndims <- ndims %||% ncol(x = object) - + if (is.null(features)) { stop("No variable features are set. Please run FindVariableFeatures.") } @@ -6523,20 +6530,20 @@ LeverageScore.Seurat <- function(object, } object[[var.name]] <- LeverageScore( object = GetAssay(object = object, assay = assay), - features = features, - nsketch = nsketch, - ndims = ndims, - sampling.method = sampling.method, - seed = seed, + features = features, + nsketch = nsketch, + ndims = ndims, + sampling.method = sampling.method, + seed = seed, slot = slot, - eps = eps, - verbose = verbose, + eps = eps, + verbose = verbose, ... ) return(object) } -# Check if the var.name already existed in the meta.data +# Check if the var.name already existed in the meta.data # CheckMetaVarName <- function(object, var.name) { if (var.name %in% colnames(x = object[[]])) { @@ -6553,7 +6560,7 @@ CheckMetaVarName <- function(object, var.name) { } -#' Subset objects based on Leverage score +#' Subset objects based on Leverage score #' #' @param object A seurat object #' @param num.cells Number of sampled cells @@ -6563,17 +6570,17 @@ CheckMetaVarName <- function(object, var.name) { #' @param over.write If over write the variable with leverage score #' @param seed Set a random seed.By default, sets the seed to 123 #' @param ... Arguments passed to LeverageScore -#' +#' #' @return Returns a sub-sampled seurat object #' @export -#' +#' LeverageScoreSampling <- function( object, num.cells = 5000, assay = NULL, features = NULL, var.name = "leverage.score", - over.write = FALSE, + over.write = FALSE, seed = 123, ...) { if (!over.write) { @@ -6581,10 +6588,10 @@ LeverageScoreSampling <- function( } object <- LeverageScore( object = object, - assay = assay, - features = features, - var.name = var.name, - over.write = over.write, + assay = assay, + features = features, + var.name = var.name, + over.write = over.write, seed = seed, ... ) @@ -6681,8 +6688,8 @@ IntegrationReferenceIndex <- function(object) { #' @param merged.object A merged seurat object containing all cells #' @param reference.index Index for the integration reference #' @param verbose Print progress and message -#' -#' +#' +#' #' @importFrom MASS ginv #' @importFrom Matrix t #' @export @@ -6697,7 +6704,7 @@ IntegrateSketchEmbeddings <- function(object.list, reduction.key = 'PCcorrect_', dictionary.method = c('sketch', 'data','embeddings')[1], sketch.ratio = 0.8, - sketch.reduction.raw = NULL, + sketch.reduction.raw = NULL, merged.object = NULL, reference.index = NULL, verbose = TRUE) { @@ -6750,7 +6757,7 @@ IntegrateSketchEmbeddings <- function(object.list, } emb.list.query <- my.lapply( X = query.index, - FUN = + FUN = function(q) { q.cells <- Cells(x = sketch.list[[q]]) emb <- switch( @@ -6761,7 +6768,7 @@ IntegrateSketchEmbeddings <- function(object.list, Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] emb <- emb.list[[q]] %*% sketch.transform emb - }, + }, 'data' = { exp.mat <- t( x = as.matrix( @@ -6778,11 +6785,11 @@ IntegrateSketchEmbeddings <- function(object.list, x = GetAssayData( object = object.list[[q]], slot = 'data')[features,] - ) %*% + ) %*% sketch.transform ) emb - }, + }, 'sketch' = { R <- t( x = CountSketch( @@ -6794,7 +6801,7 @@ IntegrateSketchEmbeddings <- function(object.list, x = GetAssayData( sketch.object[[assay]], slot = 'data')[features,q.cells] - ) %*% + ) %*% R ) sketch.transform <- ginv(X = exp.mat) %*% @@ -6805,8 +6812,8 @@ IntegrateSketchEmbeddings <- function(object.list, x = GetAssayData( object = object.list[[q]], slot = 'data')[features,] - ) %*% - R) %*% + ) %*% + R) %*% sketch.transform ) emb @@ -6893,10 +6900,10 @@ ProjectDataEmbeddings <- function(object, SparseMeanSd <- function(object, - assay = NULL, - slot = 'data', - features = NULL, - eps = 1e-8 + assay = NULL, + slot = 'data', + features = NULL, + eps = 1e-8 ){ assay <- assay%||% DefaultAssay(object) features <- features %||% rownames(object[[assay]]) @@ -6921,7 +6928,7 @@ SparseMeanSd <- function(object, #' @importFrom rlang exec #' @importFrom irlba irlba # -# +# RunPCA_Sparse <- function( object, features = NULL, @@ -6976,9 +6983,9 @@ SmoothLabels <- function(labels, clusters ) { } - + #' Project query data to reference dimensional reduction -#' +#' #' @param query Query object #' @param reference Reference object #' @param mode Projection mode name for projection @@ -6987,7 +6994,7 @@ SmoothLabels <- function(labels, clusters ) { #' \item{lsiproject: LSI projection} #' } #' @param reference.reduction Name of dimensional reduction in the reference object -#' @param combine Determine if query and reference objects are combined +#' @param combine Determine if query and reference objects are combined #' @param query.assay Assay used for query object #' @param reference.assay Assay used for reference object #' @param features Features used for projection @@ -6996,20 +7003,20 @@ SmoothLabels <- function(labels, clusters ) { #' @param reduction.key dimensional reduction key, the key in reference.reduction #' is used by default #' @param verbose Print progress and message -#' +#' #' @return Returns a query-only or query-reference combined seurat object #' @export ProjectDimReduc <- function(query, - reference, + reference, mode = c('pcaproject', 'lsiproject'), reference.reduction, combine = FALSE, - query.assay = NULL, - reference.assay = NULL, - features = NULL, - do.scale = TRUE, - reduction.name = NULL, - reduction.key= NULL, + query.assay = NULL, + reference.assay = NULL, + features = NULL, + do.scale = TRUE, + reduction.name = NULL, + reduction.key= NULL, verbose = TRUE ) { query.assay <- query.assay %||% DefaultAssay(object = query) @@ -7035,7 +7042,7 @@ ProjectDimReduc <- function(query, mode = "lsi", do.center = FALSE, do.scale = FALSE, - features = features, + features = features, use.original.stats = FALSE, verbose = verbose ) @@ -7045,13 +7052,13 @@ ProjectDimReduc <- function(query, message('PCA projection to ', reference.reduction, ' in SCT assay') } query <- suppressWarnings( - expr = GetResidual(object = query, - assay = query.assay, - features = features, + expr = GetResidual(object = query, + assay = query.assay, + features = features, verbose = FALSE) ) query.mat <- GetAssayData(object = query, slot = 'scale.data')[features,] - + projected.embeddings <- t( crossprod(x = Loadings( object = reference[[reference.reduction]])[features, ], @@ -7082,20 +7089,20 @@ ProjectDimReduc <- function(query, ) if (combine) { query <- DietSeurat(object = query, - dimreducs = reduction.name, - features = features, + dimreducs = reduction.name, + features = features, assays = query.assay ) - reference <- DietSeurat(object = reference, - dimreducs = reference.reduction, - features = features, + reference <- DietSeurat(object = reference, + dimreducs = reference.reduction, + features = features, assays = reference.assay) suppressWarnings( combine.obj <- merge(query, reference, merge.dr = c(reduction.name, reference.reduction) ) ) - Idents(combine.obj) <- c(rep(x = 'query', times = ncol(query)), + Idents(combine.obj) <- c(rep(x = 'query', times = ncol(query)), rep(x = 'reference', times = ncol(reference)) ) return(combine.obj) From d22e54ef3638d0f09ee142773ee405a208490b69 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 16 Feb 2022 16:36:06 -0500 Subject: [PATCH 077/979] Update docs --- man/BridgeCellsRepresentation.Rd | 50 ++++++++++++++++ man/FindBridgeAnchor.Rd | 87 ++++++++++++++++++++++++++++ man/IntegrateSketchEmbeddings.Rd | 70 +++++++++++++++++++++++ man/LeverageScore.Rd | 98 ++++++++++++++++++++++++++++++++ man/LeverageScoreSampling.Rd | 40 +++++++++++++ man/NNtoGraph.Rd | 21 +++++++ man/ProjectDimReduc.Rd | 57 +++++++++++++++++++ man/RunGraphLaplacian.Rd | 49 ++++++++++++++++ 8 files changed, 472 insertions(+) create mode 100644 man/BridgeCellsRepresentation.Rd create mode 100644 man/FindBridgeAnchor.Rd create mode 100644 man/IntegrateSketchEmbeddings.Rd create mode 100644 man/LeverageScore.Rd create mode 100644 man/LeverageScoreSampling.Rd create mode 100644 man/NNtoGraph.Rd create mode 100644 man/ProjectDimReduc.Rd create mode 100644 man/RunGraphLaplacian.Rd diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd new file mode 100644 index 000000000..9f66b362a --- /dev/null +++ b/man/BridgeCellsRepresentation.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{BridgeCellsRepresentation} +\alias{BridgeCellsRepresentation} +\title{Use bridge cells to represent single-modality object} +\usage{ +BridgeCellsRepresentation( + object.list, + bridge.object, + object.reduction.list, + bridge.reduction.list, + laplacian.reduction = NULL, + laplacian.dims = NULL, + bridge.assay.name = "Bridge", + return.all.assays = FALSE, + l2.norm = TRUE, + do.center = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{object.list}{A list of Seurat objects between which to +find anchors for downstream integration.} + +\item{bridge.object}{A multimodal bridge seurat which connects two +single-modality objects} + +\item{object.reduction.list}{A list of dimensional reductions from object.list used +to be reconstructed by bridge.obejct} + +\item{bridge.reduction.list}{A list of dimensional reductions from bridge.object used +to reconstruct object.reduction.list} + +\item{laplacian.reduction}{Name of bridge graph laplacian dimensional reduction} + +\item{laplacian.dims}{Dimensions used for bridge graph laplacian dimensional reduction} + +\item{bridge.assay.name}{Assay name used for bridge object reconstruction value} + +\item{return.all.assays}{if return all assays in the object.list. +Only bridge assay is returned by default.} + +\item{verbose}{Print messages and progress} +} +\value{ +Returns a object list in which each object has a bridge cell derived assay +} +\description{ +Use bridge cells to represent single-modality object +} diff --git a/man/FindBridgeAnchor.Rd b/man/FindBridgeAnchor.Rd new file mode 100644 index 000000000..f90afdfe7 --- /dev/null +++ b/man/FindBridgeAnchor.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{FindBridgeAnchor} +\alias{FindBridgeAnchor} +\title{Find bridge anchors between two modalities objects} +\usage{ +FindBridgeAnchor( + object.list, + bridge.object, + object.reduction.list, + bridge.reduction.list, + anchor.type = c("Integration", "Transfer")[1], + reference = NULL, + laplacian.reduction = "lap", + laplacian.dims = NULL, + reduction = c("direct", "cca")[1], + bridge.assay.name = "Bridge", + k.anchor = 20, + k.score = 50, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object.list}{A list of Seurat objects between which to +find anchors for downstream integration.} + +\item{bridge.object}{A multimodal bridge seurat which connects two +single-modality objects} + +\item{object.reduction.list}{A list of dimensional reductions from object.list used +to be reconstructed by bridge.obejct} + +\item{bridge.reduction.list}{A list of dimensional reductions from bridge.object used +to reconstruct object.reduction.list} + +\item{anchor.type}{The type of anchors. Can +be one of: +\itemize{ + \item{Integration: Generate IntegrationAnchors for integration} + \item{Transfer: Generate TransferAnchors for transfering data} +}} + +\item{reference}{A vector specifying the object/s to be used as a reference +during integration or transfer data.} + +\item{laplacian.reduction}{Name of bridge graph laplacian dimensional reduction} + +\item{laplacian.dims}{Dimensions used for bridge graph laplacian dimensional reduction} + +\item{reduction}{Dimensional reduction to perform when finding anchors. Can +be one of: +\itemize{ + \item{cca: Canonical correlation analysis} + \item{direct: Use assay data as a dimensional reduction} +}} + +\item{bridge.assay.name}{Assay name used for bridge object reconstruction value} + +\item{k.anchor}{How many neighbors (k) to use when picking anchors} + +\item{k.score}{How many neighbors (k) to use when scoring anchors} + +\item{verbose}{Print messages and progress} + +\item{...}{Additional parameters passed to \code{FindIntegrationAnchors} or +\code{FindTransferAnchors}} +} +\value{ +Returns an \code{\link{AnchorSet}} object that can be used as input to +\code{\link{IntegrateEmbeddings}}.or \code{\link{MapQuery}} +} +\description{ +First, bridge object is used to reconstruct two single-modality profiles and +then project those cells into bridage graph laplacian space. +Next, find a set of anchors between two single-modality objects. These +anchors can later be used to integrate embeddings or transfer data from the reference to +query object using the \code{\link{MapQuery}} object. +} +\details{ +\itemize{ + \item{ Bridge cells reconstruction + } + \item{ Find anchors between objects. It can be either IntegrationAnchors or TransferAnchor. + } +} +} diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd new file mode 100644 index 000000000..ca6a74a10 --- /dev/null +++ b/man/IntegrateSketchEmbeddings.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{IntegrateSketchEmbeddings} +\alias{IntegrateSketchEmbeddings} +\title{Integrate embeddings from batch-corrected sketch cell embeddings +First construct a sketch-cell representation for all cells and +then use this and the batch-corrected embeddings of sketched cells to +construct the batch-corrected embeddings for all cells} +\usage{ +IntegrateSketchEmbeddings( + object.list, + sketch.list, + sketch.object, + features = NULL, + assay = "RNA", + sketch.reduction = "integrated_dr", + reduction.name = "pca.correct", + reduction.key = "PCcorrect_", + dictionary.method = c("sketch", "data", "embeddings")[1], + sketch.ratio = 0.8, + sketch.reduction.raw = NULL, + merged.object = NULL, + reference.index = NULL, + verbose = TRUE +) +} +\arguments{ +\item{object.list}{A list of Seurat objects with all cells} + +\item{sketch.list}{A list of Seurat objects with sketched cells} + +\item{sketch.object}{A sketched Seurat objects with integrated embeddings} + +\item{features}{Features used for sketch integration} + +\item{assay}{Assay name for raw expression} + +\item{sketch.reduction}{Dimensional reduction name for batch-corrected embeddings +in the sketched object} + +\item{reduction.name}{dimensional reduction name, pca.correct by default} + +\item{reduction.key}{dimensional reduction key, specifies the string before +the number for the dimension names. PCcorrect_ by default} + +\item{dictionary.method}{Methods to construct sketch-cell representation +for all cells. sketch by default. Can be one of: +\itemize{ +\item{sketch: Use random sketched data slot} +\item{data: Use data slot} +\item{embeddings: Use uncorrected dimensional reduction in the sketched object} +}} + +\item{sketch.ratio}{Sketch ratio of data slot when dictionary.method is set to sketch} + +\item{sketch.reduction.raw}{Uncorrected dimensional reduction name in the sketched object +when dictionary.method is set to embeddings} + +\item{merged.object}{A merged seurat object containing all cells} + +\item{reference.index}{Index for the integration reference} + +\item{verbose}{Print progress and message} +} +\description{ +Integrate embeddings from batch-corrected sketch cell embeddings +First construct a sketch-cell representation for all cells and +then use this and the batch-corrected embeddings of sketched cells to +construct the batch-corrected embeddings for all cells +} diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd new file mode 100644 index 000000000..bf9856cf2 --- /dev/null +++ b/man/LeverageScore.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/integration.R +\name{LeverageScore} +\alias{LeverageScore} +\alias{LeverageScore.default} +\alias{LeverageScore.Assay} +\alias{LeverageScore.Seurat} +\title{Calculate Leverage score for all cells} +\usage{ +LeverageScore(object, ...) + +\method{LeverageScore}{default}( + object, + features = NULL, + nsketch = 5000L, + ndims = NULL, + sampling.method = c("CountSketch", "Gaussian"), + MARGIN = 2L, + eps = 0.5, + seed = 123, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Assay}( + object, + features = NULL, + nsketch = 5000L, + ndims = NULL, + sampling.method = c("CountSketch", "Gaussian")[1], + slot = "data", + seed = 123, + eps = 0.5, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Seurat}( + object, + features = NULL, + assay = NULL, + nsketch = 5000L, + ndims = NULL, + var.name = "leverage.score", + sampling.method = c("CountSketch", "Gaussian")[1], + slot = "data", + eps = 0.5, + seed = 123, + over.write = FALSE, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{An object} + +\item{...}{} + +\item{features}{Features used to calculate leverage score} + +\item{nsketch}{Number of rows in the random sketch matrix} + +\item{ndims}{Number of dimensions in the JL embeddings} + +\item{sampling.method}{Sampling method for generating random matrix} + +\item{MARGIN}{Margin} + +\item{eps}{error tolerance for JL embeddings} + +\item{seed}{Set a random seed} + +\item{verbose}{Print message and process} + +\item{slot}{The slot used for leverage score calculation. data slot is used by default} + +\item{assay}{Assay used to calculate leverage score} + +\item{var.name}{Variable name stored leverage score in the meta.data} + +\item{over.write}{If over write the variable with leverage score} +} +\value{ +Returns a seurat object with additional column storing leverage score +} +\description{ +Leverage score can be used to sample representative cells from scRNA data. +The more abundant population will be assigned less leverage score. +Leverage-score can guarantee that both abundant and rare populations will +be sampled. We used variable features in the data slot to calculate leverage +score for all cells. +} +\references{ +Clarkson KL, Woodruff DP. +Low Rank Approximation and Regression in Input Sparsity Time. +Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. +\url{https://https://arxiv.org/abs/1207.6365}; +} diff --git a/man/LeverageScoreSampling.Rd b/man/LeverageScoreSampling.Rd new file mode 100644 index 000000000..fdcd7ef9a --- /dev/null +++ b/man/LeverageScoreSampling.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{LeverageScoreSampling} +\alias{LeverageScoreSampling} +\title{Subset objects based on Leverage score} +\usage{ +LeverageScoreSampling( + object, + num.cells = 5000, + assay = NULL, + features = NULL, + var.name = "leverage.score", + over.write = FALSE, + seed = 123, + ... +) +} +\arguments{ +\item{object}{A seurat object} + +\item{num.cells}{Number of sampled cells} + +\item{assay}{Assay used to calculate leverage score} + +\item{features}{Features used to calculate leverage score} + +\item{var.name}{Variable name stored leverage score in the meta.data} + +\item{over.write}{If over write the variable with leverage score} + +\item{seed}{Set a random seed.By default, sets the seed to 123} + +\item{...}{Arguments passed to LeverageScore} +} +\value{ +Returns a sub-sampled seurat object +} +\description{ +Subset objects based on Leverage score +} diff --git a/man/NNtoGraph.Rd b/man/NNtoGraph.Rd new file mode 100644 index 000000000..d4711c280 --- /dev/null +++ b/man/NNtoGraph.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{NNtoGraph} +\alias{NNtoGraph} +\title{Convert Neighbor class to an asymmetrical Graph class} +\usage{ +NNtoGraph(nn.object, col.cells = NULL, weighted = FALSE) +} +\arguments{ +\item{nn.object}{A neighbor class object} + +\item{col.cells}{Cells names of the neighbors, cell names in nn.object is used by default} + +\item{weighted}{Determine if use distance in the Graph} +} +\value{ +Returns a Graph object +} +\description{ +Convert Neighbor class to an asymmetrical Graph class +} diff --git a/man/ProjectDimReduc.Rd b/man/ProjectDimReduc.Rd new file mode 100644 index 000000000..cf15812ba --- /dev/null +++ b/man/ProjectDimReduc.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{ProjectDimReduc} +\alias{ProjectDimReduc} +\title{Project query data to reference dimensional reduction} +\usage{ +ProjectDimReduc( + query, + reference, + mode = c("pcaproject", "lsiproject"), + reference.reduction, + combine = FALSE, + query.assay = NULL, + reference.assay = NULL, + features = NULL, + do.scale = TRUE, + reduction.name = NULL, + reduction.key = NULL, + verbose = TRUE +) +} +\arguments{ +\item{query}{Query object} + +\item{reference}{Reference object} + +\item{mode}{Projection mode name for projection + \itemize{ +\item{pcaproject: PCA projection} +\item{lsiproject: LSI projection} +}} + +\item{reference.reduction}{Name of dimensional reduction in the reference object} + +\item{combine}{Determine if query and reference objects are combined} + +\item{query.assay}{Assay used for query object} + +\item{reference.assay}{Assay used for reference object} + +\item{features}{Features used for projection} + +\item{do.scale}{Determine if scale expression matrix in the pcaproject mode} + +\item{reduction.name}{dimensional reduction name, reference.reduction is used by default} + +\item{reduction.key}{dimensional reduction key, the key in reference.reduction +is used by default} + +\item{verbose}{Print progress and message} +} +\value{ +Returns a query-only or query-reference combined seurat object +} +\description{ +Project query data to reference dimensional reduction +} diff --git a/man/RunGraphLaplacian.Rd b/man/RunGraphLaplacian.Rd new file mode 100644 index 000000000..595f58d26 --- /dev/null +++ b/man/RunGraphLaplacian.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/integration.R +\name{RunGraphLaplacian} +\alias{RunGraphLaplacian} +\alias{RunGraphLaplacian.Seurat} +\alias{RunGraphLaplacian.default} +\title{Run Graph Laplacian Eigendecomposition} +\usage{ +RunGraphLaplacian(object, ...) + +\method{RunGraphLaplacian}{Seurat}( + object, + graph, + reduction.name = "lap", + reduction.key = "LAP_", + n = 50, + verbose = TRUE, + ... +) + +\method{RunGraphLaplacian}{default}(object, n = 50, reduction.key = "LAP_", verbose = TRUE, ...) +} +\arguments{ +\item{object}{A Seurat object} + +\item{...}{Arguments passed to +\code{\link[RSpectra:eigs_sym]{RSpectra::eigs_sym}}} + +\item{graph}{The name of graph} + +\item{reduction.name}{dimensional reduction name, lap by default} + +\item{reduction.key}{dimensional reduction key, specifies the string before +the number for the dimension names. LAP by default} + +\item{n}{Total Number of Eigenvectors to compute and store (50 by default)} + +\item{verbose}{Print message and process} +} +\value{ +Returns Seurat object with the Graph laplacian eigenvector +calculation stored in the reductions slot +} +\description{ +Run a graph laplacian dimensionality reduction. It is used as a low +dimensional representation for a cell-cell graph. The input graph +should be symmetric +} +\concept{dimensional_reduction} From ca7098d05e500d6a5dc3da42036c851a843907bc Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 18 Feb 2022 06:57:38 -0500 Subject: [PATCH 078/979] update dims --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 173a2a2e3..7f854955d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3004,7 +3004,7 @@ TransferData <- function( } weight.reduction <- combined.ob[[weight.reduction]] } - dims <- dims %||% (1:ncol(weight.reduction)) + dims <- dims %||% seq_len(length.out = ncol(x = weight.reduction)) if (max(dims) > ncol(x = weight.reduction)) { stop("dims is larger than the number of available dimensions in ", "weight.reduction (", ncol(x = weight.reduction), ").", call. = FALSE) From 5b2896c5570afc6ad3029c1bfc84fcf3a637a2b5 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 22 Feb 2022 13:51:46 -0500 Subject: [PATCH 079/979] fix formatting --- vignettes/integration_rpca.Rmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vignettes/integration_rpca.Rmd b/vignettes/integration_rpca.Rmd index e0f6c96e2..525627602 100644 --- a/vignettes/integration_rpca.Rmd +++ b/vignettes/integration_rpca.Rmd @@ -34,10 +34,11 @@ In this vignette, we present a slightly modified workflow for the integration of By identifying shared sources of variation between datasets, CCA is well-suited for identifying anchors when cell types are conserved, but there are very substantial differences in gene expression across experiments. CCA-based integration therefore enables integrative analysis when experimental conditions or disease states introduce very strong expression shifts, or when integrating datasets across modalities and species. However, CCA-based integration may also lead to overcorrection, especially when a large proportion of cells are non-overlapping across datasets. -RPCA-based integration runs significantly faster, and also represents a more conservative approach where cells in different biological states are less likely to 'align' after integration. We therefore,recommend RPCA during integrative analysis where: +RPCA-based integration runs significantly faster, and also represents a more conservative approach where cells in different biological states are less likely to 'align' after integration. We therefore recommend RPCA during integrative analysis where: + * A substantial fraction of cells in one dataset have no matching type in the other * Datasets originate from the same platform (i.e. multiple lanes of 10x genomics) -* There are a large number of datasets or cells to integrate (see INSERT LINK for more tips on integrating large datasets) +* There are a large number of datasets or cells to integrate (see [here](https://satijalab.org/seurat/articles/integration_large_datasets.html) for more tips on integrating large datasets) Below, we demonstrate the use of reciprocal PCA to align the same stimulated and resting datasets first analyzed in our [introduction to scRNA-seq integration](integration_introduction.html) vignette. While the list of commands is nearly identical, this workflow requires users to run principal components analysis (PCA) individually on each dataset prior to integration. Users should also set the 'reduction' argument to 'rpca', when running `FindIntegrationAnchors()`. From b77a57275996e84a3e13eb5e2e40c3aac931fb63 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 24 Feb 2022 18:43:07 -0500 Subject: [PATCH 080/979] single object BridgeRep --- R/integration.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/integration.R b/R/integration.R index 7f854955d..c9a769617 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5795,6 +5795,13 @@ BridgeCellsRepresentation <- function(object.list, if (verbose) { message("Constructing Bridge-cells representation") } + single.object = FALSE + if (length(x = object.list) == 1 & + inherits(x = object.list, what = 'Seurat') + ) { + object.list <- list(object.list) + single.object = TRUE + } dims.list <- list() for (i in 1:length(object.reduction.list)) { ref.dims <- list( @@ -5879,6 +5886,9 @@ BridgeCellsRepresentation <- function(object.list, } ) } + if (single.object) { + object.list <- object.list[[1]] + } return(object.list) } From 9a143c2606e65d89d364fd923fb4e81e7aa462b8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 24 Feb 2022 22:11:59 -0500 Subject: [PATCH 081/979] add two bridge integration wrapper functions --- R/integration.R | 224 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 194 insertions(+), 30 deletions(-) diff --git a/R/integration.R b/R/integration.R index c9a769617..7d39ef8c3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5670,7 +5670,7 @@ FindAssayAnchor <- function( verbose = TRUE ) { reduction.name <- reduction %||% paste0(assay, ".reduc") - if (!reduction %in% Reductions(object.list[[1]])) { + if ( is.null(x = reduction) || !reduction %in% Reductions(object.list[[1]])) { object.list <- lapply(object.list, function(x) { if (is.null(reduction)) { x[[reduction.name]] <- CreateDimReducObject( @@ -5941,7 +5941,6 @@ BridgeCellsRepresentation <- function(object.list, #' #' @return Returns an \code{\link{AnchorSet}} object that can be used as input to #' \code{\link{IntegrateEmbeddings}}.or \code{\link{MapQuery}} -#' @export #' FindBridgeAnchor <- function(object.list, @@ -5954,13 +5953,13 @@ FindBridgeAnchor <- function(object.list, laplacian.dims = NULL, reduction = c("direct", "cca")[1], bridge.assay.name = "Bridge", + reference.bridge.stored = FALSE, k.anchor = 20, k.score = 50, verbose = TRUE, ... ) { - if (!is.null(laplacian.reduction)) { bridge.method <- "bridge graph" } else { @@ -5996,39 +5995,36 @@ FindBridgeAnchor <- function(object.list, } bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") - object.list <- BridgeCellsRepresentation( - object.list = object.list , - bridge.object = bridge.object, - object.reduction.list = object.reduction.list, - bridge.reduction.list = bridge.reduction.list, - bridge.assay.name = bridge.assay.name, - laplacian.reduction = laplacian.reduction, - laplacian.dims = laplacian.dims, - verbose = verbose - ) - # assay to dimensional reduction - object.list <- lapply( - X = object.list, - FUN = function(x) { - x[[bridge.reduction.name]] <- CreateDimReducObject( - embeddings = t(GetAssayData( - object = x, - slot = "data", - assay = bridge.assay.name - )), - key = "L_", - assay = bridge.assay.name - ) - return(x) - } - ) + if (reference.bridge.stored) { + object.list[[query]] <- BridgeCellsRepresentation( + object.list = object.list[[query]] , + bridge.object = bridge.object, + object.reduction.list = object.reduction.list[[query]] , + bridge.reduction.list = bridge.reduction.list[[query]] , + bridge.assay.name = bridge.assay.name, + laplacian.reduction = laplacian.reduction, + laplacian.dims = laplacian.dims, + verbose = verbose + ) + } else { + object.list <- BridgeCellsRepresentation( + object.list = object.list , + bridge.object = bridge.object, + object.reduction.list = object.reduction.list, + bridge.reduction.list = bridge.reduction.list, + bridge.assay.name = bridge.assay.name, + laplacian.reduction = laplacian.reduction, + laplacian.dims = laplacian.dims, + verbose = verbose + ) + } + if (reduction == "direct") { anchor <- FindAssayAnchor( object.list = object.list , reference = reference, slot = "data", anchor.type = anchor.type, - reduction = bridge.reduction.name, assay = bridge.assay.name, k.anchor = k.anchor, k.score = k.score, @@ -7120,3 +7116,171 @@ ProjectDimReduc <- function(query, return(query) } } + + + + +PrepareBridgeReference <- function ( + reference, + bridge, + reference.reduction = 'spca', + reference.dims = 1:50, + normlization.method = c('SCT', 'LogNormalization'), + reference.assay = NULL, + bridge.ref.assay = 'RNA', + bridge.query.assay = 'ATAC', + bridge.query.features = NULL, + supervised.reduction = c(NULL, 'slsi', 'spca' )[1], + bridge.query.reduction = NULL, + laplacian.reduction.name = 'lap', + laplacian.reduction.key = 'lap_', + laplacian.reduction.dims = 1:50, + verbose = TRUE +) { + ## checking + if (!is.null(x = bridge.query.reduction) & !is.null(x = supervised.reduction)) { + stop('bridge.query.reduction and supervised.reduction can only set one') + } + if (is.null(x = bridge.query.reduction) & is.null(x = supervised.reduction)) { + stop('Both bridge.query.reduction and supervised.reduction are NULL. One of them needs to be set') + } + bridge.query.features <- bridge.query.features %||% + VariableFeatures(object = bridge[[bridge.query.assay]]) + if (length(x = bridge.query.features == 0)) { + stop('bridge object', bridge.query.assay, + ' has no variable genes and bridge.query.features has no input') + } + + # modality harmonization + reference.assay <- reference.assay %||% DefaultAssay(reference) + DefaultAssay(reference) <- reference.assay + DefaultAssay(bridge) <- bridge.ref.assay + ref.anchor <- FindTransferAnchors( + reference = reference, + reference.reduction = reference.reduction, + normalization.method = normlization.method, + dims = reference.dims, + query = bridge, + recompute.residuals = TRUE, + features = rownames(reference[[reference.reduction]]@feature.loadings), + k.filter = NA, + verbose = verbose + ) + bridge <- MapQuery(anchorset = ref.anchor, + reference = reference, + query = bridge, + store.weights = TRUE, + verbose = verbose + ) + + bridge.ref.reduction <- paste0('ref.', reference.reduction) + bridge <- FindNeighbors(object = bridge, + reduction = bridge.ref.reduction, + dims = 1:ncol(bridge[[bridge.ref.reduction]]), + return.neighbor = FALSE, + graph.name = c('bridge.ref.nn', 'bridge.ref.snn'), + prune.SNN = 0) + bridge <- RunGraphLaplacian(object = bridge, + graph = "bridge.ref.snn", + reduction.name = laplacian.reduction.name, + reduction.key = laplacian.reduction.key, + verbose = verbose) + DefaultAssay(object = bridge) <- bridge.query.assay + if (!is.null(supervised.reduction)) { + bridge <- switch(EXPR = supervised.reduction, + 'slsi' = { + bridge.reduc <- RunSLSI(object = bridge, + features = VariableFeatures(bridge), + graph = 'bridge.ref.nn', + assay = bridge.query.assay + ) + bridge.reduc + }, + 'spca' = { + bridge.reduc <- RunSPCA(object = bridge, + features = VariableFeatures(bridge), + graph = 'bridge.ref.snn', + assay = bridge.query.assay + ) + bridge.reduc + } + ) + } + + # bridge representation + reference.bridge <- BridgeCellsRepresentation(object.list = reference, + bridge.object = bridge, + object.reduction.list = c(reference.reduction), + bridge.reduction.list = c(bridge.ref.reduction), + laplacian.reduction = laplacian.reduction.name, + laplacian.dims = laplacian.reduction.dims + ) + + param.list <- list( + reference.reduction = reference.reduction, + reference.dims = reference.dims, + reference.assay = reference.assay, + bridge.ref.assay = bridge.ref.assay, + bridge.query.assay = bridge.query.assay, + supervised.reduction = supervised.reduction, + bridge.ref.reduction = bridge.ref.reduction, + bridge.query.reduction = bridge.query.reduction, + laplacian.reduction.name = laplacian.reduction.name, + laplacian.reduction.dims = laplacian.reduction.dims + ) + + output.list <- list(bridge = bridge, + reference = reference.bridge, + params.list = param.list + ) + return(output.list) +} + +FindBridgeTransferAnchors <- function( + BridgeReference, + query, + query.assay = NULL, + dims, + reduction = c('lsiproject', 'pcaproject')[1] +){ + + query.assay <- query.assay %||% DefaultAssay(query) + DefaultAssay(query) <- query.assay + + + bridge.query.assay <- BridgeReference$params.list$bridge.query.assay + bridge.query.reduction <- BridgeReference$params.list$bridge.query.reduction %||% + BridgeReference$params.list$supervised.reduction + + reference.reduction <- BridgeReference$params.list$reference.reduction + bridge.ref.reduction <- BridgeReference$params.list$bridge.ref.reduction + + DefaultAssay(BridgeReference$bridge) <- bridge.query.assay + + if ( reduction == "lsiproject") { + + query.anchor <- FindTransferAnchors( reference = BridgeReference$bridge, + reference.reduction = bridge.query.reduction, + dims = dims, + query = query, + reduction = reduction, + scale = FALSE, + features = rownames(BridgeReference$bridge[[bridge.query.reduction]]@feature.loadings ), + k.filter = NA) + query <- MapQuery(anchorset = query.anchor, + reference = BridgeReference$bridge, + query = query, + store.weights = TRUE + ) + } + + + bridge_anchor <- FindBridgeAnchor(object.list = list(BridgeReference$reference, query), + bridge.object = BridgeReference$bridge, + object.reduction.list = list(reference.reduction, paste0('ref.', bridge.query.reduction)), + bridge.reduction.list = list(bridge.ref.reduction, bridge.query.reduction), + anchor.type = "Transfer", + reference.bridge.stored = TRUE + ) + return(bridge_anchor) +} From e5bd262ff3193c5fd6d0a40f74b67185bbe460ed Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 24 Feb 2022 22:17:43 -0500 Subject: [PATCH 082/979] change reduction.list to reduction --- R/integration.R | 103 +++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/R/integration.R b/R/integration.R index 7d39ef8c3..0f4ed9b1b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5776,8 +5776,8 @@ FindAssayAnchor <- function( BridgeCellsRepresentation <- function(object.list, bridge.object, - object.reduction.list, - bridge.reduction.list, + object.reduction, + bridge.reduction, laplacian.reduction = NULL, laplacian.dims = NULL, bridge.assay.name = "Bridge", @@ -5803,14 +5803,14 @@ BridgeCellsRepresentation <- function(object.list, single.object = TRUE } dims.list <- list() - for (i in 1:length(object.reduction.list)) { + for (i in 1:length(object.reduction)) { ref.dims <- list( - object= Misc(object.list[[i]][[object.reduction.list[[i]]]], slot = 'ref.dims'), - bridge = Misc( bridge.object[[bridge.reduction.list[[i]]]], slot = 'ref.dims') + object= Misc(object.list[[i]][[object.reduction[[i]]]], slot = 'ref.dims'), + bridge = Misc( bridge.object[[bridge.reduction[[i]]]], slot = 'ref.dims') ) all.dims <- list( - object = 1:ncol(object.list[[i]][[object.reduction.list[[i]]]]), - bridge = 1:ncol( bridge.object[[bridge.reduction.list[[i]] ]]) + object = 1:ncol(object.list[[i]][[object.reduction[[i]]]]), + bridge = 1:ncol( bridge.object[[bridge.reduction[[i]] ]]) ) projected.dims.index <- which(sapply(ref.dims, function(x) !is.null(x))) if (length(projected.dims.index) == 0) { @@ -5820,10 +5820,10 @@ BridgeCellsRepresentation <- function(object.list, dims.list[[i]] <- all.dims } else { stop( 'The number of dimensions in the object.list ', - object.reduction.list[[i]], + object.reduction[[i]], ' (', length(all.dims[[1]]), ') ', ' and the number of dimensions in the bridge object ', - bridge.reduction.list[[i]], + bridge.reduction[[i]], ' (', length(all.dims[[2]]), ') ', ' is different.') } @@ -5842,19 +5842,19 @@ BridgeCellsRepresentation <- function(object.list, SA.inv <- ginv( X = Embeddings( object = bridge.object, - reduction = bridge.reduction.list[[x]] + reduction = bridge.reduction[[x]] )[ ,dims.list[[x]]$bridge] ) if (!is.null(laplacian.reduction)) { lap.vector <- Embeddings(bridge.object[[laplacian.reduction]])[,laplacian.dims] X <- Embeddings( object = object.list[[x]], - reduction = object.reduction.list[[x]] + reduction = object.reduction[[x]] )[, dims.list[[x]]$object] %*% (SA.inv %*% lap.vector) } else { X <- Embeddings( object = object.list[[x]], - reduction = object.reduction.list[[x]] + reduction = object.reduction[[x]] )[, dims.list[[x]]$object] %*% SA.inv colnames(X) <- Cells(bridge.object) } @@ -5911,10 +5911,10 @@ BridgeCellsRepresentation <- function(object.list, #' find anchors for downstream integration. #' @param bridge.object A multimodal bridge seurat which connects two #' single-modality objects -#' @param object.reduction.list A list of dimensional reductions from object.list used +#' @param object.reduction A list of dimensional reductions from object.list used #' to be reconstructed by bridge.obejct -#' @param bridge.reduction.list A list of dimensional reductions from bridge.object used -#' to reconstruct object.reduction.list +#' @param bridge.reduction A list of dimensional reductions from bridge.object used +#' to reconstruct object.reduction #' @param anchor.type The type of anchors. Can #' be one of: #' \itemize{ @@ -5945,8 +5945,8 @@ BridgeCellsRepresentation <- function(object.list, FindBridgeAnchor <- function(object.list, bridge.object, - object.reduction.list, - bridge.reduction.list, + object.reduction, + bridge.reduction, anchor.type = c("Integration", "Transfer")[1], reference = NULL, laplacian.reduction = "lap", @@ -5999,8 +5999,8 @@ FindBridgeAnchor <- function(object.list, object.list[[query]] <- BridgeCellsRepresentation( object.list = object.list[[query]] , bridge.object = bridge.object, - object.reduction.list = object.reduction.list[[query]] , - bridge.reduction.list = bridge.reduction.list[[query]] , + object.reduction = object.reduction[[query]] , + bridge.reduction = bridge.reduction[[query]] , bridge.assay.name = bridge.assay.name, laplacian.reduction = laplacian.reduction, laplacian.dims = laplacian.dims, @@ -6010,8 +6010,8 @@ FindBridgeAnchor <- function(object.list, object.list <- BridgeCellsRepresentation( object.list = object.list , bridge.object = bridge.object, - object.reduction.list = object.reduction.list, - bridge.reduction.list = bridge.reduction.list, + object.reduction = object.reduction, + bridge.reduction = bridge.reduction, bridge.assay.name = bridge.assay.name, laplacian.reduction = laplacian.reduction, laplacian.dims = laplacian.dims, @@ -7129,9 +7129,9 @@ PrepareBridgeReference <- function ( reference.assay = NULL, bridge.ref.assay = 'RNA', bridge.query.assay = 'ATAC', - bridge.query.features = NULL, supervised.reduction = c(NULL, 'slsi', 'spca' )[1], bridge.query.reduction = NULL, + bridge.query.features = NULL, laplacian.reduction.name = 'lap', laplacian.reduction.key = 'lap_', laplacian.reduction.dims = 1:50, @@ -7150,7 +7150,6 @@ PrepareBridgeReference <- function ( stop('bridge object', bridge.query.assay, ' has no variable genes and bridge.query.features has no input') } - # modality harmonization reference.assay <- reference.assay %||% DefaultAssay(reference) DefaultAssay(reference) <- reference.assay @@ -7172,7 +7171,6 @@ PrepareBridgeReference <- function ( store.weights = TRUE, verbose = verbose ) - bridge.ref.reduction <- paste0('ref.', reference.reduction) bridge <- FindNeighbors(object = bridge, reduction = bridge.ref.reduction, @@ -7206,16 +7204,15 @@ PrepareBridgeReference <- function ( } ) } - # bridge representation - reference.bridge <- BridgeCellsRepresentation(object.list = reference, - bridge.object = bridge, - object.reduction.list = c(reference.reduction), - bridge.reduction.list = c(bridge.ref.reduction), - laplacian.reduction = laplacian.reduction.name, - laplacian.dims = laplacian.reduction.dims + reference.bridge <- BridgeCellsRepresentation( + object.list = reference, + bridge.object = bridge, + object.reduction = c(reference.reduction), + bridge.reduction = c(bridge.ref.reduction), + laplacian.reduction = laplacian.reduction.name, + laplacian.dims = laplacian.reduction.dims ) - param.list <- list( reference.reduction = reference.reduction, reference.dims = reference.dims, @@ -7243,44 +7240,42 @@ FindBridgeTransferAnchors <- function( dims, reduction = c('lsiproject', 'pcaproject')[1] ){ - query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - - bridge.query.assay <- BridgeReference$params.list$bridge.query.assay bridge.query.reduction <- BridgeReference$params.list$bridge.query.reduction %||% - BridgeReference$params.list$supervised.reduction + BridgeReference$params.list$supervised.reduction reference.reduction <- BridgeReference$params.list$reference.reduction bridge.ref.reduction <- BridgeReference$params.list$bridge.ref.reduction DefaultAssay(BridgeReference$bridge) <- bridge.query.assay - if ( reduction == "lsiproject") { - - query.anchor <- FindTransferAnchors( reference = BridgeReference$bridge, - reference.reduction = bridge.query.reduction, - dims = dims, - query = query, - reduction = reduction, - scale = FALSE, - features = rownames(BridgeReference$bridge[[bridge.query.reduction]]@feature.loadings ), - k.filter = NA) + if (reduction == "lsiproject") { + + query.anchor <- FindTransferAnchors( + reference = BridgeReference$bridge, + reference.reduction = bridge.query.reduction, + dims = dims, + query = query, + reduction = reduction, + scale = FALSE, + features = rownames(BridgeReference$bridge[[bridge.query.reduction]]@feature.loadings ), + k.filter = NA + ) query <- MapQuery(anchorset = query.anchor, reference = BridgeReference$bridge, query = query, store.weights = TRUE ) } - - - bridge_anchor <- FindBridgeAnchor(object.list = list(BridgeReference$reference, query), - bridge.object = BridgeReference$bridge, - object.reduction.list = list(reference.reduction, paste0('ref.', bridge.query.reduction)), - bridge.reduction.list = list(bridge.ref.reduction, bridge.query.reduction), - anchor.type = "Transfer", - reference.bridge.stored = TRUE + bridge_anchor <- FindBridgeAnchor( + object.list = list(BridgeReference$reference, query), + bridge.object = BridgeReference$bridge, + object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), + bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), + anchor.type = "Transfer", + reference.bridge.stored = TRUE ) return(bridge_anchor) } From fe2339377046ed3cb6053fd5e81a6c322473c8a5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 24 Feb 2022 23:04:34 -0500 Subject: [PATCH 083/979] define BridgeReferenceSet --- R/integration.R | 48 ++++++++++++++++++++++++++---------------------- R/objects.R | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 22 deletions(-) diff --git a/R/integration.R b/R/integration.R index 0f4ed9b1b..b1015f6da 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7146,9 +7146,9 @@ PrepareBridgeReference <- function ( } bridge.query.features <- bridge.query.features %||% VariableFeatures(object = bridge[[bridge.query.assay]]) - if (length(x = bridge.query.features == 0)) { - stop('bridge object', bridge.query.assay, - ' has no variable genes and bridge.query.features has no input') + if (length(x = bridge.query.features) == 0) { + stop('bridge object ', bridge.query.assay, + ' assay has no variable genes and bridge.query.features has no input') } # modality harmonization reference.assay <- reference.assay %||% DefaultAssay(reference) @@ -7213,7 +7213,9 @@ PrepareBridgeReference <- function ( laplacian.reduction = laplacian.reduction.name, laplacian.dims = laplacian.reduction.dims ) - param.list <- list( + + + params <- list( reference.reduction = reference.reduction, reference.dims = reference.dims, reference.assay = reference.assay, @@ -7225,14 +7227,18 @@ PrepareBridgeReference <- function ( laplacian.reduction.name = laplacian.reduction.name, laplacian.reduction.dims = laplacian.reduction.dims ) - - output.list <- list(bridge = bridge, - reference = reference.bridge, - params.list = param.list + bridge_reference.set <- new( + Class = "BridgeReferenceSet", + bridge = bridge, + reference = reference.bridge, + params = params ) - return(output.list) + return(bridge_reference.set) } + + + FindBridgeTransferAnchors <- function( BridgeReference, query, @@ -7242,36 +7248,34 @@ FindBridgeTransferAnchors <- function( ){ query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - bridge.query.assay <- BridgeReference$params.list$bridge.query.assay - bridge.query.reduction <- BridgeReference$params.list$bridge.query.reduction %||% - BridgeReference$params.list$supervised.reduction - - reference.reduction <- BridgeReference$params.list$reference.reduction - bridge.ref.reduction <- BridgeReference$params.list$bridge.ref.reduction - - DefaultAssay(BridgeReference$bridge) <- bridge.query.assay + params <- slot(object = BridgeReference, name = "params") + bridge.query.assay <- params$bridge.query.assay + bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction + reference.reduction <- params$reference.reduction + bridge.ref.reduction <- params$bridge.ref.reduction + DefaultAssay(BridgeReference@bridge) <- bridge.query.assay if (reduction == "lsiproject") { query.anchor <- FindTransferAnchors( - reference = BridgeReference$bridge, + reference = BridgeReference@bridge, reference.reduction = bridge.query.reduction, dims = dims, query = query, reduction = reduction, scale = FALSE, - features = rownames(BridgeReference$bridge[[bridge.query.reduction]]@feature.loadings ), + features = rownames(BridgeReference@bridge[[bridge.query.reduction]]@feature.loadings ), k.filter = NA ) query <- MapQuery(anchorset = query.anchor, - reference = BridgeReference$bridge, + reference = BridgeReference@bridge, query = query, store.weights = TRUE ) } bridge_anchor <- FindBridgeAnchor( - object.list = list(BridgeReference$reference, query), - bridge.object = BridgeReference$bridge, + object.list = list(BridgeReference@reference, query), + bridge.object = BridgeReference@bridge, object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Transfer", diff --git a/R/objects.R b/R/objects.R index b86812bc0..9045de9aa 100644 --- a/R/objects.R +++ b/R/objects.R @@ -116,6 +116,31 @@ ModalityWeights <- setClass( ) ) + + + +#' The BridgeReferenceSet Class +#' The BridgeReferenceSet is an output from PrepareBridgeReference +#' @slot bridge The multi-omic object +#' @slot reference The Reference object only containing bridge representation assay +#' @slot params A list of parameters used in the PrepareBridgeReference +#' @slot command Store log of parameters that were used +#' +#' @name BridgeReferenceSet-class +#' @rdname BridgeReferenceSet-class +#' @concept objects +#' @exportClass BridgeReferenceSet +#' +BridgeReferenceSet <- setClass( + Class = "BridgeReferenceSet", + slots = list( + bridge = "ANY", + reference = "ANY", + params = "list", + command = "ANY" + ) +) + #' The IntegrationData Class #' #' The IntegrationData object is an intermediate storage container used internally throughout the @@ -2301,6 +2326,22 @@ setMethod( } ) +setMethod( + f = 'show', + signature = 'BridgeReferenceSet', + definition = function(object) { + cat( + 'A BridgeReferenceSet object has a bridge object with ', + ncol(slot(object = object, name = 'bridge')), + 'cells and a reference object with ', + ncol(slot(object = object, name = 'reference')), + 'cells. \n','The bridge query reduction is ', + slot(object = object, name = 'params')$bridge.query.reduction %||% + slot(object = object, name = 'params')$supervised.reduction, + "\n This can be used as input to FindBridgeTransferAnchors and FindBridgeIntegrationAnchors") + } +) + setMethod( f = 'show', signature = 'SCTModel', From 8e3610272b8329dc4b258cf1b9ff1028a8764223 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 25 Feb 2022 13:19:44 -0500 Subject: [PATCH 084/979] add docu --- NAMESPACE | 4 +- R/integration.R | 105 ++++++++++++++++++++++++------- man/BridgeCellsRepresentation.Rd | 12 ++-- man/FindBridgeAnchor.Rd | 13 ++-- man/MapQuery.Rd | 3 +- 5 files changed, 98 insertions(+), 39 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4d0ce6feb..6981f5c6f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -184,7 +184,7 @@ export(FeatureScatter) export(FetchData) export(FilterSlideSeq) export(FindAllMarkers) -export(FindBridgeAnchor) +export(FindBridgeTransferAnchors) export(FindClusters) export(FindConservedMarkers) export(FindIntegrationAnchors) @@ -273,6 +273,7 @@ export(PredictAssay) export(PrepLDA) export(PrepSCTFindMarkers) export(PrepSCTIntegration) +export(PrepareBridgeReference) export(Project) export(ProjectDim) export(ProjectDimReduc) @@ -363,6 +364,7 @@ export(as.sparse) export(scalefactors) exportClasses(AnchorSet) exportClasses(Assay) +exportClasses(BridgeReferenceSet) exportClasses(DimReduc) exportClasses(Graph) exportClasses(IntegrationAnchorSet) diff --git a/R/integration.R b/R/integration.R index b1015f6da..1c8be8108 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6849,6 +6849,7 @@ IntegrateSketchEmbeddings <- function(object.list, return(merged.object) } + ProjectDataEmbeddings <- function(object, assay = 'RNA', feature.loadings, @@ -7118,12 +7119,39 @@ ProjectDimReduc <- function(query, } - - +#' Prepare the multi-omic bridge datasets and unimodal reference +#' First perform within-modality harmonization of bridge and reference +#' then perform dimensional reduction on the SNN graph of bridge datasets +#' via Laplacian Eigendecomposition +#' Lastly, construct a bridge dictionary representation for unimodal reference cells +#' @param reference A reference Seurat object +#' @param bridge A multi-omic bridge Seurat object +#' @param reference.reduction Name of dimensional reduction of the reference object +#' @param reference.dims Number of dimensions used for the reference.reduction +#' @param normlization.method Name of normalization method used: LogNormalize +#' or SCT +#' @param reference.assay Assay name for reference +#' in the sketched object +#' @param bridge.ref.assay Assay name for bridge used for reference mapping +#' @param bridge.query.assay Assay name for bridge used for query mapping +#' @param supervised.reduction Determine if perform supervised LSI or supervised PCA +#' @param bridge.query.reduction Name of dimensions used for the query-bridge harmonization. +#' bridge.query.reduction and supervised.reduction needs to set one +#' @param bridge.query.features Features used for bridge query dimensional reduction +#' @param laplacian.reduction.name Name of dimensional reduction name of graph laplacian eigenspace +#' @param laplacian.reduction.key dimensional reduction key +#' @param laplacian.reduction.dims Number of dimenions used for graph laplacian eigenspace +#' @param verbose Print progress and message +#' +#'#' @return +#' @export +#' @return Returns a \code{BridgeReferenceSet} that can be used as input to \code{\link{FindBridgeTransferAnchors}} +#' The parameters used are stored in the \code{BridgeReferenceSet} as well +#' PrepareBridgeReference <- function ( reference, bridge, - reference.reduction = 'spca', + reference.reduction = 'pca', reference.dims = 1:50, normlization.method = c('SCT', 'LogNormalization'), reference.assay = NULL, @@ -7174,7 +7202,7 @@ PrepareBridgeReference <- function ( bridge.ref.reduction <- paste0('ref.', reference.reduction) bridge <- FindNeighbors(object = bridge, reduction = bridge.ref.reduction, - dims = 1:ncol(bridge[[bridge.ref.reduction]]), + dims = 1:ncol(x = bridge[[bridge.ref.reduction]]), return.neighbor = FALSE, graph.name = c('bridge.ref.nn', 'bridge.ref.snn'), prune.SNN = 0) @@ -7213,38 +7241,66 @@ PrepareBridgeReference <- function ( laplacian.reduction = laplacian.reduction.name, laplacian.dims = laplacian.reduction.dims ) - - - params <- list( - reference.reduction = reference.reduction, - reference.dims = reference.dims, - reference.assay = reference.assay, - bridge.ref.assay = bridge.ref.assay, - bridge.query.assay = bridge.query.assay, - supervised.reduction = supervised.reduction, - bridge.ref.reduction = bridge.ref.reduction, - bridge.query.reduction = bridge.query.reduction, - laplacian.reduction.name = laplacian.reduction.name, - laplacian.reduction.dims = laplacian.reduction.dims - ) bridge_reference.set <- new( Class = "BridgeReferenceSet", bridge = bridge, reference = reference.bridge, - params = params + params = list( + reference.reduction = reference.reduction, + reference.dims = reference.dims, + reference.assay = reference.assay, + bridge.ref.assay = bridge.ref.assay, + bridge.query.assay = bridge.query.assay, + supervised.reduction = supervised.reduction, + bridge.ref.reduction = bridge.ref.reduction, + bridge.query.reduction = bridge.query.reduction, + laplacian.reduction.name = laplacian.reduction.name, + laplacian.reduction.dims = laplacian.reduction.dims + ) ) return(bridge_reference.set) } +#' Find bridge anchors between unimodal query and the other unimodal reference +#' usnig a pre-computed \code{\link{BridgeReferenceSet}}. +#' +#' First, harmonized the bridge and query cells in the bridge query reduction space. +#' Then, constructe the bridge dictionary representations for query cells. +#' Next, find a set of anchors between query and reference in the bridge graph laplacian eigenspace. +#' These anchors can later be used to integrate embeddings or transfer data from the reference to +#' query object using the \code{\link{MapQuery}} object. - +#' @param BridgeReference BridgeReferenceSet object generated from +#' \code{\link{PrepareBridgeReference}} +#' @param query A query Seurat object +#' @param query.assay Assay name for query-bridge integration +#' @param dims Number of dimensions for query-bridge integration +#' @param reduction Dimensional reduction to perform when finding anchors. +#' Options are: +#' \itemize{ +#' \item{pcaproject: Project the PCA from the bridge onto the query. We +#' recommend using PCA when bridge and query datasets are from scRNA-seq} +#' \item{lsiproject: Project the LSI from the bridge onto the query. We +#' recommend using LSI when bridge and query datasets are from scATAC-seq or scCUT&TAG data. +#' This requires that LSI or supervised LSI has been computed for the bridge dataset, and the +#' same features (eg, peaks or genome bins) are present in both the bridge +#' and query. +#' } +#' @param verbose Print messages and progress +#' +#' @export +#' @return Returns an \code{AnchorSet} object that can be used as input to +#' \code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}} and +#' \code{\link{MapQuery}}. +#' FindBridgeTransferAnchors <- function( BridgeReference, query, query.assay = NULL, dims, - reduction = c('lsiproject', 'pcaproject')[1] + reduction = c('lsiproject', 'pcaproject')[1], + verbose = TRUE ){ query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay @@ -7256,7 +7312,6 @@ FindBridgeTransferAnchors <- function( DefaultAssay(BridgeReference@bridge) <- bridge.query.assay if (reduction == "lsiproject") { - query.anchor <- FindTransferAnchors( reference = BridgeReference@bridge, reference.reduction = bridge.query.reduction, @@ -7265,7 +7320,8 @@ FindBridgeTransferAnchors <- function( reduction = reduction, scale = FALSE, features = rownames(BridgeReference@bridge[[bridge.query.reduction]]@feature.loadings ), - k.filter = NA + k.filter = NA, + verbose = verbose ) query <- MapQuery(anchorset = query.anchor, reference = BridgeReference@bridge, @@ -7279,7 +7335,8 @@ FindBridgeTransferAnchors <- function( object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Transfer", - reference.bridge.stored = TRUE + reference.bridge.stored = TRUE, + verbose = verbose ) return(bridge_anchor) } diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd index 9f66b362a..bda19ce2c 100644 --- a/man/BridgeCellsRepresentation.Rd +++ b/man/BridgeCellsRepresentation.Rd @@ -7,8 +7,8 @@ BridgeCellsRepresentation( object.list, bridge.object, - object.reduction.list, - bridge.reduction.list, + object.reduction, + bridge.reduction, laplacian.reduction = NULL, laplacian.dims = NULL, bridge.assay.name = "Bridge", @@ -25,11 +25,11 @@ find anchors for downstream integration.} \item{bridge.object}{A multimodal bridge seurat which connects two single-modality objects} -\item{object.reduction.list}{A list of dimensional reductions from object.list used +\item{object.reduction}{A list of dimensional reductions from object.list used to be reconstructed by bridge.obejct} -\item{bridge.reduction.list}{A list of dimensional reductions from bridge.object used -to reconstruct object.reduction.list} +\item{bridge.reduction}{A list of dimensional reductions from bridge.object used +to reconstruct object.reduction} \item{laplacian.reduction}{Name of bridge graph laplacian dimensional reduction} @@ -37,7 +37,7 @@ to reconstruct object.reduction.list} \item{bridge.assay.name}{Assay name used for bridge object reconstruction value} -\item{return.all.assays}{if return all assays in the object.list. +\item{return.all.assays}{if return all assays in the object.list. Only bridge assay is returned by default.} \item{verbose}{Print messages and progress} diff --git a/man/FindBridgeAnchor.Rd b/man/FindBridgeAnchor.Rd index f90afdfe7..dc2261d85 100644 --- a/man/FindBridgeAnchor.Rd +++ b/man/FindBridgeAnchor.Rd @@ -7,14 +7,15 @@ FindBridgeAnchor( object.list, bridge.object, - object.reduction.list, - bridge.reduction.list, + object.reduction, + bridge.reduction, anchor.type = c("Integration", "Transfer")[1], reference = NULL, laplacian.reduction = "lap", laplacian.dims = NULL, reduction = c("direct", "cca")[1], bridge.assay.name = "Bridge", + reference.bridge.stored = FALSE, k.anchor = 20, k.score = 50, verbose = TRUE, @@ -28,11 +29,11 @@ find anchors for downstream integration.} \item{bridge.object}{A multimodal bridge seurat which connects two single-modality objects} -\item{object.reduction.list}{A list of dimensional reductions from object.list used +\item{object.reduction}{A list of dimensional reductions from object.list used to be reconstructed by bridge.obejct} -\item{bridge.reduction.list}{A list of dimensional reductions from bridge.object used -to reconstruct object.reduction.list} +\item{bridge.reduction}{A list of dimensional reductions from bridge.object used +to reconstruct object.reduction} \item{anchor.type}{The type of anchors. Can be one of: @@ -71,7 +72,7 @@ Returns an \code{\link{AnchorSet}} object that can be used as input to \code{\link{IntegrateEmbeddings}}.or \code{\link{MapQuery}} } \description{ -First, bridge object is used to reconstruct two single-modality profiles and +First, bridge object is used to reconstruct two single-modality profiles and then project those cells into bridage graph laplacian space. Next, find a set of anchors between two single-modality objects. These anchors can later be used to integrate embeddings or transfer data from the reference to diff --git a/man/MapQuery.Rd b/man/MapQuery.Rd index 1b9ee1441..035f75b4d 100644 --- a/man/MapQuery.Rd +++ b/man/MapQuery.Rd @@ -65,8 +65,7 @@ neighbor finding} \item{verbose}{Print progress bars and output} } \value{ -Returns a modified query Seurat object containing: - +Returns a modified query Seurat object containing:#' \itemize{ \item{New Assays corresponding to the features transferred and/or their corresponding prediction scores from \code{\link{TransferData}}} From ede1d38e43b15ecd2b2781ef63c2d8f20fd542e6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 25 Feb 2022 15:18:50 -0500 Subject: [PATCH 085/979] update docu --- R/integration.R | 17 ++++++++++------- man/BridgeCellsRepresentation.Rd | 7 ++++++- man/FindBridgeAnchor.Rd | 2 ++ man/LeverageScore.Rd | 9 +++------ man/RunGraphLaplacian.Rd | 3 +-- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index 1c8be8108..d60736366 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5765,6 +5765,9 @@ FindAssayAnchor <- function( #' #' @inheritParams FindBridgeAnchor #' @param return.all.assays if return all assays in the object.list. +#' @param l2.norm if l2 normalize dictionary representation +#' @param do.center if center dictionary representation +#' #' Only bridge assay is returned by default. #' #' @@ -5881,7 +5884,7 @@ BridgeCellsRepresentation <- function(object.list, object.list <- my.lapply( X = object.list, FUN = function(x) { - x <- DietSeurat(object = x, assay = bridge.assay.name, scale.data = TRUE) + x <- DietSeurat(object = x, assays = bridge.assay.name, scale.data = TRUE) return(x) } ) @@ -5932,6 +5935,7 @@ BridgeCellsRepresentation <- function(object.list, #' \item{direct: Use assay data as a dimensional reduction} #' } #' @param bridge.assay.name Assay name used for bridge object reconstruction value +#' @param reference.bridge.stored If refernece has stored the bridge dictionary representation #' @param k.anchor How many neighbors (k) to use when picking anchors #' @param k.score How many neighbors (k) to use when scoring anchors #' @param verbose Print messages and progress @@ -6207,6 +6211,7 @@ RunGraphLaplacian.Seurat <- function( #' @param reduction.key dimensional reduction key, specifies the string before #' the number for the dimension names. LAP by default #' @param verbose Print message and process +#' @param ... Arguments passed to eigs_sym #' #' #' @concept dimensional_reduction @@ -6379,8 +6384,7 @@ LeverageScore.default <- function( MARGIN = 2L, eps = 0.5, seed = 123, - verbose = TRUE, - ... + verbose = TRUE ) { features <- features %||% rownames(x = object) if (length(x = features) > 5000) { @@ -6482,8 +6486,8 @@ LeverageScore.Assay <- function(object, slot = "data", seed = 123, eps = 0.5, - verbose = TRUE, - ...) { + verbose = TRUE + ) { features <- features %||% VariableFeatures(object = object) ndims <- ndims%||%ncol(x = object) data <- GetAssayData(object, slot = slot)[features,] @@ -6521,8 +6525,7 @@ LeverageScore.Seurat <- function(object, eps = 0.5, seed = 123, over.write = FALSE, - verbose = TRUE, - ... + verbose = TRUE ) { assay <- assay %||% DefaultAssay(object) features <- features %||% VariableFeatures(object = object[[assay]]) diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd index bda19ce2c..ec4c6964d 100644 --- a/man/BridgeCellsRepresentation.Rd +++ b/man/BridgeCellsRepresentation.Rd @@ -37,7 +37,12 @@ to reconstruct object.reduction} \item{bridge.assay.name}{Assay name used for bridge object reconstruction value} -\item{return.all.assays}{if return all assays in the object.list. +\item{return.all.assays}{if return all assays in the object.list.} + +\item{l2.norm}{if l2 normalize dictionary representation} + +\item{do.center}{if center dictionary representation + Only bridge assay is returned by default.} \item{verbose}{Print messages and progress} diff --git a/man/FindBridgeAnchor.Rd b/man/FindBridgeAnchor.Rd index dc2261d85..8b715ba18 100644 --- a/man/FindBridgeAnchor.Rd +++ b/man/FindBridgeAnchor.Rd @@ -58,6 +58,8 @@ be one of: \item{bridge.assay.name}{Assay name used for bridge object reconstruction value} +\item{reference.bridge.stored}{If refernece has stored the bridge dictionary representation} + \item{k.anchor}{How many neighbors (k) to use when picking anchors} \item{k.score}{How many neighbors (k) to use when scoring anchors} diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index bf9856cf2..13a9a19d5 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -18,8 +18,7 @@ LeverageScore(object, ...) MARGIN = 2L, eps = 0.5, seed = 123, - verbose = TRUE, - ... + verbose = TRUE ) \method{LeverageScore}{Assay}( @@ -31,8 +30,7 @@ LeverageScore(object, ...) slot = "data", seed = 123, eps = 0.5, - verbose = TRUE, - ... + verbose = TRUE ) \method{LeverageScore}{Seurat}( @@ -47,8 +45,7 @@ LeverageScore(object, ...) eps = 0.5, seed = 123, over.write = FALSE, - verbose = TRUE, - ... + verbose = TRUE ) } \arguments{ diff --git a/man/RunGraphLaplacian.Rd b/man/RunGraphLaplacian.Rd index 595f58d26..1e1994993 100644 --- a/man/RunGraphLaplacian.Rd +++ b/man/RunGraphLaplacian.Rd @@ -23,8 +23,7 @@ RunGraphLaplacian(object, ...) \arguments{ \item{object}{A Seurat object} -\item{...}{Arguments passed to -\code{\link[RSpectra:eigs_sym]{RSpectra::eigs_sym}}} +\item{...}{Arguments passed to eigs_sym} \item{graph}{The name of graph} From 105c636d0250cf287dc9da55158ca038e53bb385 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 25 Feb 2022 15:28:37 -0500 Subject: [PATCH 086/979] fix docu --- R/generics.R | 2 +- R/integration.R | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/R/generics.R b/R/generics.R index 9478bdf67..830fdb68a 100644 --- a/R/generics.R +++ b/R/generics.R @@ -312,7 +312,7 @@ IntegrateEmbeddings <- function(anchorset, ...) { #' score for all cells. #' #' @param object An object -#' +#' @param ... Arguments passed to other methods #' @return Returns a seurat object with additional column storing leverage score #' #' @export LeverageScore diff --git a/R/integration.R b/R/integration.R index d60736366..6a7bada32 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6229,7 +6229,7 @@ RunGraphLaplacian.default <- function(object, if (!all( slot(object = t(x = object), name = "x") == slot(object = object, name = "x") )) { - step("Input graph is not symmetric") + stop("Input graph is not symmetric") } if (verbose) { message("Generating normalized laplacian graph") @@ -6499,8 +6499,7 @@ LeverageScore.Assay <- function(object, sampling.method = sampling.method, seed = seed, eps = eps, - verbose = verbose, - ... + verbose = verbose ) return(score) } @@ -6546,8 +6545,7 @@ LeverageScore.Seurat <- function(object, seed = seed, slot = slot, eps = eps, - verbose = verbose, - ... + verbose = verbose ) return(object) } From 4309d0788e0a67ec34fd9b4ab221cd35f6c0d8c5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 25 Feb 2022 16:24:48 -0500 Subject: [PATCH 087/979] fix docu --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 6a7bada32..9309af20a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6369,7 +6369,6 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { #' @param eps error tolerance for JL embeddings #' @param seed Set a random seed #' @param verbose Print message and process -#' @param ... #' #' @importFrom Matrix qrR #' @importFrom SeuratObject as.sparse @@ -7288,6 +7287,7 @@ PrepareBridgeReference <- function ( #' same features (eg, peaks or genome bins) are present in both the bridge #' and query. #' } +#' } #' @param verbose Print messages and progress #' #' @export From a8b952471960f7773a4bec0f1957c3bbabd54d34 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 25 Feb 2022 16:42:46 -0500 Subject: [PATCH 088/979] fix leverage score docu --- R/integration.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 9309af20a..c10a0db09 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6383,7 +6383,8 @@ LeverageScore.default <- function( MARGIN = 2L, eps = 0.5, seed = 123, - verbose = TRUE + verbose = TRUE, + ... ) { features <- features %||% rownames(x = object) if (length(x = features) > 5000) { @@ -6485,7 +6486,8 @@ LeverageScore.Assay <- function(object, slot = "data", seed = 123, eps = 0.5, - verbose = TRUE + verbose = TRUE, + ... ) { features <- features %||% VariableFeatures(object = object) ndims <- ndims%||%ncol(x = object) @@ -6498,7 +6500,8 @@ LeverageScore.Assay <- function(object, sampling.method = sampling.method, seed = seed, eps = eps, - verbose = verbose + verbose = verbose, + ... ) return(score) } @@ -6523,7 +6526,8 @@ LeverageScore.Seurat <- function(object, eps = 0.5, seed = 123, over.write = FALSE, - verbose = TRUE + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object) features <- features %||% VariableFeatures(object = object[[assay]]) @@ -6544,7 +6548,8 @@ LeverageScore.Seurat <- function(object, seed = seed, slot = slot, eps = eps, - verbose = verbose + verbose = verbose, + ... ) return(object) } From 8d0cd533451d66f1787c8101a43e73012b3ef6e2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 25 Feb 2022 18:31:34 -0500 Subject: [PATCH 089/979] fastintegration wrapper --- R/integration.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/R/integration.R b/R/integration.R index c10a0db09..e00ce2922 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7346,3 +7346,47 @@ FindBridgeTransferAnchors <- function( ) return(bridge_anchor) } + + + +#' @importFrom rlang invoke +#' +FastIntegration <- function( + object.list, + reference = NULL, + reduction = 'rpca', + anchor.features = 2000, + k.anchor = 20, + dims = 1:30, + new.reduction.name = 'integrated_dr', + npcs = 50, + findintegrationanchors.args = list(), + verbose = TRUE +) { + anchor <- invoke( + .fn = FindIntegrationAnchors, + .args = c(list( + object.list = object.list, + reference = reference, + anchor.features = anchor.features, + reduction = reduction, + k.anchor = k.anchor, + dims = dims, + verbose = verbose + ), findintegrationanchors.args + ) + ) + + object_merged <- merge(object.list[[1]], object.list[2:length(object.list)]) + anchor.feature <- slot(object = anchor, name = 'anchor.features') + object_merged <- ScaleData(object_merged,features = anchor.feature, verbose = FALSE) + object_merged <- RunPCA(object_merged, features = features, verbose = FALSE, npcs = npcs) + temp <- atoms_merged[["pca"]] + object_merged <- IntegrateEmbeddings( + anchorset = anchor, + reductions = object_merged[['pca']], + new.reduction.name = new.reduction.name, + verbose = verbose) + object_merged[['pca']] <- temp + return(object_merged) +} From 638ebddca7ba5cb07c902ffaf13240644660a571 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 27 Feb 2022 18:47:22 -0500 Subject: [PATCH 090/979] add run pca into wrapepr --- R/integration.R | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index e00ce2922..23272f206 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7358,11 +7358,37 @@ FastIntegration <- function( anchor.features = 2000, k.anchor = 20, dims = 1:30, + scale = TRUE, new.reduction.name = 'integrated_dr', npcs = 50, findintegrationanchors.args = list(), verbose = TRUE ) { + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + + if (reduction == 'rpca') { + if (is.numeric(x = anchor.features)) { + anchor.features <- SelectIntegrationFeatures( + object.list = object.list, + nfeatures = anchor.features, + verbose = FALSE + ) + } + if (verbose) { + message('Performing PCA for each object') + } + object.list <- my.lapply(X = object.list, + FUN = function(x) { + x <- ScaleData(x, features = anchor.features, do.scale = scale, verbose = FALSE) + x <- RunPCA(x, features = anchor.features, verbose = FALSE) + return(x) + } + ) + } anchor <- invoke( .fn = FindIntegrationAnchors, .args = c(list( @@ -7370,6 +7396,7 @@ FastIntegration <- function( reference = reference, anchor.features = anchor.features, reduction = reduction, + scale = scale, k.anchor = k.anchor, dims = dims, verbose = verbose @@ -7381,12 +7408,13 @@ FastIntegration <- function( anchor.feature <- slot(object = anchor, name = 'anchor.features') object_merged <- ScaleData(object_merged,features = anchor.feature, verbose = FALSE) object_merged <- RunPCA(object_merged, features = features, verbose = FALSE, npcs = npcs) - temp <- atoms_merged[["pca"]] + temp <- object_merged[["pca"]] object_merged <- IntegrateEmbeddings( anchorset = anchor, reductions = object_merged[['pca']], new.reduction.name = new.reduction.name, verbose = verbose) object_merged[['pca']] <- temp + VariableFeatures(object_merged) <- anchor.feature return(object_merged) } From 24902651a93c31599a7aaac182fbc03b6bd6d9ae Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 27 Feb 2022 22:45:46 -0500 Subject: [PATCH 091/979] docu FastIntegration --- NAMESPACE | 1 + R/integration.R | 53 +++++++++++++++++++++++++++++++------------- man/LeverageScore.Rd | 11 +++++---- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6981f5c6f..37e7eb035 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -177,6 +177,7 @@ export(Embeddings) export(ExpMean) export(ExpSD) export(ExpVar) +export(FastIntegration) export(FastRowScale) export(FeatureLocator) export(FeaturePlot) diff --git a/R/integration.R b/R/integration.R index 23272f206..5f8664993 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7305,7 +7305,7 @@ FindBridgeTransferAnchors <- function( query, query.assay = NULL, dims, - reduction = c('lsiproject', 'pcaproject')[1], + reduction = c('lsiproject', 'pcaproject')[1], verbose = TRUE ){ query.assay <- query.assay %||% DefaultAssay(query) @@ -7316,7 +7316,6 @@ FindBridgeTransferAnchors <- function( reference.reduction <- params$reference.reduction bridge.ref.reduction <- params$bridge.ref.reduction DefaultAssay(BridgeReference@bridge) <- bridge.query.assay - if (reduction == "lsiproject") { query.anchor <- FindTransferAnchors( reference = BridgeReference@bridge, @@ -7348,8 +7347,22 @@ FindBridgeTransferAnchors <- function( } - +#' Perform integration on the joint PCA cell embeddings +#' This is a convenience wrapper function around the following three functions +#' that are often run together when perform integration. +#' #' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}}, +#' \code{\link{IntegrateEmbeddings}}. +#' +#' @inheritParams FindIntegrationAnchors +#' @param new.reduction.name Name of integrated dimensional reduction +#' @param npcs Total Number of PCs to compute and store (50 by default) +#' @param findintegrationanchors.args A named list of additional arguments to +#' \code{\link{FindIntegrationAnchors}} +#' @param verbose Print messages and progress +#' #' @importFrom rlang invoke +#' @return Returns a Seurat object with integrated dimensional reduction +#' @export #' FastIntegration <- function( object.list, @@ -7369,15 +7382,14 @@ FastIntegration <- function( yes = pblapply, no = future_lapply ) - + if (is.numeric(x = anchor.features)) { + anchor.features <- SelectIntegrationFeatures( + object.list = object.list, + nfeatures = anchor.features, + verbose = FALSE + ) + } if (reduction == 'rpca') { - if (is.numeric(x = anchor.features)) { - anchor.features <- SelectIntegrationFeatures( - object.list = object.list, - nfeatures = anchor.features, - verbose = FALSE - ) - } if (verbose) { message('Performing PCA for each object') } @@ -7403,11 +7415,19 @@ FastIntegration <- function( ), findintegrationanchors.args ) ) - - object_merged <- merge(object.list[[1]], object.list[2:length(object.list)]) + object_merged <- merge(x = object.list[[1]], + y = object.list[2:length(object.list)] + ) anchor.feature <- slot(object = anchor, name = 'anchor.features') - object_merged <- ScaleData(object_merged,features = anchor.feature, verbose = FALSE) - object_merged <- RunPCA(object_merged, features = features, verbose = FALSE, npcs = npcs) + object_merged <- ScaleData(object = object_merged, + features = anchor.feature, + verbose = FALSE + ) + object_merged <- RunPCA(object_merged, + features = anchor.feature, + verbose = FALSE, + npcs = npcs + ) temp <- object_merged[["pca"]] object_merged <- IntegrateEmbeddings( anchorset = anchor, @@ -7415,6 +7435,7 @@ FastIntegration <- function( new.reduction.name = new.reduction.name, verbose = verbose) object_merged[['pca']] <- temp - VariableFeatures(object_merged) <- anchor.feature + VariableFeatures(object = object_merged) <- anchor.feature return(object_merged) } + diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index 13a9a19d5..f93c552ce 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -18,7 +18,8 @@ LeverageScore(object, ...) MARGIN = 2L, eps = 0.5, seed = 123, - verbose = TRUE + verbose = TRUE, + ... ) \method{LeverageScore}{Assay}( @@ -30,7 +31,8 @@ LeverageScore(object, ...) slot = "data", seed = 123, eps = 0.5, - verbose = TRUE + verbose = TRUE, + ... ) \method{LeverageScore}{Seurat}( @@ -45,13 +47,14 @@ LeverageScore(object, ...) eps = 0.5, seed = 123, over.write = FALSE, - verbose = TRUE + verbose = TRUE, + ... ) } \arguments{ \item{object}{An object} -\item{...}{} +\item{...}{Arguments passed to other methods} \item{features}{Features used to calculate leverage score} From 0653d42f81fe77842137cb0e25f55bb85e152761 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 28 Feb 2022 00:04:33 -0500 Subject: [PATCH 092/979] update IntegrateSketchEmbeddings --- R/integration.R | 80 +++++++++++-------------------------------------- 1 file changed, 18 insertions(+), 62 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5f8664993..43ff19415 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6691,13 +6691,9 @@ IntegrationReferenceIndex <- function(object) { #' \itemize{ #' \item{sketch: Use random sketched data slot} #' \item{data: Use data slot} -#' \item{embeddings: Use uncorrected dimensional reduction in the sketched object} #' } #' @param sketch.ratio Sketch ratio of data slot when dictionary.method is set to sketch -#' @param sketch.reduction.raw Uncorrected dimensional reduction name in the sketched object -#' when dictionary.method is set to embeddings #' @param merged.object A merged seurat object containing all cells -#' @param reference.index Index for the integration reference #' @param verbose Print progress and message #' #' @@ -6705,23 +6701,21 @@ IntegrationReferenceIndex <- function(object) { #' @importFrom Matrix t #' @export -IntegrateSketchEmbeddings <- function(object.list, - sketch.list, - sketch.object, - features = NULL, - assay = 'RNA', - sketch.reduction = 'integrated_dr', - reduction.name ='pca.correct', - reduction.key = 'PCcorrect_', - dictionary.method = c('sketch', 'data','embeddings')[1], - sketch.ratio = 0.8, - sketch.reduction.raw = NULL, - merged.object = NULL, - reference.index = NULL, - verbose = TRUE) { - reference.index <- reference.index %||% IntegrationReferenceIndex(object = sketch.object) - features <- rownames(x = Loadings(object = sketch.object[[sketch.reduction]])) - query.index <- setdiff(x = 1:length(x = object.list), y = reference.index) +IntegrateSketchEmbeddings <- function( + object.list, + sketch.list, + sketch.object, + features = NULL, + assay = 'RNA', + sketch.reduction = 'integrated_dr', + reduction.name ='pca.correct', + reduction.key = 'PCcorrect_', + dictionary.method = c('sketch', 'data')[1], + sketch.ratio = 0.8, + merged.object = NULL, + verbose = TRUE) { + # check features + features <- features %||%rownames(x = Loadings(object = sketch.object[[sketch.reduction]])) features <- Reduce(f = intersect, x = c(list(features), lapply(X = object.list, function(x) rownames(x))) @@ -6736,50 +6730,16 @@ IntegrateSketchEmbeddings <- function(object.list, yes = pblapply, no = future_lapply ) - if (verbose) { - message("Center and scale based on sketch cells") - } - # mean and sd - if (dictionary.method == 'embeddings') { - scale.set <- 1:length(object.list) - if (is.null(sketch.reduction.raw)) { - stop("When dictionary.method is embeddings, sketch.reduction.raw needs to be specified") - } - } else { - scale.set <- reference.index - } - mean_sd.i <- SparseMeanSd(object = sketch.object) - emb.list <- lapply( - X = scale.set, - FUN = function(i) { - DefaultAssay(sketch.list[[i]]) <- DefaultAssay(object.list[[i]]) <- assay - emb.i <- ProjectDataEmbeddings( - object = object.list[[i]], - assay = assay, - feature.loadings = Loadings(sketch.object[[sketch.reduction]]), - ref.mean = mean_sd.i$mean, - ref.sd = mean_sd.i$sd - ) - return(emb.i) - } - ) if (verbose) { message("Correcting embeddings") } - emb.list.query <- my.lapply( - X = query.index, + emb.list <- my.lapply( + X = 1:length(sketch.list), FUN = function(q) { q.cells <- Cells(x = sketch.list[[q]]) emb <- switch( EXPR = dictionary.method, - 'embeddings'= { - sketch.transform <- ginv( - X = Embeddings(object = sketch.object[[sketch.reduction.raw]])[q.cells ,]) %*% - Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] - emb <- emb.list[[q]] %*% sketch.transform - emb - }, 'data' = { exp.mat <- t( x = as.matrix( @@ -6833,11 +6793,7 @@ IntegrateSketchEmbeddings <- function(object.list, return(emb) } ) - if (dictionary.method == 'embeddings') { - emb.m <- Reduce(f = rbind, x = c(emb.list[reference.index], emb.list.query)) - } else { - emb.m <- Reduce(f = rbind, x = c(emb.list[1], emb.list.query)) - } + emb.m <- Reduce(f = rbind, x = emb.list) correct.dr <- CreateDimReducObject( embeddings = as.matrix(emb.m), loadings = Loadings(sketch.object[[sketch.reduction]])[features,], From ee7379260ecc3bd11608e28ac11052ffac2f66a6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 28 Feb 2022 10:40:54 -0500 Subject: [PATCH 093/979] update functions --- R/integration.R | 25 +++++++++++++------------ man/IntegrateSketchEmbeddings.Rd | 10 +--------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/R/integration.R b/R/integration.R index 43ff19415..c833c9cd7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7106,8 +7106,8 @@ ProjectDimReduc <- function(query, #' #'#' @return #' @export -#' @return Returns a \code{BridgeReferenceSet} that can be used as input to \code{\link{FindBridgeTransferAnchors}} -#' The parameters used are stored in the \code{BridgeReferenceSet} as well +#' @return Returns a \code{bridge.reference.setSet} that can be used as input to \code{\link{FindBridgeTransferAnchors}} +#' The parameters used are stored in the \code{bridge.reference.setSet} as well #' PrepareBridgeReference <- function ( reference, @@ -7232,7 +7232,7 @@ PrepareBridgeReference <- function ( #' These anchors can later be used to integrate embeddings or transfer data from the reference to #' query object using the \code{\link{MapQuery}} object. -#' @param BridgeReference BridgeReferenceSet object generated from +#' @param bridge.reference.set BridgeReferenceSet object generated from #' \code{\link{PrepareBridgeReference}} #' @param query A query Seurat object #' @param query.assay Assay name for query-bridge integration @@ -7257,7 +7257,7 @@ PrepareBridgeReference <- function ( #' \code{\link{MapQuery}}. #' FindBridgeTransferAnchors <- function( - BridgeReference, + bridge.reference.set, query, query.assay = NULL, dims, @@ -7266,33 +7266,33 @@ FindBridgeTransferAnchors <- function( ){ query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - params <- slot(object = BridgeReference, name = "params") + params <- slot(object = bridge.reference.set, name = "params") bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction bridge.ref.reduction <- params$bridge.ref.reduction - DefaultAssay(BridgeReference@bridge) <- bridge.query.assay + DefaultAssay(bridge.reference.set@bridge) <- bridge.query.assay if (reduction == "lsiproject") { query.anchor <- FindTransferAnchors( - reference = BridgeReference@bridge, + reference = bridge.reference.set@bridge, reference.reduction = bridge.query.reduction, dims = dims, query = query, reduction = reduction, scale = FALSE, - features = rownames(BridgeReference@bridge[[bridge.query.reduction]]@feature.loadings ), + features = rownames(bridge.reference.set@bridge[[bridge.query.reduction]]@feature.loadings ), k.filter = NA, verbose = verbose ) query <- MapQuery(anchorset = query.anchor, - reference = BridgeReference@bridge, + reference = bridge.reference.set@bridge, query = query, store.weights = TRUE ) } bridge_anchor <- FindBridgeAnchor( - object.list = list(BridgeReference@reference, query), - bridge.object = BridgeReference@bridge, + object.list = list(bridge.reference.set@reference, query), + bridge.object = bridge.reference.set@bridge, object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Transfer", @@ -7303,7 +7303,8 @@ FindBridgeTransferAnchors <- function( } -#' Perform integration on the joint PCA cell embeddings +#' Perform integration on the joint PCA cell embeddings. +#' #' This is a convenience wrapper function around the following three functions #' that are often run together when perform integration. #' #' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}}, diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index ca6a74a10..c0862c51b 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -16,11 +16,9 @@ IntegrateSketchEmbeddings( sketch.reduction = "integrated_dr", reduction.name = "pca.correct", reduction.key = "PCcorrect_", - dictionary.method = c("sketch", "data", "embeddings")[1], + dictionary.method = c("sketch", "data")[1], sketch.ratio = 0.8, - sketch.reduction.raw = NULL, merged.object = NULL, - reference.index = NULL, verbose = TRUE ) } @@ -48,18 +46,12 @@ for all cells. sketch by default. Can be one of: \itemize{ \item{sketch: Use random sketched data slot} \item{data: Use data slot} -\item{embeddings: Use uncorrected dimensional reduction in the sketched object} }} \item{sketch.ratio}{Sketch ratio of data slot when dictionary.method is set to sketch} -\item{sketch.reduction.raw}{Uncorrected dimensional reduction name in the sketched object -when dictionary.method is set to embeddings} - \item{merged.object}{A merged seurat object containing all cells} -\item{reference.index}{Index for the integration reference} - \item{verbose}{Print progress and message} } \description{ From ce2884681a84a128b01cad203693ee4b76bd06ae Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 28 Feb 2022 11:48:25 -0500 Subject: [PATCH 094/979] change function name --- NAMESPACE | 2 +- R/integration.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 37e7eb035..7ed86b3c1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -177,7 +177,7 @@ export(Embeddings) export(ExpMean) export(ExpSD) export(ExpVar) -export(FastIntegration) +export(FastAnchorIntegration) export(FastRowScale) export(FeatureLocator) export(FeaturePlot) diff --git a/R/integration.R b/R/integration.R index c833c9cd7..19f634cd7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7321,7 +7321,7 @@ FindBridgeTransferAnchors <- function( #' @return Returns a Seurat object with integrated dimensional reduction #' @export #' -FastIntegration <- function( +FastAnchorIntegration <- function( object.list, reference = NULL, reduction = 'rpca', From 2fb764e08ae0495a6ec35fcf32c49236d9dfbcc0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 28 Feb 2022 17:35:49 -0500 Subject: [PATCH 095/979] fix parameter name --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 19f634cd7..62e645827 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7089,7 +7089,7 @@ ProjectDimReduc <- function(query, #' @param bridge A multi-omic bridge Seurat object #' @param reference.reduction Name of dimensional reduction of the reference object #' @param reference.dims Number of dimensions used for the reference.reduction -#' @param normlization.method Name of normalization method used: LogNormalize +#' @param normalization.method Name of normalization method used: LogNormalize #' or SCT #' @param reference.assay Assay name for reference #' in the sketched object @@ -7114,7 +7114,7 @@ PrepareBridgeReference <- function ( bridge, reference.reduction = 'pca', reference.dims = 1:50, - normlization.method = c('SCT', 'LogNormalization'), + normalization.method = c('SCT', 'LogNormalization'), reference.assay = NULL, bridge.ref.assay = 'RNA', bridge.query.assay = 'ATAC', From 85aa3a719b67b3fc854d78dbdebe43957c84bb93 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 28 Feb 2022 20:28:26 -0500 Subject: [PATCH 096/979] fix spelling error --- R/integration.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index 62e645827..bde2d13c7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6676,7 +6676,7 @@ IntegrationReferenceIndex <- function(object) { #' First construct a sketch-cell representation for all cells and #' then use this and the batch-corrected embeddings of sketched cells to #' construct the batch-corrected embeddings for all cells -#' @param object.list A list of Seurat objects with all cells +#' @param object A Seurat object with all cells for one dataset #' @param sketch.list A list of Seurat objects with sketched cells #' @param sketch.object A sketched Seurat objects with integrated embeddings #' @param features Features used for sketch integration @@ -6702,7 +6702,7 @@ IntegrationReferenceIndex <- function(object) { #' @export IntegrateSketchEmbeddings <- function( - object.list, + object = NULL, sketch.list, sketch.object, features = NULL, @@ -6716,10 +6716,7 @@ IntegrateSketchEmbeddings <- function( verbose = TRUE) { # check features features <- features %||%rownames(x = Loadings(object = sketch.object[[sketch.reduction]])) - features <- Reduce(f = intersect, - x = c(list(features), - lapply(X = object.list, function(x) rownames(x))) - ) + features <- intersect(features, rownames(object)) # check cell names cells.sketch.list <- unlist(lapply(X = sketch.list, function(x) Cells(x) )) if (length(x = setdiff(x = cells.sketch.list, y = Cells(sketch.object))) != 0) { @@ -7118,7 +7115,7 @@ PrepareBridgeReference <- function ( reference.assay = NULL, bridge.ref.assay = 'RNA', bridge.query.assay = 'ATAC', - supervised.reduction = c(NULL, 'slsi', 'spca' )[1], + supervised.reduction = c('slsi', 'spca', NULL )[1], bridge.query.reduction = NULL, bridge.query.features = NULL, laplacian.reduction.name = 'lap', @@ -7146,7 +7143,7 @@ PrepareBridgeReference <- function ( ref.anchor <- FindTransferAnchors( reference = reference, reference.reduction = reference.reduction, - normalization.method = normlization.method, + normalization.method = normalization.method, dims = reference.dims, query = bridge, recompute.residuals = TRUE, From d68f40292468711fcc8c6e9170c3e476fe3f385d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 28 Feb 2022 21:35:17 -0500 Subject: [PATCH 097/979] update argument --- R/integration.R | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/R/integration.R b/R/integration.R index c833c9cd7..7efe6564a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7095,7 +7095,16 @@ ProjectDimReduc <- function(query, #' in the sketched object #' @param bridge.ref.assay Assay name for bridge used for reference mapping #' @param bridge.query.assay Assay name for bridge used for query mapping -#' @param supervised.reduction Determine if perform supervised LSI or supervised PCA +#' @param supervised.reduction Determine if perform supervised dimensional reduction +#' #' Options are: +#' \itemize{ +#' \item{slsi: Perform supervised LSI as the dimensional reduction for +#' the bridge-query integration} +#' \item{spca: Perform supervised PCA as the dimensional reduction for +#' the bridge-query integration} +#' \item{NULL: no supervised dimensional reduction will be calculated. +#' bridge.query.reduction is used for the bridge-query integration} +#' } #' @param bridge.query.reduction Name of dimensions used for the query-bridge harmonization. #' bridge.query.reduction and supervised.reduction needs to set one #' @param bridge.query.features Features used for bridge query dimensional reduction @@ -7106,8 +7115,9 @@ ProjectDimReduc <- function(query, #' #'#' @return #' @export -#' @return Returns a \code{bridge.reference.setSet} that can be used as input to \code{\link{FindBridgeTransferAnchors}} -#' The parameters used are stored in the \code{bridge.reference.setSet} as well +#' @return Returns a \code{BridgeReferenceSet} that can be used as input to +#' \code{\link{FindBridgeTransferAnchors}}. +#' The parameters used are stored in the \code{BridgeReferenceSet} as well #' PrepareBridgeReference <- function ( reference, @@ -7224,7 +7234,7 @@ PrepareBridgeReference <- function ( #' Find bridge anchors between unimodal query and the other unimodal reference -#' usnig a pre-computed \code{\link{BridgeReferenceSet}}. +#' using a pre-computed \code{\link{BridgeReferenceSet}}. #' #' First, harmonized the bridge and query cells in the bridge query reduction space. #' Then, constructe the bridge dictionary representations for query cells. @@ -7232,7 +7242,7 @@ PrepareBridgeReference <- function ( #' These anchors can later be used to integrate embeddings or transfer data from the reference to #' query object using the \code{\link{MapQuery}} object. -#' @param bridge.reference.set BridgeReferenceSet object generated from +#' @param extended.reference BridgeReferenceSet object generated from #' \code{\link{PrepareBridgeReference}} #' @param query A query Seurat object #' @param query.assay Assay name for query-bridge integration @@ -7257,7 +7267,7 @@ PrepareBridgeReference <- function ( #' \code{\link{MapQuery}}. #' FindBridgeTransferAnchors <- function( - bridge.reference.set, + extended.reference, query, query.assay = NULL, dims, @@ -7266,33 +7276,33 @@ FindBridgeTransferAnchors <- function( ){ query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - params <- slot(object = bridge.reference.set, name = "params") + params <- slot(object = extended.reference, name = "params") bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction bridge.ref.reduction <- params$bridge.ref.reduction - DefaultAssay(bridge.reference.set@bridge) <- bridge.query.assay + DefaultAssay(extended.reference@bridge) <- bridge.query.assay if (reduction == "lsiproject") { query.anchor <- FindTransferAnchors( - reference = bridge.reference.set@bridge, + reference = extended.reference@bridge, reference.reduction = bridge.query.reduction, dims = dims, query = query, reduction = reduction, scale = FALSE, - features = rownames(bridge.reference.set@bridge[[bridge.query.reduction]]@feature.loadings ), + features = rownames(extended.reference@bridge[[bridge.query.reduction]]@feature.loadings), k.filter = NA, verbose = verbose - ) + ) query <- MapQuery(anchorset = query.anchor, - reference = bridge.reference.set@bridge, + reference = extended.reference@bridge, query = query, store.weights = TRUE ) } bridge_anchor <- FindBridgeAnchor( - object.list = list(bridge.reference.set@reference, query), - bridge.object = bridge.reference.set@bridge, + object.list = list(extended.reference@reference, query), + bridge.object = extended.reference@bridge, object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Transfer", From 4868f64989388e82869047dd7f75330089e6b9db Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 10:24:14 -0500 Subject: [PATCH 098/979] convert object.list to object --- R/integration.R | 141 ++++++++++++++----------------- man/IntegrateSketchEmbeddings.Rd | 13 ++- 2 files changed, 68 insertions(+), 86 deletions(-) diff --git a/R/integration.R b/R/integration.R index e53682ec2..cd6d5e804 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6676,8 +6676,8 @@ IntegrationReferenceIndex <- function(object) { #' First construct a sketch-cell representation for all cells and #' then use this and the batch-corrected embeddings of sketched cells to #' construct the batch-corrected embeddings for all cells +#' #' @param object A Seurat object with all cells for one dataset -#' @param sketch.list A list of Seurat objects with sketched cells #' @param sketch.object A sketched Seurat objects with integrated embeddings #' @param features Features used for sketch integration #' @param assay Assay name for raw expression @@ -6693,17 +6693,15 @@ IntegrationReferenceIndex <- function(object) { #' \item{data: Use data slot} #' } #' @param sketch.ratio Sketch ratio of data slot when dictionary.method is set to sketch -#' @param merged.object A merged seurat object containing all cells #' @param verbose Print progress and message #' -#' +#' @return Returns a Seurat object with an integrated dimensional reduction #' @importFrom MASS ginv #' @importFrom Matrix t #' @export IntegrateSketchEmbeddings <- function( - object = NULL, - sketch.list, + object, sketch.object, features = NULL, assay = 'RNA', @@ -6712,15 +6710,17 @@ IntegrateSketchEmbeddings <- function( reduction.key = 'PCcorrect_', dictionary.method = c('sketch', 'data')[1], sketch.ratio = 0.8, - merged.object = NULL, verbose = TRUE) { # check features features <- features %||%rownames(x = Loadings(object = sketch.object[[sketch.reduction]])) features <- intersect(features, rownames(object)) # check cell names - cells.sketch.list <- unlist(lapply(X = sketch.list, function(x) Cells(x) )) - if (length(x = setdiff(x = cells.sketch.list, y = Cells(sketch.object))) != 0) { - stop("Cells name in object.list are the same with Cells in sketch.object.") + cells.sketch <- intersect(x = Cells(sketch.object), y = Cells(object)) + if (length(x = cells.sketch) == 0) { + stop("Cell names in object are the same with those in sketch.object.") + } + if (verbose) { + message(length(cells.sketch),' atomic cells are identified in the sketch.object') } my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, @@ -6730,84 +6730,69 @@ IntegrateSketchEmbeddings <- function( if (verbose) { message("Correcting embeddings") } - emb.list <- my.lapply( - X = 1:length(sketch.list), - FUN = - function(q) { - q.cells <- Cells(x = sketch.list[[q]]) - emb <- switch( - EXPR = dictionary.method, - 'data' = { - exp.mat <- t( - x = as.matrix( - x = GetAssayData( - sketch.object[[assay]], - slot = 'data' - )[features,q.cells] - ) - ) - sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] - emb <- as.matrix( - x = t( - x = GetAssayData( - object = object.list[[q]], - slot = 'data')[features,] - ) %*% - sketch.transform - ) - emb - }, - 'sketch' = { - R <- t( - x = CountSketch( - nrow = round(sketch.ratio * length(x = features)), ncol = length(x = features) - ) - ) - exp.mat <- as.matrix( - x = t( - x = GetAssayData( - sketch.object[[assay]], - slot = 'data')[features,q.cells] - ) %*% - R - ) - sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = sketch.object[[sketch.reduction]])[q.cells ,] - emb <- as.matrix( - x = ( - t( - x = GetAssayData( - object = object.list[[q]], - slot = 'data')[features,] - ) %*% - R) %*% - sketch.transform - ) - emb - } + emb <- switch( + EXPR = dictionary.method, + 'data' = { + exp.mat <- t( + x = as.matrix( + x = GetAssayData( + sketch.object[[assay]], + slot = 'data' + )[features, cells.sketch] + ) + ) + sketch.transform <- ginv(X = exp.mat) %*% + Embeddings(object = sketch.object[[sketch.reduction]])[cells.sketch ,] + emb <- as.matrix( + x = t( + x = GetAssayData( + object = object, + slot = 'data')[features,] + ) %*% + sketch.transform ) - return(emb) + emb + }, + 'sketch' = { + R <- t( + x = CountSketch( + nrow = round(sketch.ratio * length(x = features)), ncol = length(x = features) + ) + ) + exp.mat <- as.matrix( + x = t( + x = GetAssayData( + sketch.object[[assay]], + slot = 'data')[features,cells.sketch] + ) %*% + R + ) + sketch.transform <- ginv(X = exp.mat) %*% + Embeddings(object = sketch.object[[sketch.reduction]])[cells.sketch ,] + emb <- as.matrix( + x = ( + t( + x = GetAssayData( + object = object, + slot = 'data')[features,] + ) %*% + R) %*% + sketch.transform + ) + emb } - ) - emb.m <- Reduce(f = rbind, x = emb.list) - correct.dr <- CreateDimReducObject( - embeddings = as.matrix(emb.m), + ) + object[[reduction.name]] <- CreateDimReducObject( + embeddings = as.matrix(emb), loadings = Loadings(sketch.object[[sketch.reduction]])[features,], key = reduction.key, assay = assay ) - if (is.null(x = merged.object)) { - if (verbose) { - message("Merging all objects") - } - merged.object <- merge(x = object.list[[1]], y = object.list[2:length(object.list)]) - } - merged.object[[reduction.name]] <- correct.dr - return(merged.object) + return(object) } + ProjectDataEmbeddings <- function(object, assay = 'RNA', feature.loadings, diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index c0862c51b..d6fae40e8 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -8,8 +8,7 @@ then use this and the batch-corrected embeddings of sketched cells to construct the batch-corrected embeddings for all cells} \usage{ IntegrateSketchEmbeddings( - object.list, - sketch.list, + object, sketch.object, features = NULL, assay = "RNA", @@ -18,14 +17,11 @@ IntegrateSketchEmbeddings( reduction.key = "PCcorrect_", dictionary.method = c("sketch", "data")[1], sketch.ratio = 0.8, - merged.object = NULL, verbose = TRUE ) } \arguments{ -\item{object.list}{A list of Seurat objects with all cells} - -\item{sketch.list}{A list of Seurat objects with sketched cells} +\item{object}{A Seurat object with all cells for one dataset} \item{sketch.object}{A sketched Seurat objects with integrated embeddings} @@ -50,10 +46,11 @@ for all cells. sketch by default. Can be one of: \item{sketch.ratio}{Sketch ratio of data slot when dictionary.method is set to sketch} -\item{merged.object}{A merged seurat object containing all cells} - \item{verbose}{Print progress and message} } +\value{ +Returns a Seurat object with an integrated dimensional reduction +} \description{ Integrate embeddings from batch-corrected sketch cell embeddings First construct a sketch-cell representation for all cells and From 05940c697049715b53c1b59bffb581770178eb44 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 11:48:52 -0500 Subject: [PATCH 099/979] uodate docu --- R/integration.R | 204 ++++++++++++++----------------- man/BridgeCellsRepresentation.Rd | 25 ++-- man/FindBridgeAnchor.Rd | 17 ++- man/IntegrateSketchEmbeddings.Rd | 26 ++-- man/LeverageScoreSampling.Rd | 8 +- 5 files changed, 122 insertions(+), 158 deletions(-) diff --git a/R/integration.R b/R/integration.R index cd6d5e804..8b1532b09 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5657,11 +5657,11 @@ NNtoGraph <- function( # Find Anchor direct from assay # # -# @return Returns an Integration or TranserAnchor set +# @return Returns a TranserAnchor or Integration set FindAssayAnchor <- function( object.list, reference = NULL, - anchor.type = c("Integration", "Transfer")[1], + anchor.type = c("Transfer", "Integration"), assay = "Bridge", slot = "data", reduction = NULL, @@ -5669,6 +5669,7 @@ FindAssayAnchor <- function( k.score = 50, verbose = TRUE ) { + anchor.type <- match.arg(arg = anchor.type) reduction.name <- reduction %||% paste0(assay, ".reduc") if ( is.null(x = reduction) || !reduction %in% Reductions(object.list[[1]])) { object.list <- lapply(object.list, function(x) { @@ -5761,22 +5762,28 @@ FindAssayAnchor <- function( } -#' Use bridge cells to represent single-modality object +#' Constructing a dictionary representation for each unimodal dataset #' -#' @inheritParams FindBridgeAnchor -#' @param return.all.assays if return all assays in the object.list. -#' @param l2.norm if l2 normalize dictionary representation -#' @param do.center if center dictionary representation #' +#' @param object.list A list of Seurat objects +#' @param bridge.object A multi-omic bridge Seurat which is used as the basis to +#' represent unimodal datasets +#' @param object.reduction A list of dimensional reductions from object.list used +#' to be reconstructed by bridge.object. +#' @param bridge.reduction A list of dimensional reductions from bridge.object used +#' to reconstruct object.reduction. +#' @param laplacian.reduction Name of bridge graph laplacian dimensional reduction +#' @param laplacian.dims Dimensions used for bridge graph laplacian dimensional reduction +#' @param bridge.assay.name Assay name used for bridge object reconstruction value +#' @param return.all.assays if return all assays in the object.list. #' Only bridge assay is returned by default. -#' +#' @param l2.norm Determine if l2 normalize dictionary representation. #' #' @importFrom MASS ginv #' @return Returns a object list in which each object has a bridge cell derived assay #' @export #' - BridgeCellsRepresentation <- function(object.list, bridge.object, object.reduction, @@ -5786,7 +5793,6 @@ BridgeCellsRepresentation <- function(object.list, bridge.assay.name = "Bridge", return.all.assays = FALSE, l2.norm = TRUE, - do.center = FALSE, verbose = TRUE ) { laplacian.dims <- laplacian.dims %||% 1:ncol(bridge.object[[laplacian.reduction]]) @@ -5838,7 +5844,6 @@ BridgeCellsRepresentation <- function(object.list, names(dims.list[[i]]) <- c('object', 'bridge') } } - object.list <- my.lapply( X = 1:length(x = object.list), FUN = function(x) { @@ -5871,12 +5876,6 @@ BridgeCellsRepresentation <- function(object.list, object.list[[x]][[bridge.assay.name]]@misc$SA.inv <- SA.inv DefaultAssay(object.list[[x]]) <- bridge.assay.name VariableFeatures(object = object.list[[x]]) <- rownames(object.list[[x]]) - object.list[[x]] <- ScaleData( - object = object.list[[x]], - do.scale = FALSE, - do.center = do.center, - verbose = FALSE - ) return (object.list[[x]]) } ) @@ -5895,7 +5894,7 @@ BridgeCellsRepresentation <- function(object.list, return(object.list) } -#' Find bridge anchors between two modalities objects +#' Find bridge anchors between two unimodal datasets #' #' First, bridge object is used to reconstruct two single-modality profiles and #' then project those cells into bridage graph laplacian space. @@ -5910,14 +5909,7 @@ BridgeCellsRepresentation <- function(object.list, #' } #' } #' -#' @param object.list A list of Seurat objects between which to -#' find anchors for downstream integration. -#' @param bridge.object A multimodal bridge seurat which connects two -#' single-modality objects -#' @param object.reduction A list of dimensional reductions from object.list used -#' to be reconstructed by bridge.obejct -#' @param bridge.reduction A list of dimensional reductions from bridge.object used -#' to reconstruct object.reduction +#' @inheritParams BridgeCellsRepresentation #' @param anchor.type The type of anchors. Can #' be one of: #' \itemize{ @@ -5926,15 +5918,12 @@ BridgeCellsRepresentation <- function(object.list, #' } #' @param reference A vector specifying the object/s to be used as a reference #' during integration or transfer data. -#' @param laplacian.reduction Name of bridge graph laplacian dimensional reduction -#' @param laplacian.dims Dimensions used for bridge graph laplacian dimensional reduction #' @param reduction Dimensional reduction to perform when finding anchors. Can #' be one of: #' \itemize{ #' \item{cca: Canonical correlation analysis} #' \item{direct: Use assay data as a dimensional reduction} #' } -#' @param bridge.assay.name Assay name used for bridge object reconstruction value #' @param reference.bridge.stored If refernece has stored the bridge dictionary representation #' @param k.anchor How many neighbors (k) to use when picking anchors #' @param k.score How many neighbors (k) to use when scoring anchors @@ -5951,11 +5940,11 @@ FindBridgeAnchor <- function(object.list, bridge.object, object.reduction, bridge.reduction, - anchor.type = c("Integration", "Transfer")[1], + anchor.type = c("Transfer", "Integration"), reference = NULL, laplacian.reduction = "lap", laplacian.dims = NULL, - reduction = c("direct", "cca")[1], + reduction = c("direct", "cca"), bridge.assay.name = "Bridge", reference.bridge.stored = FALSE, k.anchor = 20, @@ -5963,7 +5952,8 @@ FindBridgeAnchor <- function(object.list, verbose = TRUE, ... ) { - + anchor.type <- match.arg(arg = anchor.type) + reduction <- match.arg(arg = reduction) if (!is.null(laplacian.reduction)) { bridge.method <- "bridge graph" } else { @@ -5997,7 +5987,6 @@ FindBridgeAnchor <- function(object.list, stored.bridge.weights <- TRUE } } - bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") if (reference.bridge.stored) { object.list[[query]] <- BridgeCellsRepresentation( @@ -6022,7 +6011,6 @@ FindBridgeAnchor <- function(object.list, verbose = verbose ) } - if (reduction == "direct") { anchor <- FindAssayAnchor( object.list = object.list , @@ -6035,6 +6023,19 @@ FindBridgeAnchor <- function(object.list, verbose = verbose ) } else if (reduction == "cca") { + # set data slot to scale.data slot + object.list <- lapply( + X = object.list, + FUN = function(x) { + x <- SetAssayData( + object = x, + slot = "scale.data", + new.data = as.matrix( + x = GetAssayData(object = x, slot = "data") + )) + return(x) + } + ) anchor <- switch(EXPR = anchor.type, "Integration" = { anchor <- FindIntegrationAnchors( @@ -6089,7 +6090,6 @@ FindBridgeAnchor <- function(object.list, ) } } - slot(object = anchor, name = "command") <- LogSeuratCommand( object = object.list[[1]], return.command = TRUE @@ -6151,8 +6151,7 @@ TransferExpressionNN<- function( nn.object, reference.object, var.name = NULL -){ - +) { nn.matrix <- NNtoGraph(nn.object = nn.object, col.cells = Cells(reference.object) ) @@ -6299,33 +6298,6 @@ LiProj <- function(nrow, ncol, eps = 0.1, seed = NA) { } -LiProj <- function(nrow, ncol, eps = 0.1, seed = NA) { - if (!is.na(x = seed)) { - set.seed(seed = seed) - } - if (!is.null(x = eps)) { - if (eps > 1 || eps <= 0) { - stop("'eps' must be 0 < eps <= 1") - } - ncol <- floor(x = 4 * log(x = ncol) / ((eps ^ 2) / 2 - (eps ^ 3 / 3))) - } - s <- ceiling(x = sqrt(x = ncol)) - prob <- c( - 1 / (2 * s), - 1 - (1 / s), - 1 / (2 * s) - ) - return(matrix( - data = sample( - x = seq.int(from = -1L, to = 1L), - size = nrow * ncol, - replace = TRUE, - prob = prob - ), - nrow = nrow - )) -} - JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { if (!is.na(x = seed)) { set.seed(seed = seed) @@ -6454,10 +6426,7 @@ LeverageScore.default <- function( } # triangular matrix inverse - R.inv <- as.sparse(backsolve(r = R , x = diag(ncol(R)))) - - if (isTRUE(x = verbose)) { message("Random projection") } @@ -6471,8 +6440,7 @@ LeverageScore.default <- function( return(rowSums(x = Z ^ 2)) } -#' ssssxxxxx -#' + #' @rdname LeverageScore #' @export #' @method LeverageScore Assay @@ -6571,9 +6539,9 @@ CheckMetaVarName <- function(object, var.name) { } -#' Subset objects based on Leverage score +#' Sampling cells from objects based on Leverage score #' -#' @param object A seurat object +#' @param object A Seurat object #' @param num.cells Number of sampled cells #' @param assay Assay used to calculate leverage score #' @param features Features used to calculate leverage score @@ -6582,7 +6550,7 @@ CheckMetaVarName <- function(object, var.name) { #' @param seed Set a random seed.By default, sets the seed to 123 #' @param ... Arguments passed to LeverageScore #' -#' @return Returns a sub-sampled seurat object +#' @return Returns a subset Seurat object with sampled cells #' @export #' LeverageScoreSampling <- function( @@ -6672,16 +6640,17 @@ IntegrationReferenceIndex <- function(object) { return(reference.index) } -#' Integrate embeddings from batch-corrected sketch cell embeddings -#' First construct a sketch-cell representation for all cells and -#' then use this and the batch-corrected embeddings of sketched cells to -#' construct the batch-corrected embeddings for all cells +#' Integrate embeddings from the integrated atoms +#' +#' First learn a atom dictionary representation to reconstruct each cell. +#' Then, relying on this dictionary representation, +#' reconstruct the embeddings of each cell from the integrated atoms. #' #' @param object A Seurat object with all cells for one dataset -#' @param sketch.object A sketched Seurat objects with integrated embeddings -#' @param features Features used for sketch integration -#' @param assay Assay name for raw expression -#' @param sketch.reduction Dimensional reduction name for batch-corrected embeddings +#' @param atom.sketch.object A sketched Seurat objects with integrated embeddings +#' @param features Features used for atomic sketch integration +#' @param assay Assay name for original expression +#' @param atom.sketch.reduction Dimensional reduction name for batch-corrected embeddings #' in the sketched object #' @param reduction.name dimensional reduction name, pca.correct by default #' @param reduction.key dimensional reduction key, specifies the string before @@ -6702,26 +6671,28 @@ IntegrationReferenceIndex <- function(object) { IntegrateSketchEmbeddings <- function( object, - sketch.object, + atom.sketch.object, features = NULL, assay = 'RNA', - sketch.reduction = 'integrated_dr', + atom.sketch.reduction = 'integrated_dr', reduction.name ='pca.correct', reduction.key = 'PCcorrect_', - dictionary.method = c('sketch', 'data')[1], + dictionary.method = c('sketch', 'data'), sketch.ratio = 0.8, - verbose = TRUE) { + verbose = TRUE + ) { + dictionary.method <- match.arg(arg = dictionary.method) # check features - features <- features %||%rownames(x = Loadings(object = sketch.object[[sketch.reduction]])) + features <- features %||%rownames(x = Loadings(object = atom.sketch.object[[atom.sketch.reduction]])) features <- intersect(features, rownames(object)) # check cell names - cells.sketch <- intersect(x = Cells(sketch.object), y = Cells(object)) - if (length(x = cells.sketch) == 0) { - stop("Cell names in object are the same with those in sketch.object.") - } - if (verbose) { - message(length(cells.sketch),' atomic cells are identified in the sketch.object') - } + cells.sketch <- intersect(x = Cells(atom.sketch.object), y = Cells(object)) + if (length(x = cells.sketch) == 0) { + stop("Cell names in object are the same with those in atom.sketch.object.") + } + if (verbose) { + message(length(cells.sketch),' atomic cells are identified in the atom.sketch.object') + } my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, @@ -6736,13 +6707,13 @@ IntegrateSketchEmbeddings <- function( exp.mat <- t( x = as.matrix( x = GetAssayData( - sketch.object[[assay]], + atom.sketch.object[[assay]], slot = 'data' )[features, cells.sketch] ) ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = sketch.object[[sketch.reduction]])[cells.sketch ,] + Embeddings(object = atom.sketch.object[[atom.sketch.reduction]])[cells.sketch ,] emb <- as.matrix( x = t( x = GetAssayData( @@ -6762,13 +6733,13 @@ IntegrateSketchEmbeddings <- function( exp.mat <- as.matrix( x = t( x = GetAssayData( - sketch.object[[assay]], + atom.sketch.object[[assay]], slot = 'data')[features,cells.sketch] ) %*% R ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = sketch.object[[sketch.reduction]])[cells.sketch ,] + Embeddings(object = atom.sketch.object[[atom.sketch.reduction]])[cells.sketch ,] emb <- as.matrix( x = ( t( @@ -6784,10 +6755,10 @@ IntegrateSketchEmbeddings <- function( ) object[[reduction.name]] <- CreateDimReducObject( embeddings = as.matrix(emb), - loadings = Loadings(sketch.object[[sketch.reduction]])[features,], + loadings = Loadings(atom.sketch.object[[atom.sketch.reduction]])[features,], key = reduction.key, assay = assay - ) + ) return(object) } @@ -7062,11 +7033,15 @@ ProjectDimReduc <- function(query, } -#' Prepare the multi-omic bridge datasets and unimodal reference -#' First perform within-modality harmonization of bridge and reference -#' then perform dimensional reduction on the SNN graph of bridge datasets +#' Prepare the bridge and reference datasets +#' +#' Preprocess the multi-omic bridge and unimodel reference datasets into +#' an extended reference. +#' First perform within-modality harmonization between bridge and reference. +#' Then perform dimensional reduction on the SNN graph of bridge datasets #' via Laplacian Eigendecomposition #' Lastly, construct a bridge dictionary representation for unimodal reference cells +#' #' @param reference A reference Seurat object #' @param bridge A multi-omic bridge Seurat object #' @param reference.reduction Name of dimensional reduction of the reference object @@ -7074,9 +7049,8 @@ ProjectDimReduc <- function(query, #' @param normalization.method Name of normalization method used: LogNormalize #' or SCT #' @param reference.assay Assay name for reference -#' in the sketched object -#' @param bridge.ref.assay Assay name for bridge used for reference mapping -#' @param bridge.query.assay Assay name for bridge used for query mapping +#' @param bridge.ref.assay Assay name for bridge used for reference mapping. RNA by default +#' @param bridge.query.assay Assay name for bridge used for query mapping. ATAC by default #' @param supervised.reduction Determine if perform supervised dimensional reduction #' #' Options are: #' \itemize{ @@ -7091,11 +7065,10 @@ ProjectDimReduc <- function(query, #' bridge.query.reduction and supervised.reduction needs to set one #' @param bridge.query.features Features used for bridge query dimensional reduction #' @param laplacian.reduction.name Name of dimensional reduction name of graph laplacian eigenspace -#' @param laplacian.reduction.key dimensional reduction key -#' @param laplacian.reduction.dims Number of dimenions used for graph laplacian eigenspace +#' @param laplacian.reduction.key Dimensional reduction key +#' @param laplacian.reduction.dims Number of dimensions used for graph laplacian eigenspace #' @param verbose Print progress and message #' -#'#' @return #' @export #' @return Returns a \code{BridgeReferenceSet} that can be used as input to #' \code{\link{FindBridgeTransferAnchors}}. @@ -7110,7 +7083,7 @@ PrepareBridgeReference <- function ( reference.assay = NULL, bridge.ref.assay = 'RNA', bridge.query.assay = 'ATAC', - supervised.reduction = c('slsi', 'spca', NULL )[1], + supervised.reduction = c('slsi', 'spca', NULL ), bridge.query.reduction = NULL, bridge.query.features = NULL, laplacian.reduction.name = 'lap', @@ -7119,6 +7092,7 @@ PrepareBridgeReference <- function ( verbose = TRUE ) { ## checking + supervised.reduction <- match.arg(arg = supervised.reduction) if (!is.null(x = bridge.query.reduction) & !is.null(x = supervised.reduction)) { stop('bridge.query.reduction and supervised.reduction can only set one') } @@ -7134,7 +7108,7 @@ PrepareBridgeReference <- function ( # modality harmonization reference.assay <- reference.assay %||% DefaultAssay(reference) DefaultAssay(reference) <- reference.assay - DefaultAssay(bridge) <- bridge.ref.assay + DefaultAssay(bridge) <- bridge.ref.assay ref.anchor <- FindTransferAnchors( reference = reference, reference.reduction = reference.reduction, @@ -7215,11 +7189,12 @@ PrepareBridgeReference <- function ( } -#' Find bridge anchors between unimodal query and the other unimodal reference +#' Find bridge anchors between query and extended bridge-reference +#' +#' Find a set of anchors between unimodal query and the other unimodal reference #' using a pre-computed \code{\link{BridgeReferenceSet}}. -#' #' First, harmonized the bridge and query cells in the bridge query reduction space. -#' Then, constructe the bridge dictionary representations for query cells. +#' Then, construct the bridge dictionary representations for query cells. #' Next, find a set of anchors between query and reference in the bridge graph laplacian eigenspace. #' These anchors can later be used to integrate embeddings or transfer data from the reference to #' query object using the \code{\link{MapQuery}} object. @@ -7253,9 +7228,10 @@ FindBridgeTransferAnchors <- function( query, query.assay = NULL, dims, - reduction = c('lsiproject', 'pcaproject')[1], + reduction = c('lsiproject', 'pcaproject'), verbose = TRUE -){ +) { + reduction <- match.arg(arg = reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay params <- slot(object = extended.reference, name = "params") diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd index ec4c6964d..a1a94c8fb 100644 --- a/man/BridgeCellsRepresentation.Rd +++ b/man/BridgeCellsRepresentation.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{BridgeCellsRepresentation} \alias{BridgeCellsRepresentation} -\title{Use bridge cells to represent single-modality object} +\title{Constructing a dictionary representation for each unimodal dataset} \usage{ BridgeCellsRepresentation( object.list, @@ -14,22 +14,20 @@ BridgeCellsRepresentation( bridge.assay.name = "Bridge", return.all.assays = FALSE, l2.norm = TRUE, - do.center = FALSE, verbose = TRUE ) } \arguments{ -\item{object.list}{A list of Seurat objects between which to -find anchors for downstream integration.} +\item{object.list}{A list of Seurat objects} -\item{bridge.object}{A multimodal bridge seurat which connects two -single-modality objects} +\item{bridge.object}{A multi-omic bridge Seurat which is used as the basis to +represent unimodal datasets} \item{object.reduction}{A list of dimensional reductions from object.list used -to be reconstructed by bridge.obejct} +to be reconstructed by bridge.object.} \item{bridge.reduction}{A list of dimensional reductions from bridge.object used -to reconstruct object.reduction} +to reconstruct object.reduction.} \item{laplacian.reduction}{Name of bridge graph laplacian dimensional reduction} @@ -37,19 +35,14 @@ to reconstruct object.reduction} \item{bridge.assay.name}{Assay name used for bridge object reconstruction value} -\item{return.all.assays}{if return all assays in the object.list.} - -\item{l2.norm}{if l2 normalize dictionary representation} - -\item{do.center}{if center dictionary representation - +\item{return.all.assays}{if return all assays in the object.list. Only bridge assay is returned by default.} -\item{verbose}{Print messages and progress} +\item{l2.norm}{Determine if l2 normalize dictionary representation.} } \value{ Returns a object list in which each object has a bridge cell derived assay } \description{ -Use bridge cells to represent single-modality object +Constructing a dictionary representation for each unimodal dataset } diff --git a/man/FindBridgeAnchor.Rd b/man/FindBridgeAnchor.Rd index 8b715ba18..0e77c1c98 100644 --- a/man/FindBridgeAnchor.Rd +++ b/man/FindBridgeAnchor.Rd @@ -2,18 +2,18 @@ % Please edit documentation in R/integration.R \name{FindBridgeAnchor} \alias{FindBridgeAnchor} -\title{Find bridge anchors between two modalities objects} +\title{Find bridge anchors between two unimodal datasets} \usage{ FindBridgeAnchor( object.list, bridge.object, object.reduction, bridge.reduction, - anchor.type = c("Integration", "Transfer")[1], + anchor.type = c("Transfer", "Integration"), reference = NULL, laplacian.reduction = "lap", laplacian.dims = NULL, - reduction = c("direct", "cca")[1], + reduction = c("direct", "cca"), bridge.assay.name = "Bridge", reference.bridge.stored = FALSE, k.anchor = 20, @@ -23,17 +23,16 @@ FindBridgeAnchor( ) } \arguments{ -\item{object.list}{A list of Seurat objects between which to -find anchors for downstream integration.} +\item{object.list}{A list of Seurat objects} -\item{bridge.object}{A multimodal bridge seurat which connects two -single-modality objects} +\item{bridge.object}{A multi-omic bridge Seurat which is used as the basis to +represent unimodal datasets} \item{object.reduction}{A list of dimensional reductions from object.list used -to be reconstructed by bridge.obejct} +to be reconstructed by bridge.object.} \item{bridge.reduction}{A list of dimensional reductions from bridge.object used -to reconstruct object.reduction} +to reconstruct object.reduction.} \item{anchor.type}{The type of anchors. Can be one of: diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index d6fae40e8..a6b4c272c 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -2,20 +2,17 @@ % Please edit documentation in R/integration.R \name{IntegrateSketchEmbeddings} \alias{IntegrateSketchEmbeddings} -\title{Integrate embeddings from batch-corrected sketch cell embeddings -First construct a sketch-cell representation for all cells and -then use this and the batch-corrected embeddings of sketched cells to -construct the batch-corrected embeddings for all cells} +\title{Integrate embeddings from the integrated atoms} \usage{ IntegrateSketchEmbeddings( object, - sketch.object, + atom.sketch.object, features = NULL, assay = "RNA", - sketch.reduction = "integrated_dr", + atom.sketch.reduction = "integrated_dr", reduction.name = "pca.correct", reduction.key = "PCcorrect_", - dictionary.method = c("sketch", "data")[1], + dictionary.method = c("sketch", "data"), sketch.ratio = 0.8, verbose = TRUE ) @@ -23,13 +20,13 @@ IntegrateSketchEmbeddings( \arguments{ \item{object}{A Seurat object with all cells for one dataset} -\item{sketch.object}{A sketched Seurat objects with integrated embeddings} +\item{atom.sketch.object}{A sketched Seurat objects with integrated embeddings} -\item{features}{Features used for sketch integration} +\item{features}{Features used for atomic sketch integration} -\item{assay}{Assay name for raw expression} +\item{assay}{Assay name for original expression} -\item{sketch.reduction}{Dimensional reduction name for batch-corrected embeddings +\item{atom.sketch.reduction}{Dimensional reduction name for batch-corrected embeddings in the sketched object} \item{reduction.name}{dimensional reduction name, pca.correct by default} @@ -52,8 +49,7 @@ for all cells. sketch by default. Can be one of: Returns a Seurat object with an integrated dimensional reduction } \description{ -Integrate embeddings from batch-corrected sketch cell embeddings -First construct a sketch-cell representation for all cells and -then use this and the batch-corrected embeddings of sketched cells to -construct the batch-corrected embeddings for all cells +First learn a atom dictionary representation to reconstruct each cell. +Then, relying on this dictionary representation, +reconstruct the embeddings of each cell from the integrated atoms. } diff --git a/man/LeverageScoreSampling.Rd b/man/LeverageScoreSampling.Rd index fdcd7ef9a..d87ee767d 100644 --- a/man/LeverageScoreSampling.Rd +++ b/man/LeverageScoreSampling.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{LeverageScoreSampling} \alias{LeverageScoreSampling} -\title{Subset objects based on Leverage score} +\title{Sampling cells from objects based on Leverage score} \usage{ LeverageScoreSampling( object, @@ -16,7 +16,7 @@ LeverageScoreSampling( ) } \arguments{ -\item{object}{A seurat object} +\item{object}{A Seurat object} \item{num.cells}{Number of sampled cells} @@ -33,8 +33,8 @@ LeverageScoreSampling( \item{...}{Arguments passed to LeverageScore} } \value{ -Returns a sub-sampled seurat object +Returns a subset Seurat object with sampled cells } \description{ -Subset objects based on Leverage score +Sampling cells from objects based on Leverage score } From 95b707617646b1dcb3dba5f4457308b31955ff91 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 14:06:03 -0500 Subject: [PATCH 100/979] fix docu --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index 8b1532b09..59b0b6e52 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5778,6 +5778,7 @@ FindAssayAnchor <- function( #' @param return.all.assays if return all assays in the object.list. #' Only bridge assay is returned by default. #' @param l2.norm Determine if l2 normalize dictionary representation. +#' @param verbose Print messages and progress #' #' @importFrom MASS ginv #' @return Returns a object list in which each object has a bridge cell derived assay From b10682db55002007fa2a6d31f31fdb89fef0c9bc Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 14:48:16 -0500 Subject: [PATCH 101/979] fix integration --- R/integration.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 59b0b6e52..6676d1b52 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5988,7 +5988,6 @@ FindBridgeAnchor <- function(object.list, stored.bridge.weights <- TRUE } } - bridge.reduction.name <- paste0(bridge.assay.name, ".reduc") if (reference.bridge.stored) { object.list[[query]] <- BridgeCellsRepresentation( object.list = object.list[[query]] , @@ -6049,12 +6048,20 @@ FindBridgeAnchor <- function(object.list, k.score = k.score, verbose = verbose, ...) + object.merge <- merge(x = object.list[[1]], + y = object.list[2:length(object.list)] + ) slot( object = anchor, name = "weight.reduction" - ) <- merge(object.list[[1]][[bridge.reduction.name]], - object.list[[2]][[bridge.reduction.name]] - ) + ) <- CreateDimReducObject( + embeddings = t(GetAssayData( + object = object.merge, + slot = 'data' + )), + key = "L_", + assay = bridge.assay.name + ) anchor }, "Transfer" = { From 79e1ff9526d27b3bc23fecc1a8c5cd43eaa1ebb0 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Mar 2022 15:39:11 -0500 Subject: [PATCH 102/979] Document improvements: PrepareBridgeReference --- R/integration.R | 95 ++++++++++++++++---------------- man/BridgeCellsRepresentation.Rd | 4 +- man/IntegrateSketchEmbeddings.Rd | 2 +- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/R/integration.R b/R/integration.R index 6676d1b52..7cfb109f1 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5764,7 +5764,7 @@ FindAssayAnchor <- function( #' Constructing a dictionary representation for each unimodal dataset #' -#' +#' #' @param object.list A list of Seurat objects #' @param bridge.object A multi-omic bridge Seurat which is used as the basis to #' represent unimodal datasets @@ -5775,7 +5775,7 @@ FindAssayAnchor <- function( #' @param laplacian.reduction Name of bridge graph laplacian dimensional reduction #' @param laplacian.dims Dimensions used for bridge graph laplacian dimensional reduction #' @param bridge.assay.name Assay name used for bridge object reconstruction value -#' @param return.all.assays if return all assays in the object.list. +#' @param return.all.assays if return all assays in the object.list. #' Only bridge assay is returned by default. #' @param l2.norm Determine if l2 normalize dictionary representation. #' @param verbose Print messages and progress @@ -6363,7 +6363,7 @@ LeverageScore.default <- function( MARGIN = 2L, eps = 0.5, seed = 123, - verbose = TRUE, + verbose = TRUE, ... ) { features <- features %||% rownames(x = object) @@ -6649,11 +6649,11 @@ IntegrationReferenceIndex <- function(object) { } #' Integrate embeddings from the integrated atoms -#' +#' #' First learn a atom dictionary representation to reconstruct each cell. -#' Then, relying on this dictionary representation, -#' reconstruct the embeddings of each cell from the integrated atoms. -#' +#' Then, relying on this dictionary representation, +#' reconstruct the embeddings of each cell from the integrated atoms. +#' #' @param object A Seurat object with all cells for one dataset #' @param atom.sketch.object A sketched Seurat objects with integrated embeddings #' @param features Features used for atomic sketch integration @@ -6697,7 +6697,7 @@ IntegrateSketchEmbeddings <- function( cells.sketch <- intersect(x = Cells(atom.sketch.object), y = Cells(object)) if (length(x = cells.sketch) == 0) { stop("Cell names in object are the same with those in atom.sketch.object.") - } + } if (verbose) { message(length(cells.sketch),' atomic cells are identified in the atom.sketch.object') } @@ -7042,24 +7042,26 @@ ProjectDimReduc <- function(query, #' Prepare the bridge and reference datasets -#' -#' Preprocess the multi-omic bridge and unimodel reference datasets into +#' +#' Preprocess the multi-omic bridge and unimodal reference datasets into #' an extended reference. -#' First perform within-modality harmonization between bridge and reference. -#' Then perform dimensional reduction on the SNN graph of bridge datasets -#' via Laplacian Eigendecomposition -#' Lastly, construct a bridge dictionary representation for unimodal reference cells -#' +#' This function performs the following three steps: +#' 1. Performs within-modality harmonization between bridge and reference +#' 2. Performs dimensional reduction on the SNN graph of bridge datasets via +#' Laplacian Eigendecomposition +#' 3. Constructs a bridge dictionary representation for unimodal reference cells +#' #' @param reference A reference Seurat object #' @param bridge A multi-omic bridge Seurat object -#' @param reference.reduction Name of dimensional reduction of the reference object -#' @param reference.dims Number of dimensions used for the reference.reduction +#' @param reference.reduction Name of dimensional reduction of the reference object (default is 'pca') +#' @param reference.dims Number of dimensions used for the reference.reduction (default is 50) #' @param normalization.method Name of normalization method used: LogNormalize #' or SCT -#' @param reference.assay Assay name for reference +#' @param reference.assay Assay name for reference (default is \code{\link{DefaultAssay}}) #' @param bridge.ref.assay Assay name for bridge used for reference mapping. RNA by default #' @param bridge.query.assay Assay name for bridge used for query mapping. ATAC by default -#' @param supervised.reduction Determine if perform supervised dimensional reduction +#' @param supervised.reduction Type of supervised dimensional reduction to be performed +#' for integrating the bridge and query. #' #' Options are: #' \itemize{ #' \item{slsi: Perform supervised LSI as the dimensional reduction for @@ -7069,19 +7071,20 @@ ProjectDimReduc <- function(query, #' \item{NULL: no supervised dimensional reduction will be calculated. #' bridge.query.reduction is used for the bridge-query integration} #' } -#' @param bridge.query.reduction Name of dimensions used for the query-bridge harmonization. -#' bridge.query.reduction and supervised.reduction needs to set one +#' @param bridge.query.reduction Name of dimensions used for the bridge-query harmonization. +#' Requires either 'bridge.query.reduction' or 'supervised.reduction' to be not NULL. #' @param bridge.query.features Features used for bridge query dimensional reduction -#' @param laplacian.reduction.name Name of dimensional reduction name of graph laplacian eigenspace -#' @param laplacian.reduction.key Dimensional reduction key -#' @param laplacian.reduction.dims Number of dimensions used for graph laplacian eigenspace -#' @param verbose Print progress and message +#' (default is NULL which uses VariableFeatures from the bridge object) +#' @param laplacian.reduction.name Name of dimensional reduction name of graph laplacian eigenspace (default is 'lap') +#' @param laplacian.reduction.key Dimensional reduction key (default is 'lap_') +#' @param laplacian.reduction.dims Number of dimensions used for graph laplacian eigenspace (default is 50) +#' @param verbose Print progress and message (default is TRUE) #' #' @export #' @return Returns a \code{BridgeReferenceSet} that can be used as input to #' \code{\link{FindBridgeTransferAnchors}}. #' The parameters used are stored in the \code{BridgeReferenceSet} as well -#' +#' PrepareBridgeReference <- function ( reference, bridge, @@ -7091,9 +7094,9 @@ PrepareBridgeReference <- function ( reference.assay = NULL, bridge.ref.assay = 'RNA', bridge.query.assay = 'ATAC', - supervised.reduction = c('slsi', 'spca', NULL ), - bridge.query.reduction = NULL, - bridge.query.features = NULL, + supervised.reduction = c('slsi', 'spca', NULL), + bridge.query.reduction = NULL, + bridge.query.features = NULL, laplacian.reduction.name = 'lap', laplacian.reduction.key = 'lap_', laplacian.reduction.dims = 1:50, @@ -7114,7 +7117,7 @@ PrepareBridgeReference <- function ( ' assay has no variable genes and bridge.query.features has no input') } # modality harmonization - reference.assay <- reference.assay %||% DefaultAssay(reference) + reference.assay <- reference.assay %||% DefaultAssay(reference) DefaultAssay(reference) <- reference.assay DefaultAssay(bridge) <- bridge.ref.assay ref.anchor <- FindTransferAnchors( @@ -7148,7 +7151,7 @@ PrepareBridgeReference <- function ( verbose = verbose) DefaultAssay(object = bridge) <- bridge.query.assay if (!is.null(supervised.reduction)) { - bridge <- switch(EXPR = supervised.reduction, + bridge <- switch(EXPR = supervised.reduction, 'slsi' = { bridge.reduc <- RunSLSI(object = bridge, features = VariableFeatures(bridge), @@ -7156,7 +7159,7 @@ PrepareBridgeReference <- function ( assay = bridge.query.assay ) bridge.reduc - }, + }, 'spca' = { bridge.reduc <- RunSPCA(object = bridge, features = VariableFeatures(bridge), @@ -7187,8 +7190,8 @@ PrepareBridgeReference <- function ( bridge.ref.assay = bridge.ref.assay, bridge.query.assay = bridge.query.assay, supervised.reduction = supervised.reduction, - bridge.ref.reduction = bridge.ref.reduction, - bridge.query.reduction = bridge.query.reduction, + bridge.ref.reduction = bridge.ref.reduction, + bridge.query.reduction = bridge.query.reduction, laplacian.reduction.name = laplacian.reduction.name, laplacian.reduction.dims = laplacian.reduction.dims ) @@ -7198,12 +7201,12 @@ PrepareBridgeReference <- function ( #' Find bridge anchors between query and extended bridge-reference -#' +#' #' Find a set of anchors between unimodal query and the other unimodal reference #' using a pre-computed \code{\link{BridgeReferenceSet}}. #' First, harmonized the bridge and query cells in the bridge query reduction space. -#' Then, construct the bridge dictionary representations for query cells. -#' Next, find a set of anchors between query and reference in the bridge graph laplacian eigenspace. +#' Then, construct the bridge dictionary representations for query cells. +#' Next, find a set of anchors between query and reference in the bridge graph laplacian eigenspace. #' These anchors can later be used to integrate embeddings or transfer data from the reference to #' query object using the \code{\link{MapQuery}} object. @@ -7230,8 +7233,8 @@ PrepareBridgeReference <- function ( #' @return Returns an \code{AnchorSet} object that can be used as input to #' \code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}} and #' \code{\link{MapQuery}}. -#' -FindBridgeTransferAnchors <- function( +#' +FindBridgeTransferAnchors <- function( extended.reference, query, query.assay = NULL, @@ -7280,23 +7283,23 @@ FindBridgeTransferAnchors <- function( #' Perform integration on the joint PCA cell embeddings. -#' +#' #' This is a convenience wrapper function around the following three functions -#' that are often run together when perform integration. +#' that are often run together when perform integration. #' #' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}}, -#' \code{\link{IntegrateEmbeddings}}. -#' +#' \code{\link{IntegrateEmbeddings}}. +#' #' @inheritParams FindIntegrationAnchors #' @param new.reduction.name Name of integrated dimensional reduction #' @param npcs Total Number of PCs to compute and store (50 by default) #' @param findintegrationanchors.args A named list of additional arguments to #' \code{\link{FindIntegrationAnchors}} #' @param verbose Print messages and progress -#' +#' #' @importFrom rlang invoke #' @return Returns a Seurat object with integrated dimensional reduction #' @export -#' +#' FastAnchorIntegration <- function( object.list, reference = NULL, @@ -7309,7 +7312,7 @@ FastAnchorIntegration <- function( npcs = 50, findintegrationanchors.args = list(), verbose = TRUE -) { +) { my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd index a1a94c8fb..374dff2db 100644 --- a/man/BridgeCellsRepresentation.Rd +++ b/man/BridgeCellsRepresentation.Rd @@ -35,10 +35,12 @@ to reconstruct object.reduction.} \item{bridge.assay.name}{Assay name used for bridge object reconstruction value} -\item{return.all.assays}{if return all assays in the object.list. +\item{return.all.assays}{if return all assays in the object.list. Only bridge assay is returned by default.} \item{l2.norm}{Determine if l2 normalize dictionary representation.} + +\item{verbose}{Print messages and progress} } \value{ Returns a object list in which each object has a bridge cell derived assay diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index a6b4c272c..bcd860ad3 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -50,6 +50,6 @@ Returns a Seurat object with an integrated dimensional reduction } \description{ First learn a atom dictionary representation to reconstruct each cell. -Then, relying on this dictionary representation, +Then, relying on this dictionary representation, reconstruct the embeddings of each cell from the integrated atoms. } From f0d59c33e2703d2b48003c1893ba3fcfa747c1a8 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Mar 2022 15:57:55 -0500 Subject: [PATCH 103/979] Document improvements: BridgeCellsRepresentation --- R/integration.R | 19 ++++++++++--------- man/BridgeCellsRepresentation.Rd | 14 +++++++------- man/FindBridgeAnchor.Rd | 6 +++--- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/R/integration.R b/R/integration.R index 7cfb109f1..8e707b60d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5762,22 +5762,22 @@ FindAssayAnchor <- function( } -#' Constructing a dictionary representation for each unimodal dataset +#' Construct a dictionary representation for each unimodal dataset #' #' #' @param object.list A list of Seurat objects #' @param bridge.object A multi-omic bridge Seurat which is used as the basis to #' represent unimodal datasets #' @param object.reduction A list of dimensional reductions from object.list used -#' to be reconstructed by bridge.object. +#' to be reconstructed by bridge.object #' @param bridge.reduction A list of dimensional reductions from bridge.object used -#' to reconstruct object.reduction. +#' to reconstruct object.reduction #' @param laplacian.reduction Name of bridge graph laplacian dimensional reduction #' @param laplacian.dims Dimensions used for bridge graph laplacian dimensional reduction -#' @param bridge.assay.name Assay name used for bridge object reconstruction value -#' @param return.all.assays if return all assays in the object.list. +#' @param bridge.assay.name Assay name used for bridge object reconstruction value (default is 'Bridge') +#' @param return.all.assays Whether to return all assays in the object.list. #' Only bridge assay is returned by default. -#' @param l2.norm Determine if l2 normalize dictionary representation. +#' @param l2.norm Whether to l2 normalize the dictionary representation #' @param verbose Print messages and progress #' #' @importFrom MASS ginv @@ -7204,9 +7204,10 @@ PrepareBridgeReference <- function ( #' #' Find a set of anchors between unimodal query and the other unimodal reference #' using a pre-computed \code{\link{BridgeReferenceSet}}. -#' First, harmonized the bridge and query cells in the bridge query reduction space. -#' Then, construct the bridge dictionary representations for query cells. -#' Next, find a set of anchors between query and reference in the bridge graph laplacian eigenspace. +#' This function performs three steps: +#' 1. Harmonize the bridge and query cells in the bridge query reduction space +#' 2. Construct the bridge dictionary representations for query cells +#' 3. Find a set of anchors between query and reference in the bridge graph laplacian eigenspace #' These anchors can later be used to integrate embeddings or transfer data from the reference to #' query object using the \code{\link{MapQuery}} object. diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd index 374dff2db..9110960b7 100644 --- a/man/BridgeCellsRepresentation.Rd +++ b/man/BridgeCellsRepresentation.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/integration.R \name{BridgeCellsRepresentation} \alias{BridgeCellsRepresentation} -\title{Constructing a dictionary representation for each unimodal dataset} +\title{Construct a dictionary representation for each unimodal dataset} \usage{ BridgeCellsRepresentation( object.list, @@ -24,21 +24,21 @@ BridgeCellsRepresentation( represent unimodal datasets} \item{object.reduction}{A list of dimensional reductions from object.list used -to be reconstructed by bridge.object.} +to be reconstructed by bridge.object} \item{bridge.reduction}{A list of dimensional reductions from bridge.object used -to reconstruct object.reduction.} +to reconstruct object.reduction} \item{laplacian.reduction}{Name of bridge graph laplacian dimensional reduction} \item{laplacian.dims}{Dimensions used for bridge graph laplacian dimensional reduction} -\item{bridge.assay.name}{Assay name used for bridge object reconstruction value} +\item{bridge.assay.name}{Assay name used for bridge object reconstruction value (default is 'Bridge')} -\item{return.all.assays}{if return all assays in the object.list. +\item{return.all.assays}{Whether to return all assays in the object.list. Only bridge assay is returned by default.} -\item{l2.norm}{Determine if l2 normalize dictionary representation.} +\item{l2.norm}{Whether to l2 normalize the dictionary representation} \item{verbose}{Print messages and progress} } @@ -46,5 +46,5 @@ Only bridge assay is returned by default.} Returns a object list in which each object has a bridge cell derived assay } \description{ -Constructing a dictionary representation for each unimodal dataset +Construct a dictionary representation for each unimodal dataset } diff --git a/man/FindBridgeAnchor.Rd b/man/FindBridgeAnchor.Rd index 0e77c1c98..9cbc0b943 100644 --- a/man/FindBridgeAnchor.Rd +++ b/man/FindBridgeAnchor.Rd @@ -29,10 +29,10 @@ FindBridgeAnchor( represent unimodal datasets} \item{object.reduction}{A list of dimensional reductions from object.list used -to be reconstructed by bridge.object.} +to be reconstructed by bridge.object} \item{bridge.reduction}{A list of dimensional reductions from bridge.object used -to reconstruct object.reduction.} +to reconstruct object.reduction} \item{anchor.type}{The type of anchors. Can be one of: @@ -55,7 +55,7 @@ be one of: \item{direct: Use assay data as a dimensional reduction} }} -\item{bridge.assay.name}{Assay name used for bridge object reconstruction value} +\item{bridge.assay.name}{Assay name used for bridge object reconstruction value (default is 'Bridge')} \item{reference.bridge.stored}{If refernece has stored the bridge dictionary representation} From 3f542efcfb9e4f0045283e0a9670f5ef7cbccb87 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Mar 2022 16:07:07 -0500 Subject: [PATCH 104/979] Document improvements: IntegrateSketchEmbeddings --- R/integration.R | 20 ++++++++++++-------- man/IntegrateSketchEmbeddings.Rd | 21 +++++++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index 8e707b60d..cb1640b33 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6650,27 +6650,31 @@ IntegrationReferenceIndex <- function(object) { #' Integrate embeddings from the integrated atoms #' +#' The main steps of this procedure are outlined below. For a more detailed +#' description of the methodology, please see Hao, et al Biorxiv 2022: +#' \doi{10.1101/2022.02.24.481684} +#' #' First learn a atom dictionary representation to reconstruct each cell. -#' Then, relying on this dictionary representation, +#' Then, using this dictionary representation, #' reconstruct the embeddings of each cell from the integrated atoms. #' #' @param object A Seurat object with all cells for one dataset #' @param atom.sketch.object A sketched Seurat objects with integrated embeddings #' @param features Features used for atomic sketch integration -#' @param assay Assay name for original expression +#' @param assay Assay name for original expression (default is 'RNA') #' @param atom.sketch.reduction Dimensional reduction name for batch-corrected embeddings -#' in the sketched object -#' @param reduction.name dimensional reduction name, pca.correct by default +#' in the sketched object (default is 'integrated_dr') +#' @param reduction.name dimensional reduction name (default is 'pca.correct') #' @param reduction.key dimensional reduction key, specifies the string before -#' the number for the dimension names. PCcorrect_ by default +#' the number for the dimension names. (default is 'PCcorrect_') #' @param dictionary.method Methods to construct sketch-cell representation -#' for all cells. sketch by default. Can be one of: +#' for all cells (default is 'sketch'). Can be one of: #' \itemize{ #' \item{sketch: Use random sketched data slot} #' \item{data: Use data slot} #' } -#' @param sketch.ratio Sketch ratio of data slot when dictionary.method is set to sketch -#' @param verbose Print progress and message +#' @param sketch.ratio Sketch ratio of data slot when \code{dictionary.method} is set to 'sketch' (default is 0.8) +#' @param verbose Print progress and message (default is TRUE) #' #' @return Returns a Seurat object with an integrated dimensional reduction #' @importFrom MASS ginv diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index bcd860ad3..af974711b 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -24,32 +24,37 @@ IntegrateSketchEmbeddings( \item{features}{Features used for atomic sketch integration} -\item{assay}{Assay name for original expression} +\item{assay}{Assay name for original expression (default is 'RNA')} \item{atom.sketch.reduction}{Dimensional reduction name for batch-corrected embeddings -in the sketched object} +in the sketched object (default is 'integrated_dr')} -\item{reduction.name}{dimensional reduction name, pca.correct by default} +\item{reduction.name}{dimensional reduction name (default is 'pca.correct')} \item{reduction.key}{dimensional reduction key, specifies the string before -the number for the dimension names. PCcorrect_ by default} +the number for the dimension names. (default is 'PCcorrect_')} \item{dictionary.method}{Methods to construct sketch-cell representation -for all cells. sketch by default. Can be one of: +for all cells (default is 'sketch'). Can be one of: \itemize{ \item{sketch: Use random sketched data slot} \item{data: Use data slot} }} -\item{sketch.ratio}{Sketch ratio of data slot when dictionary.method is set to sketch} +\item{sketch.ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to 'sketch' (default is 0.8)} -\item{verbose}{Print progress and message} +\item{verbose}{Print progress and message (default is TRUE)} } \value{ Returns a Seurat object with an integrated dimensional reduction } \description{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Hao, et al Biorxiv 2022: +\doi{10.1101/2022.02.24.481684} +} +\details{ First learn a atom dictionary representation to reconstruct each cell. -Then, relying on this dictionary representation, +Then, using this dictionary representation, reconstruct the embeddings of each cell from the integrated atoms. } From b49d31bedf041945526fcb3c2c63ec22a77959ed Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Mar 2022 16:21:23 -0500 Subject: [PATCH 105/979] Document improvements: LeverageScore --- R/integration.R | 19 ++++++++++--------- man/LeverageScore.Rd | 16 ++++++++-------- man/LeverageScoreSampling.Rd | 8 ++++---- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/R/integration.R b/R/integration.R index cb1640b33..eae2ae928 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6341,14 +6341,15 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { return(m) } +#' @param object A seurat object #' @param features Features used to calculate leverage score -#' @param nsketch Number of rows in the random sketch matrix -#' @param ndims Number of dimensions in the JL embeddings +#' @param nsketch Number of rows in the random sketch matrix (default is 5000) +#' @param ndims Number of dimensions in the Johnson–Lindenstrauss (JL) embeddings (default is all dimensions) #' @param sampling.method Sampling method for generating random matrix #' @param MARGIN Margin -#' @param eps error tolerance for JL embeddings -#' @param seed Set a random seed -#' @param verbose Print message and process +#' @param eps error tolerance for JL embeddings (default is 0.5) +#' @param seed Set a random seed (default is 123) +#' @param verbose Print message and process (default is TRUE) #' #' @importFrom Matrix qrR #' @importFrom SeuratObject as.sparse @@ -6550,12 +6551,12 @@ CheckMetaVarName <- function(object, var.name) { #' Sampling cells from objects based on Leverage score #' #' @param object A Seurat object -#' @param num.cells Number of sampled cells +#' @param num.cells Number of sampled cells (default is 5000) #' @param assay Assay used to calculate leverage score #' @param features Features used to calculate leverage score -#' @param var.name Variable name stored leverage score in the meta.data -#' @param over.write If over write the variable with leverage score -#' @param seed Set a random seed.By default, sets the seed to 123 +#' @param var.name Variable name stored leverage score in the meta.data (default is 'leverage.score') +#' @param over.write Whether to over write the variable with leverage score (default is FALSE) +#' @param seed Set a random seed (default is 123) #' @param ... Arguments passed to LeverageScore #' #' @return Returns a subset Seurat object with sampled cells diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index f93c552ce..86aea96ef 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -52,33 +52,33 @@ LeverageScore(object, ...) ) } \arguments{ -\item{object}{An object} +\item{object}{A seurat object} \item{...}{Arguments passed to other methods} \item{features}{Features used to calculate leverage score} -\item{nsketch}{Number of rows in the random sketch matrix} +\item{nsketch}{Number of rows in the random sketch matrix (default is 5000)} -\item{ndims}{Number of dimensions in the JL embeddings} +\item{ndims}{Number of dimensions in the Johnson–Lindenstrauss (JL) embeddings (default is all dimensions)} \item{sampling.method}{Sampling method for generating random matrix} \item{MARGIN}{Margin} -\item{eps}{error tolerance for JL embeddings} +\item{eps}{error tolerance for JL embeddings (default is 0.5)} -\item{seed}{Set a random seed} +\item{seed}{Set a random seed (default is 123)} -\item{verbose}{Print message and process} +\item{verbose}{Print message and process (default is TRUE)} \item{slot}{The slot used for leverage score calculation. data slot is used by default} \item{assay}{Assay used to calculate leverage score} -\item{var.name}{Variable name stored leverage score in the meta.data} +\item{var.name}{Variable name stored leverage score in the meta.data (default is 'leverage.score')} -\item{over.write}{If over write the variable with leverage score} +\item{over.write}{Whether to over write the variable with leverage score (default is FALSE)} } \value{ Returns a seurat object with additional column storing leverage score diff --git a/man/LeverageScoreSampling.Rd b/man/LeverageScoreSampling.Rd index d87ee767d..36e482344 100644 --- a/man/LeverageScoreSampling.Rd +++ b/man/LeverageScoreSampling.Rd @@ -18,17 +18,17 @@ LeverageScoreSampling( \arguments{ \item{object}{A Seurat object} -\item{num.cells}{Number of sampled cells} +\item{num.cells}{Number of sampled cells (default is 5000)} \item{assay}{Assay used to calculate leverage score} \item{features}{Features used to calculate leverage score} -\item{var.name}{Variable name stored leverage score in the meta.data} +\item{var.name}{Variable name stored leverage score in the meta.data (default is 'leverage.score')} -\item{over.write}{If over write the variable with leverage score} +\item{over.write}{Whether to over write the variable with leverage score (default is FALSE)} -\item{seed}{Set a random seed.By default, sets the seed to 123} +\item{seed}{Set a random seed (default is 123)} \item{...}{Arguments passed to LeverageScore} } From b85091d271f8a090823e4ab67ab381fcf9eeb7d8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 16:57:02 -0500 Subject: [PATCH 106/979] fix docu --- R/integration.R | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index eae2ae928..2ddf8b3b6 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5789,14 +5789,13 @@ BridgeCellsRepresentation <- function(object.list, bridge.object, object.reduction, bridge.reduction, - laplacian.reduction = NULL, - laplacian.dims = NULL, + laplacian.reduction = 'lap', + laplacian.dims = 1:50, bridge.assay.name = "Bridge", return.all.assays = FALSE, l2.norm = TRUE, verbose = TRUE ) { - laplacian.dims <- laplacian.dims %||% 1:ncol(bridge.object[[laplacian.reduction]]) my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, @@ -5859,7 +5858,7 @@ BridgeCellsRepresentation <- function(object.list, X <- Embeddings( object = object.list[[x]], reduction = object.reduction[[x]] - )[, dims.list[[x]]$object] %*% (SA.inv %*% lap.vector) + )[, dims.list[[x]]$object] %*% (SA.inv %*% lap.vector) } else { X <- Embeddings( object = object.list[[x]], @@ -6263,9 +6262,16 @@ RunGraphLaplacian.default <- function(object, return(lap_dir) } +Clarkson, K. L., & Woodruff, D. P. (2017). Low-rank approximation and regression in input sparsity time. Journal of the ACM (JACM), 63(6), 1-45. - - +#' Generate CountSketch random matrix +#' +#' @param nrow Number of sketching random cells +#' @param ncol Number of cells in the original data +#' @param seed Random seed for sampling +#' @references Clarkson, KL. & Woodruff, DP. +#' Low-rank approximation and regression in input sparsity time. +#' Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. \url{https://dl.acm.org/doi/abs/10.1145/3019134}; #' @importFrom Matrix sparseMatrix CountSketch <- function(nrow, ncol, seed = 123) { @@ -6341,12 +6347,15 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { return(m) } -#' @param object A seurat object +#' @param object A Seurat object #' @param features Features used to calculate leverage score #' @param nsketch Number of rows in the random sketch matrix (default is 5000) #' @param ndims Number of dimensions in the Johnson–Lindenstrauss (JL) embeddings (default is all dimensions) #' @param sampling.method Sampling method for generating random matrix -#' @param MARGIN Margin +#' \itemize{ +#' \item{CountSketch: generate a sparsed \code{CountSketch} random matrix} +#' \item{Gaussian: generate a gaussian random matrix with mean = 0 and sd = 1 / (ncells ^ 2)} +#' } #' @param eps error tolerance for JL embeddings (default is 0.5) #' @param seed Set a random seed (default is 123) #' @param verbose Print message and process (default is TRUE) @@ -6355,13 +6364,13 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { #' @importFrom SeuratObject as.sparse #' @rdname LeverageScore #' @export +#' LeverageScore.default <- function( object, features = NULL, nsketch = 5000L, ndims = NULL, sampling.method = c("CountSketch", "Gaussian"), - MARGIN = 2L, eps = 0.5, seed = 123, verbose = TRUE, @@ -6381,16 +6390,12 @@ LeverageScore.default <- function( nsketch <- 1.1*length(x = features) } nsketch <- min(nsketch, ndims) - MARGIN <- MARGIN %/% 1L - if (!MARGIN %in% seq.int(from = 1L, to = 2L)) { - stop("'MARGIN' must be either 1 or 2") - } sampling.method <- sampling.method[1L] sampling.method <- match.arg(arg = sampling.method) if (isTRUE(x = verbose)) { message(sampling.method, " sampling ", nsketch, " cells") } - ncells <- dim(x = object)[[MARGIN]] + ncells <- ncol(x = object) S <- switch( EXPR = sampling.method, "CountSketch" = CountSketch(nrow = nsketch, ncol = ncells, seed = seed), @@ -6906,7 +6911,11 @@ RunPCA_Sparse <- function( return(object) } -SmoothLabels <- function(labels, clusters ) { +# Smoothing labels based on the clusters +# @param labels the original labels +# @param clusters the clusters that are used to smooth labels +# +SmoothLabels <- function(labels, clusters) { cluster.set <- unique(clusters) smooth.labels <- labels for (c in cluster.set) { @@ -7244,7 +7253,7 @@ FindBridgeTransferAnchors <- function( extended.reference, query, query.assay = NULL, - dims, + dims = 1:30, reduction = c('lsiproject', 'pcaproject'), verbose = TRUE ) { From 444a0db6f5fa0dd58241c33d13f671c52a981345 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 16:59:47 -0500 Subject: [PATCH 107/979] update docu --- R/integration.R | 2 -- man/BridgeCellsRepresentation.Rd | 4 ++-- man/LeverageScore.Rd | 11 ++++++----- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index 2ddf8b3b6..58c741d34 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6262,8 +6262,6 @@ RunGraphLaplacian.default <- function(object, return(lap_dir) } -Clarkson, K. L., & Woodruff, D. P. (2017). Low-rank approximation and regression in input sparsity time. Journal of the ACM (JACM), 63(6), 1-45. - #' Generate CountSketch random matrix #' #' @param nrow Number of sketching random cells diff --git a/man/BridgeCellsRepresentation.Rd b/man/BridgeCellsRepresentation.Rd index 9110960b7..300515a5c 100644 --- a/man/BridgeCellsRepresentation.Rd +++ b/man/BridgeCellsRepresentation.Rd @@ -9,8 +9,8 @@ BridgeCellsRepresentation( bridge.object, object.reduction, bridge.reduction, - laplacian.reduction = NULL, - laplacian.dims = NULL, + laplacian.reduction = "lap", + laplacian.dims = 1:50, bridge.assay.name = "Bridge", return.all.assays = FALSE, l2.norm = TRUE, diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index 86aea96ef..b93babe99 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -15,7 +15,6 @@ LeverageScore(object, ...) nsketch = 5000L, ndims = NULL, sampling.method = c("CountSketch", "Gaussian"), - MARGIN = 2L, eps = 0.5, seed = 123, verbose = TRUE, @@ -52,7 +51,7 @@ LeverageScore(object, ...) ) } \arguments{ -\item{object}{A seurat object} +\item{object}{A Seurat object} \item{...}{Arguments passed to other methods} @@ -62,9 +61,11 @@ LeverageScore(object, ...) \item{ndims}{Number of dimensions in the Johnson–Lindenstrauss (JL) embeddings (default is all dimensions)} -\item{sampling.method}{Sampling method for generating random matrix} - -\item{MARGIN}{Margin} +\item{sampling.method}{Sampling method for generating random matrix +\itemize{ + \item{CountSketch: generate a sparsed \code{CountSketch} random matrix} + \item{Gaussian: generate a gaussian random matrix with mean = 0 and sd = 1 / (ncells ^ 2)} +}} \item{eps}{error tolerance for JL embeddings (default is 0.5)} From cd91e52b5ef224ce740db536443c2f2988fb1293 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 18:12:19 -0500 Subject: [PATCH 108/979] add random projection docu --- R/integration.R | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index 58c741d34..ec12589d6 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6262,14 +6262,14 @@ RunGraphLaplacian.default <- function(object, return(lap_dir) } -#' Generate CountSketch random matrix -#' -#' @param nrow Number of sketching random cells -#' @param ncol Number of cells in the original data -#' @param seed Random seed for sampling -#' @references Clarkson, KL. & Woodruff, DP. -#' Low-rank approximation and regression in input sparsity time. -#' Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. \url{https://dl.acm.org/doi/abs/10.1145/3019134}; +# Generate CountSketch random matrix +# +# @param nrow Number of sketching random cells +# @param ncol Number of cells in the original data +# @param seed Random seed for sampling +# @references Clarkson, KL. & Woodruff, DP. +# Low-rank approximation and regression in input sparsity time. +# Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. \url{https://dl.acm.org/doi/abs/10.1145/3019134}; #' @importFrom Matrix sparseMatrix CountSketch <- function(nrow, ncol, seed = 123) { @@ -6287,7 +6287,11 @@ CountSketch <- function(nrow, ncol, seed = 123) { )) } - +# Generate a very sparse random matrix to improve the computational speed up of +# random projection. +# +# @reference Ping Li, Trevor J. Hastie, and Kenneth W. Church, "Very sparse random projections(2006)". +# LiProj <- function(nrow, ncol, eps = 0.1, seed = NA) { if (!is.na(x = seed)) { set.seed(seed = seed) @@ -6310,6 +6314,13 @@ LiProj <- function(nrow, ncol, eps = 0.1, seed = NA) { } +# Generate JL random projection embeddings +# +# +# @reference Aghila G and Siddharth R (2020). +# RandPro: Random Projection with Classification. R package version 0.2.2. +# https://CRAN.R-project.org/package=RandPro +# JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { if (!is.na(x = seed)) { set.seed(seed = seed) From 9bbd2dfd1df65f73027013bd65d28c4a7e2da152 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 18:49:27 -0500 Subject: [PATCH 109/979] fix dims bug --- R/integration.R | 2 +- man/FindBridgeAnchor.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index ec12589d6..138326909 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5943,7 +5943,7 @@ FindBridgeAnchor <- function(object.list, anchor.type = c("Transfer", "Integration"), reference = NULL, laplacian.reduction = "lap", - laplacian.dims = NULL, + laplacian.dims = 1:50, reduction = c("direct", "cca"), bridge.assay.name = "Bridge", reference.bridge.stored = FALSE, diff --git a/man/FindBridgeAnchor.Rd b/man/FindBridgeAnchor.Rd index 9cbc0b943..7f078f724 100644 --- a/man/FindBridgeAnchor.Rd +++ b/man/FindBridgeAnchor.Rd @@ -12,7 +12,7 @@ FindBridgeAnchor( anchor.type = c("Transfer", "Integration"), reference = NULL, laplacian.reduction = "lap", - laplacian.dims = NULL, + laplacian.dims = 1:50, reduction = c("direct", "cca"), bridge.assay.name = "Bridge", reference.bridge.stored = FALSE, From c606123fe2fe8931c139195dd1da1bda0bc3d540 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 22:54:53 -0500 Subject: [PATCH 110/979] fix margin --- R/integration.R | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/R/integration.R b/R/integration.R index 138326909..189a1af41 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6370,6 +6370,7 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA, method = "li") { #' @param verbose Print message and process (default is TRUE) #' #' @importFrom Matrix qrR +#' @importFrom Matrix t #' @importFrom SeuratObject as.sparse #' @rdname LeverageScore #' @export @@ -6418,24 +6419,9 @@ LeverageScore.default <- function( } ) if (!is.null(x = features)) { - object <- if (MARGIN == 1L) { - object[, features, drop = FALSE] - } else { - object[features, , drop = FALSE] - } - } - if (MARGIN == 2L) { - tf <- tryCatch( - expr = methods::slot( - object = methods::selectMethod(f = "t", signature = class(x = object)), - name = ".Data" - ), - error = function(...) { - return(base::t) - } - ) - object <- tf(object) + object <- object[features, , drop = FALSE] } + object <- t(object) if (verbose) { message("Performing QR decomposition of the sketch matrix") } @@ -6447,7 +6433,6 @@ LeverageScore.default <- function( } else { base::qr.R(qr = qr.sa) } - # triangular matrix inverse R.inv <- as.sparse(backsolve(r = R , x = diag(ncol(R)))) if (isTRUE(x = verbose)) { From 26de19a9f6177078ce725a22d5d3e4563af2f0f7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Mar 2022 23:00:56 -0500 Subject: [PATCH 111/979] add docu --- R/integration.R | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index 189a1af41..6732eb8ec 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5609,6 +5609,8 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( #' Convert Neighbor class to an asymmetrical Graph class +#' +#' #' @param nn.object A neighbor class object #' @param col.cells Cells names of the neighbors, cell names in nn.object is used by default #' @param weighted Determine if use distance in the Graph @@ -5633,7 +5635,7 @@ NNtoGraph <- function( if (weighted) { select_nn_dist <- Distances(object = nn.object) dist.element <- as.numeric(x = t(x = select_nn_dist)) - nn.matrix <- Matrix::sparseMatrix( + nn.matrix <- sparseMatrix( i = i, j = j, x = dist.element, @@ -5654,7 +5656,7 @@ NNtoGraph <- function( } -# Find Anchor direct from assay +# Find Anchor directly from assay # # # @return Returns a TranserAnchor or Integration set @@ -5784,7 +5786,6 @@ FindAssayAnchor <- function( #' @return Returns a object list in which each object has a bridge cell derived assay #' @export #' - BridgeCellsRepresentation <- function(object.list, bridge.object, object.reduction, @@ -5971,7 +5972,6 @@ FindBridgeAnchor <- function(object.list, } ) } - if (anchor.type == "Transfer") { reference <- reference %||% c(1) query <- setdiff(c(1,2), reference) @@ -6153,7 +6153,8 @@ TransferLablesNN <- function( return(output.list) } - +# transfer continuous value based on neighbors +# TransferExpressionNN<- function( nn.object, reference.object, @@ -6593,7 +6594,7 @@ LeverageScoreSampling <- function( } -# Run hnsw +# Run hnsw to find neighbors # # @param data Data to build the index with # @param query A set of data to be queried against data @@ -6635,7 +6636,8 @@ HnswNN <- function(data, } - +# Calculate reference index from the integrated object +# IntegrationReferenceIndex <- function(object) { if (is.null(object@tools$Integration@sample.tree)) { reference.index <- object@commands$FindIntegrationAnchors$reference @@ -6775,7 +6777,8 @@ IntegrateSketchEmbeddings <- function( } - +# Project data slot to the dimensional reduction +# ProjectDataEmbeddings <- function(object, assay = 'RNA', feature.loadings, @@ -6831,7 +6834,8 @@ ProjectDataEmbeddings <- function(object, return(all.emb) } - +# Calculate mean and sd +# SparseMeanSd <- function(object, assay = NULL, slot = 'data', From c437870d9927bf749527dc1b72e54ce81b3b450e Mon Sep 17 00:00:00 2001 From: austinhartman Date: Tue, 1 Mar 2022 23:46:25 -0500 Subject: [PATCH 112/979] add bridge integration vignette --- vignettes/bridge_integration_vignette.Rmd | 283 ++++++++++++++++++++++ vignettes/vignettes.yaml | 7 + 2 files changed, 290 insertions(+) create mode 100644 vignettes/bridge_integration_vignette.Rmd diff --git a/vignettes/bridge_integration_vignette.Rmd b/vignettes/bridge_integration_vignette.Rmd new file mode 100644 index 000000000..8b20ecaa3 --- /dev/null +++ b/vignettes/bridge_integration_vignette.Rmd @@ -0,0 +1,283 @@ +--- +title: "Dictionary Learning for cross-modality integration" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now) + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + + +In the same way that read mapping tools have transformed genome sequence analysis, the ability to map new datasets to established references represents an exciting opportunity for the field of single-cell genomics. Along with others in the community, we have developed [tools to map and interpret query datasets](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html), and have also constructed a [set of scRNA-seq datasets for diverse mammalian tissues](http://azimuth.hubmapconsortium.org). + +A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, bioRxiv 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. + +In this vignette we demonstrate: + +* Loading in and pre-processing the scATAC-seq, multiome, and scRNA-seq reference datasets +* Mapping the scATAC-seq dataset via bridge integration +* Exploring and assessing the resulting annotations + +First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. + +```{r, message=FALSE, warning=FALSE} +library(remotes) +remotes::install_github("satijalab/seurat", "feat/dictionary", quiet = TRUE) +library(Seurat) +library(SeuratDisk) +library(Signac) +library(EnsDb.Hsapiens.v86) +library(dplyr) +library(ggplot2) +``` + +## Load the bridge, query, and reference datasets + +We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a healthy donor. The dataset measures RNA-seq and ATAC-seq in the same cell, and is available for download from 10x Genomics [here](https://www.10xgenomics.com/resources/datasets/pbmc-from-a-healthy-donor-granulocytes-removed-through-cell-sorting-10-k-1-standard-2-0-0). We follow the loading instructions from the [Signac package vignettes](https://satijalab.org/signac/articles/pbmc_multiomic.html). Note that when using Signac, please make sure you are using the [latest version of Bioconductor]([http://www.bioconductor.org/news/bioc_3_14_release/]), as [users have reported errors](https://github.com/timoast/signac/issues/687) when using older BioC versions. + +
+ **Load and setup the 10x multiome object** + +```{r} +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +# extract RNA and ATAC data +rna_counts <- inputdata.10x$`Gene Expression` +atac_counts <- inputdata.10x$Peaks +# Create Seurat object +obj.multi <- CreateSeuratObject(counts = rna_counts) +# Get % of mitochondrial genes +obj.multi[["percent.mt"]] <- PercentageFeatureSet(obj.multi, pattern = "^MT-") + +# add the ATAC-seq assay +grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) +grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) +atac_counts <- atac_counts[as.vector(grange.use), ] + +# Get gene annotations +annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) +# Change style to UCSC +seqlevelsStyle(annotations) <- 'UCSC' +genome(annotations) <- "hg38" + +# File with ATAC per fragment information file +frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" + +# Add in ATAC-seq data as ChromatinAssay object +chrom_assay <- CreateChromatinAssay( + counts = atac_counts, + sep = c(":", "-"), + genome = 'hg38', + fragments = frag.file, + min.cells = 10, + annotation = annotations +) + +# Add the ATAC assay to the multiome object +obj.multi[["ATAC"]] <- chrom_assay + +# Filter ATAC data based on QC metrics +obj.multi <- subset( + x = obj.multi, + subset = nCount_ATAC < 7e4 & + nCount_ATAC > 5e3 & + nCount_RNA < 25000 & + nCount_RNA > 1000 & + percent.mt < 20 +) + + +``` +
+ +The scATAC-seq query dataset represents ~10,000 PBMC from a healthy donor, and is available for download [here](https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-atac-v-1-1-chromium-x-1-1-standard-2-0-0). We load in the peak/cell matrix, store the path to the fragments file, and add gene annotations to the object, following the steps as with the ATAC data in the multiome experiment. + +We note that it is important to quantify the same set of genomic features in the query dataset as are quantified in the multi-omic bridge. We therefore requantify the set of scATAC-seq peaks using the `FeatureMatrix` command. This is also described in the [Signac vignettes](https://satijalab.org/signac/articles/integrate_atac.html) and shown below. + +
+ **Load and setup the 10x scATAC-seq query** + +```{r, message=FALSE, warning=FALSE} +# Load ATAC dataset +atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" + +# Get gene annotations +annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) + +# Change to UCSC style +seqlevelsStyle(annotation) <- 'UCSC' + +# Create ChromatinAssay for ATAC data +atac_pbmc_assay <- CreateChromatinAssay( + counts = atac_pbmc_data, + sep = c(":", "-"), + fragments = fragpath, + annotation = annotation +) + +# Requantify query ATAC to have same features as multiome ATAC dataset +requant_multiome_ATAC <- FeatureMatrix( + fragments = Fragments(atac_pbmc_assay), + features = granges(obj.multi[['ATAC']]), + cells = Cells(atac_pbmc_assay) +) + +# Create assay with requantified ATAC data +ATAC_assay <- CreateChromatinAssay( + counts = requant_multiome_ATAC, + fragments = fragpath, + annotation = annotation +) + +# Create Seurat sbject +obj.atac <- CreateSeuratObject(counts = ATAC_assay,assay = 'ATAC') +obj.atac[['peak.orig']] <- atac_pbmc_assay +obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) +``` +
+ +--- + +We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). + +```{r pbmc.ref} +obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +``` +
+ **What if I want to use my own reference dataset?** + +As an alternative to using a pre-built reference, you can also use your own reference. To demonstrate, you can download a scRNA-seq dataset of 23,837 human PBMC [here](https://www.dropbox.com/s/x8mu9ye2w3a63hf/20k_PBMC_scRNA.rds?dl=0), which we have already annotated. +```{r, message=FALSE, warning=FALSE, eval=FALSE} +obj.rna = readRDS("/path/to/reference.rds") +obj.rna = SCTransform(object = obj.rna) %>% RunPCA() %>% RunUMAP(dims = 1:50, return.model = TRUE) +``` +When using your own reference, set `reference.reduction = "pca"` in the `PrepareBridgeReference` function. + +
+ +--- + +# Preprocessing/normalization for all datasets + +Prior to performing bridge integration, we normalize and pre-process each of the datasets (note that the reference has already been normalized). We normalize gene expression data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), and ATAC data using TF-IDF. + +```{r, message=FALSE, warning=FALSE} +# normalize multiome RNA +DefaultAssay(obj.multi) <- "RNA" +obj.multi <- SCTransform(obj.multi, verbose = FALSE) + +# normalize multiome ATAC +DefaultAssay(obj.multi) <- "ATAC" +obj.multi <- RunTFIDF(obj.multi) +obj.multi <- FindTopFeatures(obj.multi, min.cutoff = "q0") + +# normalize query +obj.atac <- RunTFIDF(obj.atac) + +``` + +## Map scATAC-seq dataset using bridge integration + +Now that we have the reference, query, and bridge datasets set up, we can begin integration. The bridge dataset enables translation between the scRNA-seq reference and the scATAC-seq query, effectively augmenting the reference so that it can map a new data type. We call this an extended reference, and first set it up. Note that you can save the results of this function and map multiple scATAC-seq datasets without having to rerun. + + +```{r, message=FALSE, warning=FALSE} +# Drop first dimension for ATAC reduction +dims.atac <- 2:50 +dims.rna <- 1:50 +DefaultAssay(obj.multi) <- "RNA" +DefaultAssay(obj.rna) <- "SCT" +obj.rna.ext <- PrepareBridgeReference(reference = obj.rna, + bridge = obj.multi, + reference.reduction = "spca", + reference.dims = dims.rna, + normalization.method = "SCT" +) +``` + +Now, we can directly find anchors between the extended reference and query objects. We use the `FindBridgeTransferAnchors` function, which translates the query dataset using the same dictionary as was used to translate the reference, and then identifies anchors in this space. The function is meant to mimic our `FindTransferAnchors` function, but to identify correspondences across modalities. + +```{r, message=FALSE, warning=FALSE} +bridge.anchor <- FindBridgeTransferAnchors(extended.reference = obj.rna.ext, + query = obj.atac, + reduction = "lsiproject", + dims = dims.atac +) +``` + + +Once we have identified anchors, we can map the query dataset onto the reference. The `MapQuery` function is the same as we have [previously introduced for reference mapping](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html) . It transfers cell annotations from the reference dataset, and also visualizes the query dataset on a previously computed UMAP embedding. Since our reference dataset contains cell type annotations at three levels of resolution (l1 - l3), we can transfer each level to the query dataset. + + +```{r, message=FALSE, warning=FALSE} + +obj.atac <- MapQuery(anchorset = bridge.anchor, + reference = obj.rna, + query = obj.atac, + refdata = list( + l1 = "celltype.l1", + l2 = "celltype.l2", + l3 = "celltype.l3"), + reduction.model = "wnn.umap" +) +``` + +Now we can visualize the results, plotting the scATAC-seq cells based on their predicted annotations, on the reference UMAP embedding. You can see that each scATAC-seq cell has been assigned a cell name based on the scRNA-seq defined cell ontology. + +```{r, message=FALSE, warning=FALSE} +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "ref.umap", label = TRUE) + ggtitle("ATAC") + NoLegend() +``` + +## Assessing the mapping + +To assess the mapping and cell type predictions, we will first see if the predicted cell type labels are concordant with an unsupervised analysis of the scATAC-seq dataset. We follow the standard unsupervised processing workflow for scATAC-seq data: + +```{r, message=FALSE, warning=FALSE} +obj.atac <- FindTopFeatures(obj.atac, min.cutoff = "q0") +obj.atac <- RunSVD(obj.atac) +obj.atac <- RunUMAP(obj.atac, reduction = "lsi", dims = 2:50) +``` + +Now, we visualize the predicted cluster labels on the unsupervised UMAP emebdding. We can see that predicted cluster labels (from the scRNA-seq reference) are concordant with the structure of the scATAC-seq data. However, there are some cell types (i.e. Treg), that do not appear to separate in unsupervised analysis. These may be prediction errors, or cases where the reference mapping provides additional resolution. + +```{r, pbmcdimplots, message=FALSE, warning=FALSE} +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) +``` + +Lastly, we validate the predicted cell types for the scATAC-seq data by examining their chromatin accessibility profiles at canonical loci. We use the `CoveragePlot` function to visualize accessibility patterns at the CD8A, FOXP3, and RORC, after grouping cells by their predicted labels. We see expected patterns in each case. For example, the PAX5 locus exhibits peaks that are accessible exclusively in B cells, and the CD8A locus shows the same in CD8 T cell subsets. Similarly, the accessibility of FOXP3, a canonical marker of regulatory T cells (Tregs), in predicted Tregs provides strong support for the accuracy of our prediction. + +```{r, message=FALSE, warning=FALSE} +CoveragePlot(obj.atac, region = "PAX5", group.by = "predicted.l1", idents = c("B", "CD4 T", "Mono", "NK"), window = 200, extend.upstream = -150000) +CoveragePlot(obj.atac, region = "CD8A", group.by = "predicted.l2", idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), extend.downstream = 5000, extend.upstream = 5000) +CoveragePlot(obj.atac, region = "FOXP3", group.by = "predicted.l2", idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), extend.downstream = 0, extend.upstream = 0) +CoveragePlot(obj.atac, region = "RORC", group.by = "predicted.l2", idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), extend.downstream = 5000, extend.upstream = 5000) +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml index 7ed3c016c..9e7076f59 100644 --- a/vignettes/vignettes.yaml +++ b/vignettes/vignettes.yaml @@ -81,6 +81,13 @@ summary: | Examples of how to perform normalization, feature selection, integration, and differential expression with an updated version of sctransform. image: assets/sctransform_v2.png + + - title: Cross-modality Bridge Integration + name: bridge_integration_vignette + summary: | + Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. + image: bridge_integration.png + - category: Other vignettes: - title: Visualization From 746c7429b164587b3abf2e52213e41771659be5f Mon Sep 17 00:00:00 2001 From: austinhartman Date: Wed, 2 Mar 2022 10:09:45 -0500 Subject: [PATCH 113/979] separate preprocessing section with horizontal line --- vignettes/bridge_integration_vignette.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vignettes/bridge_integration_vignette.Rmd b/vignettes/bridge_integration_vignette.Rmd index 8b20ecaa3..9561ec0a5 100644 --- a/vignettes/bridge_integration_vignette.Rmd +++ b/vignettes/bridge_integration_vignette.Rmd @@ -111,6 +111,8 @@ obj.multi <- subset( ``` +--- + The scATAC-seq query dataset represents ~10,000 PBMC from a healthy donor, and is available for download [here](https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-atac-v-1-1-chromium-x-1-1-standard-2-0-0). We load in the peak/cell matrix, store the path to the fragments file, and add gene annotations to the object, following the steps as with the ATAC data in the multiome experiment. We note that it is important to quantify the same set of genomic features in the query dataset as are quantified in the multi-omic bridge. We therefore requantify the set of scATAC-seq peaks using the `FeatureMatrix` command. This is also described in the [Signac vignettes](https://satijalab.org/signac/articles/integrate_atac.html) and shown below. From d3845a523d9abc5175e3841173a61fcd2c9748d3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 15 Mar 2022 01:43:26 -0400 Subject: [PATCH 114/979] update for spca --- R/integration.R | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/R/integration.R b/R/integration.R index 6732eb8ec..c54225ba6 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7102,7 +7102,7 @@ PrepareBridgeReference <- function ( bridge, reference.reduction = 'pca', reference.dims = 1:50, - normalization.method = c('SCT', 'LogNormalization'), + normalization.method = c('SCT', 'LogNormalize'), reference.assay = NULL, bridge.ref.assay = 'RNA', bridge.query.assay = 'ATAC', @@ -7115,9 +7115,12 @@ PrepareBridgeReference <- function ( verbose = TRUE ) { ## checking + if (!is.null(supervised.reduction)) { supervised.reduction <- match.arg(arg = supervised.reduction) + } if (!is.null(x = bridge.query.reduction) & !is.null(x = supervised.reduction)) { - stop('bridge.query.reduction and supervised.reduction can only set one') + stop('bridge.query.reduction and supervised.reduction can only set one.', + 'If you want to set bridge.query.reduction, supervised.reduction should set to NULL') } if (is.null(x = bridge.query.reduction) & is.null(x = supervised.reduction)) { stop('Both bridge.query.reduction and supervised.reduction are NULL. One of them needs to be set') @@ -7227,6 +7230,7 @@ PrepareBridgeReference <- function ( #' \code{\link{PrepareBridgeReference}} #' @param query A query Seurat object #' @param query.assay Assay name for query-bridge integration +#' @param scale Determine if scale the query data for projection #' @param dims Number of dimensions for query-bridge integration #' @param reduction Dimensional reduction to perform when finding anchors. #' Options are: @@ -7252,6 +7256,7 @@ FindBridgeTransferAnchors <- function( query, query.assay = NULL, dims = 1:30, + scale = FALSE, reduction = c('lsiproject', 'pcaproject'), verbose = TRUE ) { @@ -7264,24 +7269,24 @@ FindBridgeTransferAnchors <- function( reference.reduction <- params$reference.reduction bridge.ref.reduction <- params$bridge.ref.reduction DefaultAssay(extended.reference@bridge) <- bridge.query.assay - if (reduction == "lsiproject") { + query.anchor <- FindTransferAnchors( reference = extended.reference@bridge, reference.reduction = bridge.query.reduction, dims = dims, query = query, reduction = reduction, - scale = FALSE, + scale = scale, features = rownames(extended.reference@bridge[[bridge.query.reduction]]@feature.loadings), k.filter = NA, verbose = verbose ) - query <- MapQuery(anchorset = query.anchor, - reference = extended.reference@bridge, - query = query, - store.weights = TRUE - ) - } + + query <- MapQuery(anchorset = query.anchor, + reference = extended.reference@bridge, + query = query, + store.weights = TRUE + ) bridge_anchor <- FindBridgeAnchor( object.list = list(extended.reference@reference, query), bridge.object = extended.reference@bridge, From f7354c94ad1f01c3bb925f955e3ab81b5b6d9cbf Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 16 Mar 2022 16:40:40 -0400 Subject: [PATCH 115/979] set variable features --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index c54225ba6..f895c2d2b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6697,7 +6697,7 @@ IntegrateSketchEmbeddings <- function( ) { dictionary.method <- match.arg(arg = dictionary.method) # check features - features <- features %||%rownames(x = Loadings(object = atom.sketch.object[[atom.sketch.reduction]])) + features <- features %||% VariableFeatures(object = atom.sketch.object) features <- intersect(features, rownames(object)) # check cell names cells.sketch <- intersect(x = Cells(atom.sketch.object), y = Cells(object)) @@ -6769,7 +6769,7 @@ IntegrateSketchEmbeddings <- function( ) object[[reduction.name]] <- CreateDimReducObject( embeddings = as.matrix(emb), - loadings = Loadings(atom.sketch.object[[atom.sketch.reduction]])[features,], + loadings = Loadings(atom.sketch.object[[atom.sketch.reduction]]), key = reduction.key, assay = assay ) From aa8d504f1c8768eef440237923e2b3b535d2853f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 18 Mar 2022 17:47:58 -0400 Subject: [PATCH 116/979] add reference to integration --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index f895c2d2b..d1b403f65 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5972,9 +5972,9 @@ FindBridgeAnchor <- function(object.list, } ) } + reference <- reference %||% c(1) + query <- setdiff(c(1,2), reference) if (anchor.type == "Transfer") { - reference <- reference %||% c(1) - query <- setdiff(c(1,2), reference) stored.bridge.weights <- FALSE # check weight matrix if (is.null(bridge.object@tools$MapQuery)) { From a9c9d357bbed198bdf52ef034688f0e8cd062074 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 19 Mar 2022 22:52:41 -0400 Subject: [PATCH 117/979] add bridge integration anchor --- NAMESPACE | 1 + R/integration.R | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 7ed86b3c1..913cce832 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -185,6 +185,7 @@ export(FeatureScatter) export(FetchData) export(FilterSlideSeq) export(FindAllMarkers) +export(FindBridgeIntegrationAnchors) export(FindBridgeTransferAnchors) export(FindClusters) export(FindConservedMarkers) diff --git a/R/integration.R b/R/integration.R index d1b403f65..502662d72 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7300,6 +7300,68 @@ FindBridgeTransferAnchors <- function( } + +#' Find integration bridge anchors between query and extended bridge-reference +#' +#' Find a set of anchors between unimodal query and the other unimodal reference +#' using a pre-computed \code{\link{BridgeReferenceSet}}. +#' These integration anchors can later be used to integrate query and reference +#' using the \code{\link{IntegrateEmbeddings}} object. +#' +#' @inheritParams FindIntegrationAnchors +#' @export +#' @return Returns an \code{AnchorSet} object that can be used as input to +#' \code{\link{IntegrateEmbeddings}}. +#' +FindBridgeIntegrationAnchors <- function( + extended.reference, + query, + query.assay = NULL, + dims = 1:30, + scale = FALSE, + reduction = c('lsiproject', 'pcaproject'), + verbose = TRUE +) { + reduction <- match.arg(arg = reduction) + query.assay <- query.assay %||% DefaultAssay(query) + DefaultAssay(query) <- query.assay + params <- slot(object = extended.reference, name = "params") + bridge.query.assay <- params$bridge.query.assay + bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction + reference.reduction <- params$reference.reduction + bridge.ref.reduction <- params$bridge.ref.reduction + DefaultAssay(extended.reference@bridge) <- bridge.query.assay + + query.anchor <- FindTransferAnchors( + reference = extended.reference@bridge, + reference.reduction = bridge.query.reduction, + dims = dims, + query = query, + reduction = reduction, + scale = scale, + features = rownames(extended.reference@bridge[[bridge.query.reduction]]@feature.loadings), + k.filter = NA, + verbose = verbose + ) + query <- MapQuery(anchorset = query.anchor, + reference = extended.reference@bridge, + query = query, + store.weights = TRUE + ) + bridge_anchor <- FindBridgeAnchor( + object.list = list(extended.reference@reference, query), + bridge.object = extended.reference@bridge, + reduction = 'direct', + object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), + bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), + anchor.type = "Integration", + reference.bridge.stored = TRUE, + verbose = verbose + ) + return(bridge_anchor) +} + + #' Perform integration on the joint PCA cell embeddings. #' #' This is a convenience wrapper function around the following three functions From 1ef46255148d55973878151bc4b7a80c4c8786d1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 19 Mar 2022 23:01:25 -0400 Subject: [PATCH 118/979] update docu --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 502662d72..ece58ca2b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7308,7 +7308,7 @@ FindBridgeTransferAnchors <- function( #' These integration anchors can later be used to integrate query and reference #' using the \code{\link{IntegrateEmbeddings}} object. #' -#' @inheritParams FindIntegrationAnchors +#' @inheritParams FindBridgeTransferAnchors #' @export #' @return Returns an \code{AnchorSet} object that can be used as input to #' \code{\link{IntegrateEmbeddings}}. From b3033390f80b3f330d8960a0384e7e818d5eef0c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 20 Mar 2022 23:27:09 -0400 Subject: [PATCH 119/979] add integration.reduction --- R/integration.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index ece58ca2b..20861d477 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7309,6 +7309,15 @@ FindBridgeTransferAnchors <- function( #' using the \code{\link{IntegrateEmbeddings}} object. #' #' @inheritParams FindBridgeTransferAnchors +#' @param integration.reduction Dimensional reduction to perform when finding anchors +#' between query and reference. +#' Options are: +#' \itemize{ +#' \item{direct: find anchors directly on the bridge representation space} +#' \item{cca: perform cca on the on the bridge representation space and find anchors +#' } +#' } +#' #' @export #' @return Returns an \code{AnchorSet} object that can be used as input to #' \code{\link{IntegrateEmbeddings}}. @@ -7320,9 +7329,11 @@ FindBridgeIntegrationAnchors <- function( dims = 1:30, scale = FALSE, reduction = c('lsiproject', 'pcaproject'), + integration.reduction = c('direct', 'cca'), verbose = TRUE ) { reduction <- match.arg(arg = reduction) + integration.reduction <- match.arg(arg = integration.reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay params <- slot(object = extended.reference, name = "params") @@ -7351,7 +7362,7 @@ FindBridgeIntegrationAnchors <- function( bridge_anchor <- FindBridgeAnchor( object.list = list(extended.reference@reference, query), bridge.object = extended.reference@bridge, - reduction = 'direct', + reduction = integration.reduction, object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Integration", From d20d2ab33644eefc932b033e6fc65095c05cd8e7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 21 Mar 2022 18:56:20 -0400 Subject: [PATCH 120/979] change fast integration name --- NAMESPACE | 2 +- R/integration.R | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 913cce832..264465697 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -177,7 +177,7 @@ export(Embeddings) export(ExpMean) export(ExpSD) export(ExpVar) -export(FastAnchorIntegration) +export(FastRPCAIntegration) export(FastRowScale) export(FeatureLocator) export(FeaturePlot) diff --git a/R/integration.R b/R/integration.R index 20861d477..c927e59c8 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7391,10 +7391,9 @@ FindBridgeIntegrationAnchors <- function( #' @return Returns a Seurat object with integrated dimensional reduction #' @export #' -FastAnchorIntegration <- function( +FastRPCAIntegration <- function( object.list, reference = NULL, - reduction = 'rpca', anchor.features = 2000, k.anchor = 20, dims = 1:30, From a4d5975831cebae0e20a3fbf73cc9df3a2892c8e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 21 Mar 2022 19:58:35 -0400 Subject: [PATCH 121/979] fix reduction --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index c927e59c8..b6e1eab9f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7408,6 +7408,7 @@ FastRPCAIntegration <- function( yes = pblapply, no = future_lapply ) + reduction <- 'rpca' if (is.numeric(x = anchor.features)) { anchor.features <- SelectIntegrationFeatures( object.list = object.list, From d3e9b71acaa9307981b3f63696cde1a35ad1e986 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 22 Mar 2022 17:33:59 -0400 Subject: [PATCH 122/979] add sct in fastinte --- R/integration.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/integration.R b/R/integration.R index b6e1eab9f..e28f9724d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7398,6 +7398,7 @@ FastRPCAIntegration <- function( k.anchor = 20, dims = 1:30, scale = TRUE, + normalization.method = c("LogNormalize", "SCT"), new.reduction.name = 'integrated_dr', npcs = 50, findintegrationanchors.args = list(), @@ -7428,6 +7429,11 @@ FastRPCAIntegration <- function( } ) } + if (normalization.method == 'SCT') { + object.list <- PrepSCTIntegration(object.list = object.list, + anchor.features = anchor.features + ) + } anchor <- invoke( .fn = FindIntegrationAnchors, .args = c(list( @@ -7435,6 +7441,7 @@ FastRPCAIntegration <- function( reference = reference, anchor.features = anchor.features, reduction = reduction, + normalization.method = normalization.method, scale = scale, k.anchor = k.anchor, dims = dims, From a392e9887b35e8e288834ebf9c9e8f4c53342012 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 22 Mar 2022 18:25:25 -0400 Subject: [PATCH 123/979] fix fast inte SCT --- R/integration.R | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/R/integration.R b/R/integration.R index e28f9724d..44e329264 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7417,23 +7417,25 @@ FastRPCAIntegration <- function( verbose = FALSE ) } - if (reduction == 'rpca') { + if (normalization.method == 'SCT') { + scale <- FALSE + object.list <- PrepSCTIntegration(object.list = object.list, + anchor.features = anchor.features + ) + } if (verbose) { message('Performing PCA for each object') } object.list <- my.lapply(X = object.list, FUN = function(x) { - x <- ScaleData(x, features = anchor.features, do.scale = scale, verbose = FALSE) + if (normalization.method != 'SCT') { + x <- ScaleData(x, features = anchor.features, do.scale = scale, verbose = FALSE) + } x <- RunPCA(x, features = anchor.features, verbose = FALSE) return(x) } ) - } - if (normalization.method == 'SCT') { - object.list <- PrepSCTIntegration(object.list = object.list, - anchor.features = anchor.features - ) - } + anchor <- invoke( .fn = FindIntegrationAnchors, .args = c(list( @@ -7453,10 +7455,13 @@ FastRPCAIntegration <- function( y = object.list[2:length(object.list)] ) anchor.feature <- slot(object = anchor, name = 'anchor.features') - object_merged <- ScaleData(object = object_merged, - features = anchor.feature, - verbose = FALSE - ) + if (normalization.method != 'SCT') { + object_merged <- ScaleData(object = object_merged, + features = anchor.feature, + do.scale = scale, + verbose = FALSE + ) + } object_merged <- RunPCA(object_merged, features = anchor.feature, verbose = FALSE, From 42bd3c3ebf7cb39078b02ce31d8f4d948b431a6a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 31 Mar 2022 13:34:43 -0400 Subject: [PATCH 124/979] update docu --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 44e329264..a615731d3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7084,7 +7084,7 @@ ProjectDimReduc <- function(query, #' bridge.query.reduction is used for the bridge-query integration} #' } #' @param bridge.query.reduction Name of dimensions used for the bridge-query harmonization. -#' Requires either 'bridge.query.reduction' or 'supervised.reduction' to be not NULL. +#' 'bridge.query.reduction' and 'supervised.reduction' cannot be NULL together. #' @param bridge.query.features Features used for bridge query dimensional reduction #' (default is NULL which uses VariableFeatures from the bridge object) #' @param laplacian.reduction.name Name of dimensional reduction name of graph laplacian eigenspace (default is 'lap') @@ -7314,7 +7314,7 @@ FindBridgeTransferAnchors <- function( #' Options are: #' \itemize{ #' \item{direct: find anchors directly on the bridge representation space} -#' \item{cca: perform cca on the on the bridge representation space and find anchors +#' \item{cca: perform cca on the on the bridge representation space and then find anchors #' } #' } #' From 4d247be65d1dc739245f92a077f4867a1332b625 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 4 Apr 2022 20:30:27 -0400 Subject: [PATCH 125/979] update params --- R/integration.R | 7 ++----- man/IntegrateSketchEmbeddings.Rd | 7 ------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/R/integration.R b/R/integration.R index 44e329264..13010054f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6666,9 +6666,6 @@ IntegrationReferenceIndex <- function(object) { #' @param assay Assay name for original expression (default is 'RNA') #' @param atom.sketch.reduction Dimensional reduction name for batch-corrected embeddings #' in the sketched object (default is 'integrated_dr') -#' @param reduction.name dimensional reduction name (default is 'pca.correct') -#' @param reduction.key dimensional reduction key, specifies the string before -#' the number for the dimension names. (default is 'PCcorrect_') #' @param dictionary.method Methods to construct sketch-cell representation #' for all cells (default is 'sketch'). Can be one of: #' \itemize{ @@ -6689,12 +6686,12 @@ IntegrateSketchEmbeddings <- function( features = NULL, assay = 'RNA', atom.sketch.reduction = 'integrated_dr', - reduction.name ='pca.correct', - reduction.key = 'PCcorrect_', dictionary.method = c('sketch', 'data'), sketch.ratio = 0.8, verbose = TRUE ) { + reduction.name = atom.sketch.reduction + reduction.key = Key(object = atom.sketch.object[[atom.sketch.reduction]]) dictionary.method <- match.arg(arg = dictionary.method) # check features features <- features %||% VariableFeatures(object = atom.sketch.object) diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index af974711b..941b90060 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -10,8 +10,6 @@ IntegrateSketchEmbeddings( features = NULL, assay = "RNA", atom.sketch.reduction = "integrated_dr", - reduction.name = "pca.correct", - reduction.key = "PCcorrect_", dictionary.method = c("sketch", "data"), sketch.ratio = 0.8, verbose = TRUE @@ -29,11 +27,6 @@ IntegrateSketchEmbeddings( \item{atom.sketch.reduction}{Dimensional reduction name for batch-corrected embeddings in the sketched object (default is 'integrated_dr')} -\item{reduction.name}{dimensional reduction name (default is 'pca.correct')} - -\item{reduction.key}{dimensional reduction key, specifies the string before -the number for the dimension names. (default is 'PCcorrect_')} - \item{dictionary.method}{Methods to construct sketch-cell representation for all cells (default is 'sketch'). Can be one of: \itemize{ From 2c54d9758a63b919c6e4d023ce446617e1adef1d Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 7 Apr 2022 16:17:51 -0400 Subject: [PATCH 126/979] Update renv --- .renvignore | 2 + renv.lock | 1105 +++++++++++++++++++++++++++++++++------------ renv/.gitignore | 3 +- renv/activate.R | 347 ++++++++++++-- renv/settings.dcf | 3 + 5 files changed, 1119 insertions(+), 341 deletions(-) create mode 100644 .renvignore diff --git a/.renvignore b/.renvignore new file mode 100644 index 000000000..d4a6a9b06 --- /dev/null +++ b/.renvignore @@ -0,0 +1,2 @@ +vignettes/ +tests/ diff --git a/renv.lock b/renv.lock index 900952632..62fe30bb5 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.1.0", + "Version": "4.1.3", "Repositories": [ { "Name": "CRAN", @@ -8,1103 +8,1610 @@ } ] }, - "Bioconductor": { - "Version": "3.13" - }, "Packages": { "BH": { "Package": "BH", - "Version": "1.75.0-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "e4c04affc2cac20c8fec18385cd14691" - }, - "BiocGenerics": { - "Package": "BiocGenerics", - "Version": "0.38.0", - "Source": "Bioconductor", - "Hash": "de5e346fed0fc44a0424a0531cf5d12d" - }, - "BiocManager": { - "Package": "BiocManager", - "Version": "1.30.16", + "Version": "1.78.0-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "2fdca0877debdd4668190832cdee4c31" + "Hash": "4e348572ffcaa2fb1e610e7a941f6f3a", + "Requirements": [] }, "FNN": { "Package": "FNN", "Version": "1.1.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "b56998fff55e4a4b4860ad6e8c67e0f9" - }, - "IRanges": { - "Package": "IRanges", - "Version": "2.26.0", - "Source": "Bioconductor", - "Hash": "7859b18fedba59e99467df40b42e3553" + "Hash": "b56998fff55e4a4b4860ad6e8c67e0f9", + "Requirements": [] }, "KernSmooth": { "Package": "KernSmooth", "Version": "2.23-20", "Source": "Repository", "Repository": "CRAN", - "Hash": "8dcfa99b14c296bc9f1fd64d52fd3ce7" + "Hash": "8dcfa99b14c296bc9f1fd64d52fd3ce7", + "Requirements": [] }, "MASS": { "Package": "MASS", - "Version": "7.3-54", + "Version": "7.3-55", "Source": "Repository", "Repository": "CRAN", - "Hash": "0e59129db205112e3963904db67fd0dc" + "Hash": "c5232ffb549f6d7a04a152c34ca1353d", + "Requirements": [] }, "Matrix": { "Package": "Matrix", - "Version": "1.3-4", + "Version": "1.4-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "4ed05e9c9726267e4a5872e09c04587c" + "Hash": "130c0caba175739d98f2963c6a407cf6", + "Requirements": [ + "lattice" + ] }, "R6": { "Package": "R6", "Version": "2.5.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "470851b6d5d0ac559e9d01bb352b4021" + "Hash": "470851b6d5d0ac559e9d01bb352b4021", + "Requirements": [] }, "RANN": { "Package": "RANN", "Version": "2.6.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "d128ea05a972d3e67c6f39de52c72bd7" + "Hash": "d128ea05a972d3e67c6f39de52c72bd7", + "Requirements": [] }, "RColorBrewer": { "Package": "RColorBrewer", - "Version": "1.1-2", + "Version": "1.1-3", "Source": "Repository", "Repository": "CRAN", - "Hash": "e031418365a7f7a766181ab5a41a5716" + "Hash": "45f0398006e83a5b10b72a90663d8d8c", + "Requirements": [] }, "ROCR": { "Package": "ROCR", "Version": "1.0-11", "Source": "Repository", "Repository": "CRAN", - "Hash": "cc151930e20e16427bc3d0daec62b4a9" + "Hash": "cc151930e20e16427bc3d0daec62b4a9", + "Requirements": [ + "gplots" + ] }, "RSpectra": { "Package": "RSpectra", "Version": "0.16-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "a41329d24d5a98eaed2bd0159adb1b5f" + "Hash": "a41329d24d5a98eaed2bd0159adb1b5f", + "Requirements": [ + "Matrix", + "Rcpp", + "RcppEigen" + ] }, "Rcpp": { "Package": "Rcpp", - "Version": "1.0.7", + "Version": "1.0.8.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "dab19adae4440ae55aa8a9d238b246bb" + "Hash": "32e79b908fda56ee57fe518a8d37b864", + "Requirements": [] }, "RcppAnnoy": { "Package": "RcppAnnoy", "Version": "0.0.19", "Source": "Repository", "Repository": "CRAN", - "Hash": "5681153e3eb103725e35ac5f7ebca910" + "Hash": "5681153e3eb103725e35ac5f7ebca910", + "Requirements": [ + "Rcpp" + ] }, "RcppArmadillo": { "Package": "RcppArmadillo", - "Version": "0.10.6.0.0", + "Version": "0.11.0.0.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "d34bcef6e2df81ab2d44c7fbe8b4d3f0" + "Hash": "704ac7bb6a49df5a9a2b014793fcc6cb", + "Requirements": [ + "Rcpp" + ] }, "RcppEigen": { "Package": "RcppEigen", "Version": "0.3.3.9.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "ddfa72a87fdf4c80466a20818be91d00" + "Hash": "ddfa72a87fdf4c80466a20818be91d00", + "Requirements": [ + "Matrix", + "Rcpp" + ] }, "RcppProgress": { "Package": "RcppProgress", "Version": "0.4.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "1c0aa18b97e6aaa17f93b8b866c0ace5" + "Hash": "1c0aa18b97e6aaa17f93b8b866c0ace5", + "Requirements": [] + }, + "RcppTOML": { + "Package": "RcppTOML", + "Version": "0.1.7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f8a578aa91321ecec1292f1e2ffadeda", + "Requirements": [ + "Rcpp" + ] }, "Rtsne": { "Package": "Rtsne", "Version": "0.15", "Source": "Repository", "Repository": "CRAN", - "Hash": "f153432c4ca15b937ccfaa40f167c892" - }, - "S4Vectors": { - "Package": "S4Vectors", - "Version": "0.30.0", - "Source": "Bioconductor", - "Hash": "a750488825efca8e08a30e8157821b9b" + "Hash": "f153432c4ca15b937ccfaa40f167c892", + "Requirements": [ + "Rcpp" + ] }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9001", + "Version": "4.9.9.9010", "Source": "GitHub", "RemoteType": "github", - "RemoteUsername": "mojaveazure", + "RemoteHost": "api.github.com", "RemoteRepo": "seurat-object", + "RemoteUsername": "mojaveazure", "RemoteRef": "feat/standard", - "RemoteSha": "a7a542f503d3ed3f0836170bdaf89e4d278d30ab", - "RemoteHost": "api.github.com", - "Hash": "3a2f1268f710da71f5fc8d5419b75778" + "RemoteSha": "3114047de4187bfb1d532293ce8513f0732c1a17", + "Hash": "d2d746377b67b152f40d5166451b6d9b", + "Requirements": [ + "Matrix", + "Rcpp", + "RcppEigen", + "rlang", + "spam" + ] }, "abind": { "Package": "abind", "Version": "1.4-5", "Source": "Repository", "Repository": "CRAN", - "Hash": "4f57884290cc75ab22f4af9e9d4ca862" + "Hash": "4f57884290cc75ab22f4af9e9d4ca862", + "Requirements": [] }, "askpass": { "Package": "askpass", "Version": "1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "e8a22846fff485f0be3770c2da758713" + "Hash": "e8a22846fff485f0be3770c2da758713", + "Requirements": [ + "sys" + ] }, "base64enc": { "Package": "base64enc", "Version": "0.1-3", "Source": "Repository", "Repository": "CRAN", - "Hash": "543776ae6848fde2f48ff3816d0628bc" + "Hash": "543776ae6848fde2f48ff3816d0628bc", + "Requirements": [] }, "bitops": { "Package": "bitops", "Version": "1.0-7", "Source": "Repository", "Repository": "CRAN", - "Hash": "b7d8d8ee39869c18d8846a184dd8a1af" + "Hash": "b7d8d8ee39869c18d8846a184dd8a1af", + "Requirements": [] }, "brio": { "Package": "brio", - "Version": "1.1.2", + "Version": "1.1.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "2f01e16ff9571fe70381c7b9ae560dc4" + "Hash": "976cf154dfb043c012d87cddd8bca363", + "Requirements": [] }, "bslib": { "Package": "bslib", - "Version": "0.3.0", + "Version": "0.3.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "074ebc936dbcecd7115ed8083643b550" + "Hash": "56ae7e1987b340186a8a5a157c2ec358", + "Requirements": [ + "htmltools", + "jquerylib", + "jsonlite", + "rlang", + "sass" + ] }, "caTools": { "Package": "caTools", "Version": "1.18.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "34d90fa5845004236b9eacafc51d07b2" + "Hash": "34d90fa5845004236b9eacafc51d07b2", + "Requirements": [ + "bitops" + ] }, "cachem": { "Package": "cachem", "Version": "1.0.6", "Source": "Repository", "Repository": "CRAN", - "Hash": "648c5b3d71e6a37e3043617489a0a0e9" + "Hash": "648c5b3d71e6a37e3043617489a0a0e9", + "Requirements": [ + "fastmap", + "rlang" + ] }, "callr": { "Package": "callr", "Version": "3.7.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "461aa75a11ce2400245190ef5d3995df" + "Hash": "461aa75a11ce2400245190ef5d3995df", + "Requirements": [ + "R6", + "processx" + ] }, "cli": { "Package": "cli", - "Version": "3.0.1", + "Version": "3.2.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "e3ae5d68dea0c55a12ea12a9fda02e61" + "Hash": "1bdb126893e9ce6aae50ad1d6fc32faf", + "Requirements": [ + "glue" + ] }, "cluster": { "Package": "cluster", "Version": "2.1.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "ce49bfe5bc0b3ecd43a01fe1b01c2243" + "Hash": "ce49bfe5bc0b3ecd43a01fe1b01c2243", + "Requirements": [] }, "codetools": { "Package": "codetools", "Version": "0.2-18", "Source": "Repository", "Repository": "CRAN", - "Hash": "019388fc48e48b3da0d3a76ff94608a8" + "Hash": "019388fc48e48b3da0d3a76ff94608a8", + "Requirements": [] }, "colorspace": { "Package": "colorspace", - "Version": "2.0-2", + "Version": "2.0-3", "Source": "Repository", "Repository": "CRAN", - "Hash": "6baccb763ee83c0bd313460fdb8b8a84" + "Hash": "bb4341986bc8b914f0f0acf2e4a3f2f7", + "Requirements": [] }, "commonmark": { "Package": "commonmark", - "Version": "1.7", + "Version": "1.8.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "0f22be39ec1d141fd03683c06f3a6e67" + "Hash": "2ba81b120c1655ab696c935ef33ea716", + "Requirements": [] }, "cowplot": { "Package": "cowplot", "Version": "1.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "b418e8423699d11c7f2087c2bfd07da2" + "Hash": "b418e8423699d11c7f2087c2bfd07da2", + "Requirements": [ + "ggplot2", + "gtable", + "rlang", + "scales" + ] }, "cpp11": { "Package": "cpp11", - "Version": "0.4.0", + "Version": "0.4.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "40ba3fd26c8f61d8d14d334bc7761df9" + "Hash": "fa53ce256cd280f468c080a58ea5ba8c", + "Requirements": [] }, "crayon": { "Package": "crayon", - "Version": "1.4.1", + "Version": "1.5.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "e75525c55c70e5f4f78c9960a4b402e9" + "Hash": "8dc45fd8a1ee067a92b85ef274e66d6a", + "Requirements": [] }, "crosstalk": { "Package": "crosstalk", - "Version": "1.1.1", + "Version": "1.2.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "2b06f9e415a62b6762e4b8098d2aecbc" + "Hash": "6aa54f69598c32177e920eb3402e8293", + "Requirements": [ + "R6", + "htmltools", + "jsonlite", + "lazyeval" + ] }, "curl": { "Package": "curl", "Version": "4.3.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "022c42d49c28e95d69ca60446dbabf88" + "Hash": "022c42d49c28e95d69ca60446dbabf88", + "Requirements": [] }, "data.table": { "Package": "data.table", - "Version": "1.14.0", + "Version": "1.14.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "d1b8b1a821ee564a3515fa6c6d5c52dc" + "Hash": "36b67b5adf57b292923f5659f5f0c853", + "Requirements": [] }, "deldir": { "Package": "deldir", - "Version": "0.2-10", + "Version": "1.0-6", "Source": "Repository", "Repository": "CRAN", - "Hash": "6ba6a411bdd4c1b297bd54e2c5c98385" + "Hash": "65a3d4e2a1619bb85ae0fb64628da972", + "Requirements": [] }, "desc": { "Package": "desc", - "Version": "1.3.0", + "Version": "1.4.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "b6963166f7f10b970af1006c462ce6cd" + "Hash": "eebd27ee58fcc58714eedb7aa07d8ad1", + "Requirements": [ + "R6", + "cli", + "rprojroot" + ] }, "diffobj": { "Package": "diffobj", - "Version": "0.3.4", + "Version": "0.3.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "feb5b7455eba422a2c110bb89852e6a3" + "Hash": "bcaa8b95f8d7d01a5dedfd959ce88ab8", + "Requirements": [ + "crayon" + ] }, "digest": { "Package": "digest", - "Version": "0.6.28", + "Version": "0.6.29", "Source": "Repository", "Repository": "CRAN", - "Hash": "49b5c6e230bfec487b8917d5a0c77cca" + "Hash": "cf6b206a045a684728c3267ef7596190", + "Requirements": [] }, "dotCall64": { "Package": "dotCall64", "Version": "1.0-1", "Source": "Repository", "Repository": "CRAN", - "Hash": "d0ef6cd1546530da4d72179b52856e84" + "Hash": "d0ef6cd1546530da4d72179b52856e84", + "Requirements": [] }, "dplyr": { "Package": "dplyr", - "Version": "1.0.7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "36f1ae62f026c8ba9f9b5c9a08c03297" + "Version": "1.0.8", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ef47665e64228a17609d6df877bf86f2", + "Requirements": [ + "R6", + "generics", + "glue", + "lifecycle", + "magrittr", + "pillar", + "rlang", + "tibble", + "tidyselect", + "vctrs" + ] }, "dqrng": { "Package": "dqrng", "Version": "0.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "3ce2af5ead3b01c518fd453c7fe5a51a" + "Hash": "3ce2af5ead3b01c518fd453c7fe5a51a", + "Requirements": [ + "BH", + "Rcpp", + "sitmo" + ] }, "ellipsis": { "Package": "ellipsis", "Version": "0.3.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" + "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077", + "Requirements": [ + "rlang" + ] }, "evaluate": { "Package": "evaluate", - "Version": "0.14", + "Version": "0.15", "Source": "Repository", "Repository": "CRAN", - "Hash": "ec8ca05cffcc70569eaaad8469d2a3a7" + "Hash": "699a7a93d08c962d9f8950b2d7a227f1", + "Requirements": [] }, "fansi": { "Package": "fansi", - "Version": "0.5.0", + "Version": "1.0.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "d447b40982c576a72b779f0a3b3da227" + "Hash": "83a8afdbe71839506baa9f90eebad7ec", + "Requirements": [] }, "farver": { "Package": "farver", "Version": "2.1.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "c98eb5133d9cb9e1622b8691487f11bb" + "Hash": "c98eb5133d9cb9e1622b8691487f11bb", + "Requirements": [] }, "fastmap": { "Package": "fastmap", "Version": "1.1.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "77bd60a6157420d4ffa93b27cf6a58b8" + "Hash": "77bd60a6157420d4ffa93b27cf6a58b8", + "Requirements": [] }, "fitdistrplus": { "Package": "fitdistrplus", - "Version": "1.1-5", + "Version": "1.1-8", "Source": "Repository", "Repository": "CRAN", - "Hash": "df82a154b37aad59c6a9cc2b54614296" + "Hash": "9de7bf56c16871adf44dc88a24c7836c", + "Requirements": [ + "MASS", + "survival" + ] }, "fontawesome": { "Package": "fontawesome", "Version": "0.2.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "55624ed409e46c5f358b2c060be87f67" + "Hash": "55624ed409e46c5f358b2c060be87f67", + "Requirements": [ + "htmltools", + "rlang" + ] }, "fs": { "Package": "fs", - "Version": "1.5.0", + "Version": "1.5.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "44594a07a42e5f91fac9f93fda6d0109" + "Hash": "7c89603d81793f0d5486d91ab1fc6f1d", + "Requirements": [] }, "future": { "Package": "future", - "Version": "1.22.1", + "Version": "1.24.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "9c56382c3e53f0b4fc0fc16d88fc3974" + "Hash": "5cc7addaa73372fbee0a7d06c880068e", + "Requirements": [ + "digest", + "globals", + "listenv", + "parallelly" + ] }, "future.apply": { "Package": "future.apply", "Version": "1.8.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "f568ce73d3d59582b0f7babd0eb33d07" + "Hash": "f568ce73d3d59582b0f7babd0eb33d07", + "Requirements": [ + "future", + "globals" + ] }, "generics": { "Package": "generics", - "Version": "0.1.0", + "Version": "0.1.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "4d243a9c10b00589889fe32314ffd902" + "Hash": "177475892cf4a55865868527654a7741", + "Requirements": [] }, "ggplot2": { "Package": "ggplot2", "Version": "3.3.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "d7566c471c7b17e095dd023b9ef155ad" + "Hash": "d7566c471c7b17e095dd023b9ef155ad", + "Requirements": [ + "MASS", + "digest", + "glue", + "gtable", + "isoband", + "mgcv", + "rlang", + "scales", + "tibble", + "withr" + ] }, "ggrepel": { "Package": "ggrepel", "Version": "0.9.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "08ab869f37e6a7741a64ab9069bcb67d" + "Hash": "08ab869f37e6a7741a64ab9069bcb67d", + "Requirements": [ + "Rcpp", + "ggplot2", + "rlang", + "scales" + ] }, "ggridges": { "Package": "ggridges", "Version": "0.5.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "9d028e8f37c84dba356ce3c367a1978e" + "Hash": "9d028e8f37c84dba356ce3c367a1978e", + "Requirements": [ + "ggplot2", + "plyr", + "scales", + "withr" + ] }, "globals": { "Package": "globals", "Version": "0.14.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "eca8023ed5ca6372479ebb9b3207f5ae" + "Hash": "eca8023ed5ca6372479ebb9b3207f5ae", + "Requirements": [ + "codetools" + ] }, "glue": { "Package": "glue", - "Version": "1.4.2", + "Version": "1.6.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "6efd734b14c6471cfe443345f3e35e29" + "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e", + "Requirements": [] }, "goftest": { "Package": "goftest", - "Version": "1.2-2", + "Version": "1.2-3", "Source": "Repository", "Repository": "CRAN", - "Hash": "3c9209bce1b10900155ea37ce954cf30" + "Hash": "dbe0201f91eeb15918dd3fbf01ee689a", + "Requirements": [] }, "gplots": { "Package": "gplots", "Version": "3.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "e65e5d5dea4cbb9ba822dcd782b2ee1f" + "Hash": "e65e5d5dea4cbb9ba822dcd782b2ee1f", + "Requirements": [ + "KernSmooth", + "caTools", + "gtools" + ] }, "gridExtra": { "Package": "gridExtra", "Version": "2.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "7d7f283939f563670a697165b2cf5560" + "Hash": "7d7f283939f563670a697165b2cf5560", + "Requirements": [ + "gtable" + ] }, "gtable": { "Package": "gtable", "Version": "0.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "ac5c6baf7822ce8732b343f14c072c4d" + "Hash": "ac5c6baf7822ce8732b343f14c072c4d", + "Requirements": [] }, "gtools": { "Package": "gtools", "Version": "3.9.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "2ace6c4a06297d0b364e0444384a2b82" + "Hash": "2ace6c4a06297d0b364e0444384a2b82", + "Requirements": [] }, "here": { "Package": "here", "Version": "1.0.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "24b224366f9c2e7534d2344d10d59211" - }, - "highr": { - "Package": "highr", - "Version": "0.9", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" + "Hash": "24b224366f9c2e7534d2344d10d59211", + "Requirements": [ + "rprojroot" + ] }, "htmltools": { "Package": "htmltools", "Version": "0.5.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "526c484233f42522278ab06fb185cb26" + "Hash": "526c484233f42522278ab06fb185cb26", + "Requirements": [ + "base64enc", + "digest", + "fastmap", + "rlang" + ] }, "htmlwidgets": { "Package": "htmlwidgets", "Version": "1.5.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "76147821cd3fcd8c4b04e1ef0498e7fb" + "Hash": "76147821cd3fcd8c4b04e1ef0498e7fb", + "Requirements": [ + "htmltools", + "jsonlite", + "yaml" + ] }, "httpuv": { "Package": "httpuv", - "Version": "1.6.3", + "Version": "1.6.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "65e865802fe6dd1bafef1dae5b80a844" + "Hash": "97fe71f0a4a1c9890e6c2128afa04bc0", + "Requirements": [ + "R6", + "Rcpp", + "later", + "promises" + ] }, "httr": { "Package": "httr", "Version": "1.4.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "a525aba14184fec243f9eaec62fbed43" + "Hash": "a525aba14184fec243f9eaec62fbed43", + "Requirements": [ + "R6", + "curl", + "jsonlite", + "mime", + "openssl" + ] }, "ica": { "Package": "ica", "Version": "1.0-2", "Source": "Repository", "Repository": "CRAN", - "Hash": "95ba9b882bb834ecbdad37338a11f3f8" + "Hash": "95ba9b882bb834ecbdad37338a11f3f8", + "Requirements": [] }, "igraph": { "Package": "igraph", - "Version": "1.2.6", + "Version": "1.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "7b1f856410253d56ea67ad808f7cdff6" + "Hash": "bf0f6f80c2eaf5c4485ecbe710dd0a30", + "Requirements": [ + "Matrix", + "magrittr", + "pkgconfig" + ] }, "irlba": { "Package": "irlba", - "Version": "2.3.3", + "Version": "2.3.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "a9ad517358000d57022401ef18ee657a" + "Hash": "066c11bb9bc75b343f3de1ecaf3b7ba2", + "Requirements": [ + "Matrix" + ] }, "isoband": { "Package": "isoband", "Version": "0.2.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "7ab57a6de7f48a8dc84910d1eca42883" + "Hash": "7ab57a6de7f48a8dc84910d1eca42883", + "Requirements": [] }, "jquerylib": { "Package": "jquerylib", "Version": "0.1.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "5aab57a3bd297eee1c1d862735972182" + "Hash": "5aab57a3bd297eee1c1d862735972182", + "Requirements": [ + "htmltools" + ] }, "jsonlite": { "Package": "jsonlite", - "Version": "1.7.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "98138e0994d41508c7a6b84a0600cfcb" - }, - "knitr": { - "Package": "knitr", - "Version": "1.34", + "Version": "1.8.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "aa958054ac6f0360926bb952ea302f0f" + "Hash": "d07e729b27b372429d42d24d503613a0", + "Requirements": [] }, "labeling": { "Package": "labeling", "Version": "0.4.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "3d5108641f47470611a32d0bdf357a72" + "Hash": "3d5108641f47470611a32d0bdf357a72", + "Requirements": [] }, "later": { "Package": "later", "Version": "1.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "7e7b457d7766bc47f2a5f21cc2984f8e" + "Hash": "7e7b457d7766bc47f2a5f21cc2984f8e", + "Requirements": [ + "Rcpp", + "rlang" + ] }, "lattice": { "Package": "lattice", - "Version": "0.20-44", + "Version": "0.20-45", "Source": "Repository", "Repository": "CRAN", - "Hash": "f36bf1a849d9106dc2af72e501f9de41" + "Hash": "b64cdbb2b340437c4ee047a1f4c4377b", + "Requirements": [] }, "lazyeval": { "Package": "lazyeval", "Version": "0.2.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "d908914ae53b04d4c0c0fd72ecc35370" + "Hash": "d908914ae53b04d4c0c0fd72ecc35370", + "Requirements": [] }, "leiden": { "Package": "leiden", "Version": "0.3.9", "Source": "Repository", "Repository": "CRAN", - "Hash": "d6768920a499f996e6025c5daecf33fb" + "Hash": "d6768920a499f996e6025c5daecf33fb", + "Requirements": [ + "Matrix", + "igraph", + "reticulate" + ] }, "lifecycle": { "Package": "lifecycle", "Version": "1.0.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "a6b6d352e3ed897373ab19d8395c98d0" + "Hash": "a6b6d352e3ed897373ab19d8395c98d0", + "Requirements": [ + "glue", + "rlang" + ] }, "listenv": { "Package": "listenv", "Version": "0.8.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "0bde42ee282efb18c7c4e63822f5b4f7" + "Hash": "0bde42ee282efb18c7c4e63822f5b4f7", + "Requirements": [] }, "lmtest": { "Package": "lmtest", - "Version": "0.9-38", + "Version": "0.9-40", "Source": "Repository", "Repository": "CRAN", - "Hash": "b0edacc02f7a3dad41a1afc385e929f4" + "Hash": "c6fafa6cccb1e1dfe7f7d122efd6e6a7", + "Requirements": [ + "zoo" + ] }, "magrittr": { "Package": "magrittr", - "Version": "2.0.1", + "Version": "2.0.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "41287f1ac7d28a92f0a286ed507928d3" + "Hash": "7ce2733a9826b3aeb1775d56fd305472", + "Requirements": [] }, "matrixStats": { "Package": "matrixStats", "Version": "0.61.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "b8e6221fc11247b12ab1b055a6f66c27" + "Hash": "b8e6221fc11247b12ab1b055a6f66c27", + "Requirements": [] }, "mgcv": { "Package": "mgcv", - "Version": "1.8-36", + "Version": "1.8-39", "Source": "Repository", "Repository": "CRAN", - "Hash": "93cc747b0e1ad882a4570463c3575c23" + "Hash": "055265005c238024e306fe0b600c89ff", + "Requirements": [ + "Matrix", + "nlme" + ] }, "mime": { "Package": "mime", - "Version": "0.11", + "Version": "0.12", "Source": "Repository", "Repository": "CRAN", - "Hash": "8974a907200fc9948d636fe7d85ca9fb" + "Hash": "18e9c28c1d3ca1560ce30658b22ce104", + "Requirements": [] }, "miniUI": { "Package": "miniUI", "Version": "0.1.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "fec5f52652d60615fdb3957b3d74324a" + "Hash": "fec5f52652d60615fdb3957b3d74324a", + "Requirements": [ + "htmltools", + "shiny" + ] }, "munsell": { "Package": "munsell", "Version": "0.5.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "6dfe8bf774944bd5595785e3229d8771" + "Hash": "6dfe8bf774944bd5595785e3229d8771", + "Requirements": [ + "colorspace" + ] }, "nlme": { "Package": "nlme", - "Version": "3.1-152", + "Version": "3.1-155", "Source": "Repository", "Repository": "CRAN", - "Hash": "35de1ce639f20b5e10f7f46260730c65" + "Hash": "74ad940dccc9e977189a5afe5fcdb7ba", + "Requirements": [ + "lattice" + ] }, "openssl": { "Package": "openssl", - "Version": "1.4.5", + "Version": "2.0.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "5406fd37ef0bf9b88c8a4f264d6ec220" + "Hash": "cf4329aac12c2c44089974559c18e446", + "Requirements": [ + "askpass" + ] }, "parallelly": { "Package": "parallelly", - "Version": "1.28.1", + "Version": "1.30.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "5300c9fc71841550bdca64d39e82af0e" + "Hash": "67db13907a9cea89c118cf82d448799f", + "Requirements": [] }, "patchwork": { "Package": "patchwork", "Version": "1.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "c446b30cb33ec125ff02588b60660ccb" + "Hash": "c446b30cb33ec125ff02588b60660ccb", + "Requirements": [ + "ggplot2", + "gtable" + ] }, "pbapply": { "Package": "pbapply", "Version": "1.5-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "11359a5bb73622ab3f4136bf57108b64" + "Hash": "11359a5bb73622ab3f4136bf57108b64", + "Requirements": [] }, "pillar": { "Package": "pillar", - "Version": "1.6.2", + "Version": "1.7.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "43f228eb4b49093d1c8a5c93cae9efe9" + "Hash": "51dfc97e1b7069e9f7e6f83f3589c22e", + "Requirements": [ + "cli", + "crayon", + "ellipsis", + "fansi", + "glue", + "lifecycle", + "rlang", + "utf8", + "vctrs" + ] }, "pkgconfig": { "Package": "pkgconfig", "Version": "2.0.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "01f28d4278f15c76cddbea05899c5d6f" + "Hash": "01f28d4278f15c76cddbea05899c5d6f", + "Requirements": [] }, "pkgload": { "Package": "pkgload", - "Version": "1.2.2", + "Version": "1.2.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "53139eedf68b98eecd5289664969c3f2" + "Hash": "7533cd805940821bf23eaf3c8d4c1735", + "Requirements": [ + "cli", + "crayon", + "desc", + "rlang", + "rprojroot", + "rstudioapi", + "withr" + ] }, "plotly": { "Package": "plotly", - "Version": "4.9.4.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "af4b92cb3828aa30002e2f945c49c2d7" + "Version": "4.10.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fbb11e44d057996ca5fe40d959cacfb0", + "Requirements": [ + "RColorBrewer", + "base64enc", + "crosstalk", + "data.table", + "digest", + "dplyr", + "ggplot2", + "htmltools", + "htmlwidgets", + "httr", + "jsonlite", + "lazyeval", + "magrittr", + "promises", + "purrr", + "rlang", + "scales", + "tibble", + "tidyr", + "vctrs", + "viridisLite" + ] }, "plyr": { "Package": "plyr", - "Version": "1.8.6", + "Version": "1.8.7", "Source": "Repository", "Repository": "CRAN", - "Hash": "ec0e5ab4e5f851f6ef32cd1d1984957f" + "Hash": "9c17c6ee41639ebdc1d7266546d3b627", + "Requirements": [ + "Rcpp" + ] }, "png": { "Package": "png", "Version": "0.1-7", "Source": "Repository", "Repository": "CRAN", - "Hash": "03b7076c234cb3331288919983326c55" + "Hash": "03b7076c234cb3331288919983326c55", + "Requirements": [] }, "polyclip": { "Package": "polyclip", "Version": "1.10-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "cb167f328b3ada4ec5cf67a7df4c900a" + "Hash": "cb167f328b3ada4ec5cf67a7df4c900a", + "Requirements": [] }, "praise": { "Package": "praise", "Version": "1.0.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "a555924add98c99d2f411e37e7d25e9f" + "Hash": "a555924add98c99d2f411e37e7d25e9f", + "Requirements": [] }, "processx": { "Package": "processx", - "Version": "3.5.2", + "Version": "3.5.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "0cbca2bc4d16525d009c4dbba156b37c" + "Hash": "8bbae1a548d0d3fdf6647bdd9d35bf6d", + "Requirements": [ + "R6", + "ps" + ] }, "promises": { "Package": "promises", "Version": "1.2.0.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "4ab2c43adb4d4699cf3690acd378d75d" + "Hash": "4ab2c43adb4d4699cf3690acd378d75d", + "Requirements": [ + "R6", + "Rcpp", + "later", + "magrittr", + "rlang" + ] }, "ps": { "Package": "ps", "Version": "1.6.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "32620e2001c1dce1af49c49dccbb9420" + "Hash": "32620e2001c1dce1af49c49dccbb9420", + "Requirements": [] }, "purrr": { "Package": "purrr", "Version": "0.3.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "97def703420c8ab10d8f0e6c72101e02" + "Hash": "97def703420c8ab10d8f0e6c72101e02", + "Requirements": [ + "magrittr", + "rlang" + ] }, "rappdirs": { "Package": "rappdirs", "Version": "0.3.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + "Hash": "5e3c5dc0b071b21fa128676560dbe94d", + "Requirements": [] }, "rematch2": { "Package": "rematch2", "Version": "2.1.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "76c9e04c712a05848ae7a23d2f170a40" + "Hash": "76c9e04c712a05848ae7a23d2f170a40", + "Requirements": [ + "tibble" + ] }, "renv": { "Package": "renv", - "Version": "0.14.0", + "Version": "0.15.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "30e5eba91b67f7f4d75d31de14bbfbdc" + "Hash": "c1078316e1d4f70275fc1ea60c0bc431", + "Requirements": [] }, "reshape2": { "Package": "reshape2", "Version": "1.4.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "bb5996d0bd962d214a11140d77589917" + "Hash": "bb5996d0bd962d214a11140d77589917", + "Requirements": [ + "Rcpp", + "plyr", + "stringr" + ] }, "reticulate": { "Package": "reticulate", - "Version": "1.22", + "Version": "1.24", "Source": "Repository", "Repository": "CRAN", - "Hash": "b34a8bb69005168078d1d546a53912b2" + "Hash": "ffdf27627a3c1537478073c43b6e7980", + "Requirements": [ + "Matrix", + "Rcpp", + "RcppTOML", + "here", + "jsonlite", + "png", + "rappdirs", + "withr" + ] }, "rlang": { "Package": "rlang", - "Version": "0.4.11", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "515f341d3affe0de9e4a7f762efb0456" - }, - "rmarkdown": { - "Package": "rmarkdown", - "Version": "2.11", + "Version": "1.0.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "320017b52d05a943981272b295750388" + "Hash": "04884d9a75d778aca22c7154b8333ec9", + "Requirements": [] }, "rpart": { "Package": "rpart", - "Version": "4.1-15", + "Version": "4.1.16", "Source": "Repository", "Repository": "CRAN", - "Hash": "9787c1fcb680e655d062e7611cadf78e" + "Hash": "ea3ca1d9473daabb3cd0f1b4f974c1ed", + "Requirements": [] }, "rprojroot": { "Package": "rprojroot", - "Version": "2.0.2", + "Version": "2.0.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "249d8cd1e74a8f6a26194a91b47f21d1" + "Hash": "1de7ab598047a87bba48434ba35d497d", + "Requirements": [] }, "rstudioapi": { "Package": "rstudioapi", "Version": "0.13", "Source": "Repository", "Repository": "CRAN", - "Hash": "06c85365a03fdaf699966cc1d3cf53ea" + "Hash": "06c85365a03fdaf699966cc1d3cf53ea", + "Requirements": [] }, "sass": { "Package": "sass", - "Version": "0.4.0", + "Version": "0.4.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "50cf822feb64bb3977bda0b7091be623" + "Hash": "f37c0028d720bab3c513fd65d28c7234", + "Requirements": [ + "R6", + "fs", + "htmltools", + "rappdirs", + "rlang" + ] }, "scales": { "Package": "scales", "Version": "1.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "6f76f71042411426ec8df6c54f34e6dd" + "Hash": "6f76f71042411426ec8df6c54f34e6dd", + "Requirements": [ + "R6", + "RColorBrewer", + "farver", + "labeling", + "lifecycle", + "munsell", + "viridisLite" + ] }, "scattermore": { "Package": "scattermore", - "Version": "0.7", + "Version": "0.8", "Source": "Repository", "Repository": "CRAN", - "Hash": "373eb417aadd7c7f35861953f3fe3deb" + "Hash": "77ef398f338597b86a3d6853c585ce38", + "Requirements": [ + "ggplot2", + "scales" + ] }, "sctransform": { "Package": "sctransform", - "Version": "0.3.2", + "Version": "0.3.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "f6423b883393222fb5c022799374ccec" + "Hash": "85ff27d18c45a92993f6d7fd09c41f79", + "Requirements": [ + "MASS", + "Matrix", + "Rcpp", + "RcppArmadillo", + "dplyr", + "future", + "future.apply", + "ggplot2", + "gridExtra", + "magrittr", + "matrixStats", + "reshape2", + "rlang" + ] }, "shiny": { "Package": "shiny", - "Version": "1.7.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "72bf551406bb75a182df55090dc5a25a" + "Version": "1.7.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "00344c227c7bd0ab5d78052c5d736c44", + "Requirements": [ + "R6", + "bslib", + "cachem", + "commonmark", + "crayon", + "ellipsis", + "fastmap", + "fontawesome", + "glue", + "htmltools", + "httpuv", + "jsonlite", + "later", + "lifecycle", + "mime", + "promises", + "rlang", + "sourcetools", + "withr", + "xtable" + ] }, "sitmo": { "Package": "sitmo", - "Version": "2.0.1", + "Version": "2.0.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "0f9ba299f2385e686745b066c6d7a7c4" + "Hash": "c956d93f6768a9789edbc13072b70c78", + "Requirements": [ + "Rcpp" + ] }, "sourcetools": { "Package": "sourcetools", "Version": "0.1.7", "Source": "Repository", "Repository": "CRAN", - "Hash": "947e4e02a79effa5d512473e10f41797" + "Hash": "947e4e02a79effa5d512473e10f41797", + "Requirements": [] }, "spam": { "Package": "spam", - "Version": "2.7-0", + "Version": "2.8-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "1abde1bf414425ea288fd9e277d6e410" + "Hash": "516ffcf193fa4f07683611a4474db22b", + "Requirements": [ + "dotCall64" + ] }, "spatstat.core": { "Package": "spatstat.core", - "Version": "2.3-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "dda7556b6adf3f14c4cce941df3c30ba" + "Version": "2.4-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "1f996e01b9890a001823bd97818e75e9", + "Requirements": [ + "Matrix", + "abind", + "goftest", + "mgcv", + "nlme", + "rpart", + "spatstat.data", + "spatstat.geom", + "spatstat.random", + "spatstat.sparse", + "spatstat.utils", + "tensor" + ] }, "spatstat.data": { "Package": "spatstat.data", - "Version": "2.1-0", + "Version": "2.1-4", "Source": "Repository", "Repository": "CRAN", - "Hash": "4e8002e034d7d0af852b2bbcce851c2e" + "Hash": "cc7222397c84a957caaff24d6260bf5a", + "Requirements": [ + "Matrix", + "spatstat.utils" + ] }, "spatstat.geom": { "Package": "spatstat.geom", - "Version": "2.2-2", + "Version": "2.4-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "33f4612dc27fa9228cac4713bb2edfd7", + "Requirements": [ + "deldir", + "polyclip", + "spatstat.data", + "spatstat.utils" + ] + }, + "spatstat.random": { + "Package": "spatstat.random", + "Version": "2.2-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "abf3bf1c02e9af3be5b6817d463dc064" + "Hash": "c08d6e00855f11e04ac863dce8d6779e", + "Requirements": [ + "spatstat.data", + "spatstat.geom", + "spatstat.utils" + ] }, "spatstat.sparse": { "Package": "spatstat.sparse", - "Version": "2.0-0", + "Version": "2.1-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "615efff0d33e612b15dc3fc3ba0cc554" + "Hash": "66728f844c20c0a3a17f1f7ca62dcae6", + "Requirements": [ + "Matrix", + "abind", + "spatstat.utils", + "tensor" + ] }, "spatstat.utils": { "Package": "spatstat.utils", - "Version": "2.2-0", + "Version": "2.3-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "a1519c316a49d8041bdc6f37fd0249e2" + "Hash": "814e4fc977efc125d8bc51ca7b49f866", + "Requirements": [] }, "stringi": { "Package": "stringi", - "Version": "1.7.4", + "Version": "1.7.6", "Source": "Repository", "Repository": "CRAN", - "Hash": "ebaccb577da50829a3bb1b8296f318a5" + "Hash": "bba431031d30789535745a9627ac9271", + "Requirements": [] }, "stringr": { "Package": "stringr", "Version": "1.4.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "0759e6b6c0957edb1311028a49a35e76" + "Hash": "0759e6b6c0957edb1311028a49a35e76", + "Requirements": [ + "glue", + "magrittr", + "stringi" + ] }, "survival": { "Package": "survival", - "Version": "3.2-12", + "Version": "3.2-13", "Source": "Repository", "Repository": "CRAN", - "Hash": "dcdc241a75d17656fec8d02cd8215c39" + "Hash": "6f0a0fadc63bc6570fe172770f15bbc4", + "Requirements": [ + "Matrix" + ] }, "sys": { "Package": "sys", "Version": "3.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "b227d13e29222b4574486cfcbde077fa" + "Hash": "b227d13e29222b4574486cfcbde077fa", + "Requirements": [] }, "tensor": { "Package": "tensor", "Version": "1.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "25cfab6cf405c15bccf7e69ec39df090" + "Hash": "25cfab6cf405c15bccf7e69ec39df090", + "Requirements": [] }, "testthat": { "Package": "testthat", - "Version": "3.0.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "575216c9946ca70016c3ffb9c31709ba" + "Version": "3.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "affcf9db2c99dd2c7e6459ef55ed3385", + "Requirements": [ + "R6", + "brio", + "callr", + "cli", + "crayon", + "desc", + "digest", + "ellipsis", + "evaluate", + "jsonlite", + "lifecycle", + "magrittr", + "pkgload", + "praise", + "processx", + "ps", + "rlang", + "waldo", + "withr" + ] }, "tibble": { "Package": "tibble", - "Version": "3.1.4", + "Version": "3.1.6", "Source": "Repository", "Repository": "CRAN", - "Hash": "5e8ad5621e5c94b24ec07b88eee13df8" + "Hash": "8a8f02d1934dfd6431c671361510dd0b", + "Requirements": [ + "ellipsis", + "fansi", + "lifecycle", + "magrittr", + "pillar", + "pkgconfig", + "rlang", + "vctrs" + ] }, "tidyr": { "Package": "tidyr", - "Version": "1.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "450d7dfaedde58e28586b854eeece4fa" + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d8b95b7fee945d7da6888cf7eb71a49c", + "Requirements": [ + "cpp11", + "dplyr", + "ellipsis", + "glue", + "lifecycle", + "magrittr", + "purrr", + "rlang", + "tibble", + "tidyselect", + "vctrs" + ] }, "tidyselect": { "Package": "tidyselect", - "Version": "1.1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7243004a708d06d4716717fa1ff5b2fe" - }, - "tinytex": { - "Package": "tinytex", - "Version": "0.33", + "Version": "1.1.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "6e0ad90ac5669e35d5456cb61b295acb" + "Hash": "17f6da8cfd7002760a859915ce7eef8f", + "Requirements": [ + "ellipsis", + "glue", + "purrr", + "rlang", + "vctrs" + ] }, "utf8": { "Package": "utf8", "Version": "1.2.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "c9c462b759a5cc844ae25b5942654d13" + "Hash": "c9c462b759a5cc844ae25b5942654d13", + "Requirements": [] }, "uwot": { "Package": "uwot", - "Version": "0.1.10", + "Version": "0.1.11", "Source": "Repository", "Repository": "CRAN", - "Hash": "a9737c75f5f949695617b05e78281b2f" + "Hash": "f83dba1458cca3b1523e27944edb9da5", + "Requirements": [ + "FNN", + "Matrix", + "RSpectra", + "Rcpp", + "RcppAnnoy", + "RcppProgress", + "dqrng", + "irlba" + ] }, "vctrs": { "Package": "vctrs", - "Version": "0.3.8", + "Version": "0.4.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "ecf749a1b39ea72bd9b51b76292261f1" + "Hash": "95c2573b232eac82df562f9e300f9790", + "Requirements": [ + "cli", + "glue", + "rlang" + ] }, "viridisLite": { "Package": "viridisLite", "Version": "0.4.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "55e157e2aa88161bdb0754218470d204" + "Hash": "55e157e2aa88161bdb0754218470d204", + "Requirements": [] }, "waldo": { "Package": "waldo", - "Version": "0.3.1", + "Version": "0.4.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "ad8cfff5694ac5b3c354f8f2044bd976" + "Hash": "035fba89d0c86e2113120f93301b98ad", + "Requirements": [ + "cli", + "diffobj", + "fansi", + "glue", + "rematch2", + "rlang", + "tibble" + ] }, "withr": { "Package": "withr", - "Version": "2.4.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "ad03909b44677f930fa156d47d7a3aeb" - }, - "xfun": { - "Package": "xfun", - "Version": "0.26", + "Version": "2.5.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "a270216f7ffda25e53298293046d1d05" + "Hash": "c0e49a9760983e81e55cdd9be92e7182", + "Requirements": [] }, "xtable": { "Package": "xtable", "Version": "1.8-4", "Source": "Repository", "Repository": "CRAN", - "Hash": "b8acdf8af494d9ec19ccb2481a9b11c2" + "Hash": "b8acdf8af494d9ec19ccb2481a9b11c2", + "Requirements": [] }, "yaml": { "Package": "yaml", - "Version": "2.2.1", + "Version": "2.3.5", "Source": "Repository", "Repository": "CRAN", - "Hash": "2826c5d9efb0a88f657c7a679c7106db" + "Hash": "458bb38374d73bf83b1bb85e353da200", + "Requirements": [] }, "zoo": { "Package": "zoo", "Version": "1.8-9", "Source": "Repository", "Repository": "CRAN", - "Hash": "035d1c7c12593038c26fb1c2fd40c4d2" + "Hash": "035d1c7c12593038c26fb1c2fd40c4d2", + "Requirements": [ + "lattice" + ] } } } diff --git a/renv/.gitignore b/renv/.gitignore index 5246cf196..275e4ca34 100644 --- a/renv/.gitignore +++ b/renv/.gitignore @@ -1,5 +1,6 @@ -local/ library/ +local/ +cellar/ lock/ python/ staging/ diff --git a/renv/activate.R b/renv/activate.R index 304fd900a..e96125195 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -2,32 +2,50 @@ local({ # the requested version of renv - version <- "0.14.0" + version <- "0.15.4" # the project directory project <- getwd() - # allow environment variable to control activation - activate <- Sys.getenv("RENV_ACTIVATE_PROJECT") - if (!nzchar(activate)) { + # figure out whether the autoloader is enabled + enabled <- local({ - # don't auto-activate when R CMD INSTALL is running - if (nzchar(Sys.getenv("R_INSTALL_PKG"))) - return(FALSE) + # first, check config option + override <- getOption("renv.config.autoloader.enabled") + if (!is.null(override)) + return(override) - } + # next, check environment variables + # TODO: prefer using the configuration one in the future + envvars <- c( + "RENV_CONFIG_AUTOLOADER_ENABLED", + "RENV_AUTOLOADER_ENABLED", + "RENV_ACTIVATE_PROJECT" + ) + + for (envvar in envvars) { + envval <- Sys.getenv(envvar, unset = NA) + if (!is.na(envval)) + return(tolower(envval) %in% c("true", "t", "1")) + } + + # enable by default + TRUE + + }) - # bail if activation was explicitly disabled - if (tolower(activate) %in% c("false", "f", "0")) + if (!enabled) return(FALSE) # avoid recursion - if (nzchar(Sys.getenv("RENV_R_INITIALIZING"))) + if (identical(getOption("renv.autoloader.running"), TRUE)) { + warning("ignoring recursive attempt to run renv autoloader") return(invisible(TRUE)) + } # signal that we're loading renv during R startup - Sys.setenv("RENV_R_INITIALIZING" = "true") - on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE) + options(renv.autoloader.running = TRUE) + on.exit(options(renv.autoloader.running = NULL), add = TRUE) # signal that we've consented to use renv options(renv.consent = TRUE) @@ -36,21 +54,15 @@ local({ # mask 'utils' packages, will come first on the search path library(utils, lib.loc = .Library) - # check to see if renv has already been loaded - if ("renv" %in% loadedNamespaces()) { - - # if renv has already been loaded, and it's the requested version of renv, - # nothing to do - spec <- .getNamespaceInfo(.getNamespace("renv"), "spec") - if (identical(spec[["version"]], version)) - return(invisible(TRUE)) - - # otherwise, unload and attempt to load the correct version of renv + # unload renv if it's already been laoded + if ("renv" %in% loadedNamespaces()) unloadNamespace("renv") - } - # load bootstrap tools + `%||%` <- function(x, y) { + if (is.environment(x) || length(x)) x else y + } + bootstrap <- function(version, library) { # attempt to download renv @@ -76,6 +88,11 @@ local({ if (!is.na(repos)) return(repos) + # check for lockfile repositories + repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) + if (!inherits(repos, "error") && length(repos)) + return(repos) + # if we're testing, re-use the test repositories if (renv_bootstrap_tests_running()) return(getOption("renv.tests.repos")) @@ -100,6 +117,30 @@ local({ } + renv_bootstrap_repos_lockfile <- function() { + + lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") + if (!file.exists(lockpath)) + return(NULL) + + lockfile <- tryCatch(renv_json_read(lockpath), error = identity) + if (inherits(lockfile, "error")) { + warning(lockfile) + return(NULL) + } + + repos <- lockfile$R$Repositories + if (length(repos) == 0) + return(NULL) + + keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) + vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) + names(vals) <- keys + + return(vals) + + } + renv_bootstrap_download <- function(version) { # if the renv version number has 4 components, assume it must @@ -107,16 +148,20 @@ local({ nv <- numeric_version(version) components <- unclass(nv)[[1]] - methods <- if (length(components) == 4L) { - list( + # if this appears to be a development version of 'renv', we'll + # try to restore from github + dev <- length(components) == 4L + + # begin collecting different methods for finding renv + methods <- c( + renv_bootstrap_download_tarball, + if (dev) renv_bootstrap_download_github - ) - } else { - list( + else c( renv_bootstrap_download_cran_latest, renv_bootstrap_download_cran_archive ) - } + ) for (method in methods) { path <- tryCatch(method(version), error = identity) @@ -253,6 +298,33 @@ local({ } + renv_bootstrap_download_tarball <- function(version) { + + # if the user has provided the path to a tarball via + # an environment variable, then use it + tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) + if (is.na(tarball)) + return() + + # allow directories + info <- file.info(tarball, extra_cols = FALSE) + if (identical(info$isdir, TRUE)) { + name <- sprintf("renv_%s.tar.gz", version) + tarball <- file.path(tarball, name) + } + + # bail if it doesn't exist + if (!file.exists(tarball)) + return() + + fmt <- "* Bootstrapping with tarball at path '%s'." + msg <- sprintf(fmt, tarball) + message(msg) + + tarball + + } + renv_bootstrap_download_github <- function(version) { enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") @@ -306,7 +378,13 @@ local({ bin <- R.home("bin") exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" r <- file.path(bin, exe) - args <- c("--vanilla", "CMD", "INSTALL", "-l", shQuote(library), shQuote(tarball)) + + args <- c( + "--vanilla", "CMD", "INSTALL", "--no-multiarch", + "-l", shQuote(path.expand(library)), + shQuote(path.expand(tarball)) + ) + output <- system2(r, args, stdout = TRUE, stderr = TRUE) message("Done!") @@ -499,18 +577,33 @@ local({ renv_bootstrap_library_root <- function(project) { + prefix <- renv_bootstrap_profile_prefix() + path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) if (!is.na(path)) - return(path) + return(paste(c(path, prefix), collapse = "/")) - path <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) - if (!is.na(path)) { + path <- renv_bootstrap_library_root_impl(project) + if (!is.null(path)) { name <- renv_bootstrap_library_root_name(project) - return(file.path(path, name)) + return(paste(c(path, prefix, name), collapse = "/")) } - prefix <- renv_bootstrap_profile_prefix() - paste(c(project, prefix, "renv/library"), collapse = "/") + renv_bootstrap_paths_renv("library", project = project) + + } + + renv_bootstrap_library_root_impl <- function(project) { + + root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) + if (!is.na(root)) + return(root) + + type <- renv_bootstrap_project_type(project) + if (identical(type, "package")) { + userdir <- renv_bootstrap_user_dir() + return(file.path(userdir, "library")) + } } @@ -576,7 +669,7 @@ local({ return(profile) # check for a profile file (nothing to do if it doesn't exist) - path <- file.path(project, "renv/local/profile") + path <- renv_bootstrap_paths_renv("profile", profile = FALSE) if (!file.exists(path)) return(NULL) @@ -587,7 +680,7 @@ local({ # set RENV_PROFILE profile <- contents[[1L]] - if (nzchar(profile)) + if (!profile %in% c("", "default")) Sys.setenv(RENV_PROFILE = profile) profile @@ -597,7 +690,7 @@ local({ renv_bootstrap_profile_prefix <- function() { profile <- renv_bootstrap_profile_get() if (!is.null(profile)) - return(file.path("renv/profiles", profile)) + return(file.path("profiles", profile, "renv")) } renv_bootstrap_profile_get <- function() { @@ -621,6 +714,178 @@ local({ profile } + + renv_bootstrap_path_absolute <- function(path) { + + substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( + substr(path, 1L, 1L) %in% c(letters, LETTERS) && + substr(path, 2L, 3L) %in% c(":/", ":\\") + ) + + } + + renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { + renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") + root <- if (renv_bootstrap_path_absolute(renv)) NULL else project + prefix <- if (profile) renv_bootstrap_profile_prefix() + components <- c(root, renv, prefix, ...) + paste(components, collapse = "/") + } + + renv_bootstrap_project_type <- function(path) { + + descpath <- file.path(path, "DESCRIPTION") + if (!file.exists(descpath)) + return("unknown") + + desc <- tryCatch( + read.dcf(descpath, all = TRUE), + error = identity + ) + + if (inherits(desc, "error")) + return("unknown") + + type <- desc$Type + if (!is.null(type)) + return(tolower(type)) + + package <- desc$Package + if (!is.null(package)) + return("package") + + "unknown" + + } + + renv_bootstrap_user_dir <- function() { + dir <- renv_bootstrap_user_dir_impl() + path.expand(chartr("\\", "/", dir)) + } + + renv_bootstrap_user_dir_impl <- function() { + + # use local override if set + override <- getOption("renv.userdir.override") + if (!is.null(override)) + return(override) + + # use R_user_dir if available + tools <- asNamespace("tools") + if (is.function(tools$R_user_dir)) + return(tools$R_user_dir("renv", "cache")) + + # try using our own backfill for older versions of R + envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") + for (envvar in envvars) { + root <- Sys.getenv(envvar, unset = NA) + if (!is.na(root)) + return(file.path(root, "R/renv")) + } + + # use platform-specific default fallbacks + if (Sys.info()[["sysname"]] == "Windows") + file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") + else if (Sys.info()[["sysname"]] == "Darwin") + "~/Library/Caches/org.R-project.R/R/renv" + else + "~/.cache/R/renv" + + } + + + renv_json_read <- function(file = NULL, text = NULL) { + + text <- paste(text %||% read(file), collapse = "\n") + + # find strings in the JSON + pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' + locs <- gregexpr(pattern, text)[[1]] + + # if any are found, replace them with placeholders + replaced <- text + strings <- character() + replacements <- character() + + if (!identical(c(locs), -1L)) { + + # get the string values + starts <- locs + ends <- locs + attr(locs, "match.length") - 1L + strings <- substring(text, starts, ends) + + # only keep those requiring escaping + strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) + + # compute replacements + replacements <- sprintf('"\032%i\032"', seq_along(strings)) + + # replace the strings + mapply(function(string, replacement) { + replaced <<- sub(string, replacement, replaced, fixed = TRUE) + }, strings, replacements) + + } + + # transform the JSON into something the R parser understands + transformed <- replaced + transformed <- gsub("[[{]", "list(", transformed) + transformed <- gsub("[]}]", ")", transformed) + transformed <- gsub(":", "=", transformed, fixed = TRUE) + text <- paste(transformed, collapse = "\n") + + # parse it + json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] + + # construct map between source strings, replaced strings + map <- as.character(parse(text = strings)) + names(map) <- as.character(parse(text = replacements)) + + # convert to list + map <- as.list(map) + + # remap strings in object + remapped <- renv_json_remap(json, map) + + # evaluate + eval(remapped, envir = baseenv()) + + } + + renv_json_remap <- function(json, map) { + + # fix names + if (!is.null(names(json))) { + lhs <- match(names(json), names(map), nomatch = 0L) + rhs <- match(names(map), names(json), nomatch = 0L) + names(json)[rhs] <- map[lhs] + } + + # fix values + if (is.character(json)) + return(map[[json]] %||% json) + + # handle true, false, null + if (is.name(json)) { + text <- as.character(json) + if (text == "true") + return(TRUE) + else if (text == "false") + return(FALSE) + else if (text == "null") + return(NULL) + } + + # recurse + if (is.recursive(json)) { + for (i in seq_along(json)) { + json[i] <- list(renv_json_remap(json[[i]], map)) + } + } + + json + + } # load the renv profile, if any renv_bootstrap_profile_load(project) diff --git a/renv/settings.dcf b/renv/settings.dcf index bf722e818..169d82f1b 100644 --- a/renv/settings.dcf +++ b/renv/settings.dcf @@ -1,7 +1,10 @@ +bioconductor.version: external.libraries: ignored.packages: package.dependency.fields: Imports, Depends, LinkingTo r.version: snapshot.type: implicit use.cache: TRUE +vcs.ignore.cellar: TRUE vcs.ignore.library: TRUE +vcs.ignore.local: TRUE From b4fdcbd8b7509dff1e914435f7ba7ffc7b318260 Mon Sep 17 00:00:00 2001 From: austinhartman Date: Mon, 11 Apr 2022 12:53:07 -0400 Subject: [PATCH 127/979] add atomic integration vignette --- vignettes/atomic_integration.Rmd | 191 +++++++++++++++++++++++++++++++ vignettes/get_started.Rmd | 2 +- vignettes/vignettes.yaml | 14 ++- 3 files changed, 205 insertions(+), 2 deletions(-) create mode 100755 vignettes/atomic_integration.Rmd diff --git a/vignettes/atomic_integration.Rmd b/vignettes/atomic_integration.Rmd new file mode 100755 index 000000000..fedce4754 --- /dev/null +++ b/vignettes/atomic_integration.Rmd @@ -0,0 +1,191 @@ +--- +title: "Atomic sketch integration for scRNA-seq data" +output: + html_document: + df_print: paged +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + + +The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory at once. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. + +In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments from five studies, each profiling of human immune cells (PBMC) from COVID patients. Specifically, we demonstrate how to perform the following steps + +* Sample a representative subset of cells ('atoms') from each dataset +* Integrate the atoms from each dataset +* Reconstruct (integrate) the full datasets, based on the atoms + +First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. + +```{r install, eval=FALSE} +if (!requireNamespace("remotes", quietly = TRUE)) { + install.packages("remotes") +} +remotes::install_github("satijalab/seurat", "feat/dictionary") +``` + +```{r message=FALSE, warning=FALSE} +library(Seurat) +library(SeuratDisk) +library(patchwork) +``` + +## Downloading datasets + +We obtained datasets in h5seurat format from a public [resource compiled by the Gottardo Lab](https://atlas.fredhutch.org/fredhutch/covid/). In this analysis, we use the [Arunachalam](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/arunachalam_2020_processed.HDF5), [Combes](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/combes_2021_processed.HDF5), [Lee](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/lee_2020_processed.HDF5), [Wilk](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/wilk_2020_processed.HDF5), and [Yao](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/yao_2021_processed.HDF5) datasets, but you can download additional data from this resource and include it in the vignette below. + +## Sample representative atoms from each dataset + +Inspired by pioneering work aiming to identify ['sketches'](https://www.sciencedirect.com/science/article/pii/S2405471219301528) of scRNA-seq data, our first step is to sample a representative set of cells from each dataset. We compute a leverage score (estimate of ['statistical leverage'](https://arxiv.org/abs/1109.3843)) for each cell, which helps to identify cells that are likely to be member of rare subpopulations and ensure that these are included in our representative sample. Importantly, the estimation of leverage scores only requires data normalization, can be computed efficiently for sparse datasets, and does not require any intensive computation or dimensional reduction steps. + +We load each object separately, perform basic preprocessing (normalization and variable feature selection), and select and store 5,000 representative cells (which we call 'atoms') from each dataset. We then delete the full dataset from memory, before loading the next one in. + +```{r init, results='hide', message=FALSE, fig.keep='none'} + +file.dir <- '/brahms/haoy/vignette_data/PBMCVignette/' +files.set <- c("arunachalam_2020_processed.HDF5", "combes_2021_processed.HDF5","lee_2020_processed.HDF5","wilk_2020_processed.HDF5","yao_2021_processed.HDF5") + +atoms.list <- list() +for (i in 1:length(files.set)) { + + # load in Seurat object + object <- LoadH5Seurat(file = paste0(file.dir ,files.set[i]), assays = 'RNA') + dataset_name <- gsub("_processed.HDF5", "", files.set[i]) + object$dataset <- dataset_name + + # Rename cells to avoid future conflicts + object <- RenameCells(object = object, add.cell.id = dataset_name) + + # basic preprocessing + object <- NormalizeData(object) + object <- FindVariableFeatures(object) + + # calculate leverage score and sample 5000 cells based on leverage score + atoms.i <- LeverageScoreSampling(object = object, num.cells = 5000) + atoms.list[[i]] <- atoms.i +} + +# delete full object from memory +# note that this is optional, if you can store the full datasets in memory, you dont have to reload them later +rm(object) +``` + + +## Perform integration on the atoms from different datasets + +Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we utilize a new wrapper function that takes a list of Seurat object and runs an optimized version of the [Fast integration using reciprocal PCA](https://satijalab.org/seurat/articles/integration_rpca.html) in Seurat workflow. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated_dr`). We perform SCTransform normalization prior to performing integration, but this step is optional. + +However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. For example, we also demonstrate below how to use [Harmony](https://github.com/immunogenomics/harmony), as an alternative integration approach. + + +```{r fast.integration} +# optional step: SCTransform normalization +for (i in 1:length(atoms.list)) { + atoms.list[[i]] <- SCTransform(atoms.list[[i]], verbose = FALSE) +} + +# perform integration +features <- SelectIntegrationFeatures(object.list = atoms.list) +atoms.merge <- FastRPCAIntegration(object.list = atoms.list, dims = 1:30, normalization.method = 'SCT', anchor.features = features) + +# we can generate a 2D visualization representing the integrated atoms +atom.reduction <- 'integrated_dr' +atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) +DimPlot(atoms.merge, group.by = 'dataset') +``` + +
+ **Alternative: integrate atoms using Harmony** + +As an alternative approach to integrate atoms, and to demonstrate the flexibility of our atomic sketch procedure, we can also use the [Harmony within the Seurat workflow](https://github.com/immunogenomics/harmony) to integrate the atoms. The integration procedure returns a Seurat object with a low-dimensional space (stored as the `harmony` dimensional reduction) that jointly represents atoms from all datasets. + +```{r, eval = FALSE} +library(harmony) +atoms.merge <- merge(atoms.list[[1]], atoms.list[2:length(atoms.list)]) +VariableFeatures(atoms.merge) <- SelectIntegrationFeatures(object.list = atoms.list) +atoms.merge <- ScaleData(atoms.merge) +atoms.merge <- RunPCA(atoms.merge) +atoms.merge <- RunHarmony(atoms.merge, project.dim = FALSE, group.by.vars = 'dataset') +atom.reduction <- 'harmony' + +atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) +DimPlot(atoms.merge, group.by = 'dataset') +``` + +
+ +--- + +## Integrate all cells from all datasets + +Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `IntegrateSketchEmbeddings` function to integrate all cells. After this function is run, each cell in the object has a + +```{r load.full.data} +integrated_objects <- list() +for (i in 1:length(files.set)) { + + # load in Seurat object / basic preprocessing + object <- LoadH5Seurat(file = paste0(file.dir , files.set[i]), assays = 'RNA') + dataset_name <- gsub("_processed.HDF5", "", files.set[i]) + object$dataset <- dataset_name + object <- RenameCells(object = object, add.cell.id = dataset_name) + object <- NormalizeData(object) + + # Integrate all cells into the same space as the atoms + object <- IntegrateSketchEmbeddings(object = object, atom.sketch.object = atoms.merge, atom.sketch.reduction = atom.reduction, features = features) + + # At this point, you can save the results/delete the object + # Since we want to compute a joint visualization of all cells later, + # we save the object with the dimensional reduction and just the top 100 variable features + object <- DietSeurat(object, features = features[1:100], dimreducs = 'integrated_dr') + integrated_objects[[i]] <- object + rm(object) +} +``` + +We perform UMAP visualization on the integrated embeddings. +```{r } +obj.merge <- merge(integrated_objects[[1]], integrated_objects[2:length(integrated_objects)], merge.dr = 'integrated_dr') +obj.merge <- RunUMAP(obj.merge, reduction = 'integrated_dr', dims = 1:30) +``` + +Now we can visualize the results, plotting the scRNA-seq cells based on dataset batches and pre-annotated labels annotations on the UMAP embedding. We also add pre-computed cell annotations to this object (you can download the cell annotation metadata at [this link](https://seurat.nygenome.org/vignette_data/atomic_integration/pbmc_annotations.txt)). + +```{r split.dim} +annotation_data <- read.table("/brahms/haoy/vignette_data/PBMCVignette/pbmc_annotations.txt") +obj.merge <- AddMetaData(obj.merge, metadata = annotation_data) +DimPlot(obj.merge, reduction = "umap", group.by = "dataset", shuffle = TRUE, raster = FALSE) +DimPlot(obj.merge, reduction = "umap", group.by = "celltype.l2", raster = FALSE) +``` + +Note that Neutrophils are present primarily in a single dataset (Combes), present at very low frequency in two others (Wilk and Lee), and absent in the remaining datasets. Despite the fact that this population is not present in all samples, it is correctly integrated by our atomic sketch procedure. + + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index fd28ff622..2193aa3e3 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -105,7 +105,7 @@ We provide a series of vignettes, tutorials, and analysis walkthroughs to help u For new users of Seurat, we suggest starting with a guided walk through of a dataset of 2,700 Peripheral Blood Mononuclear Cells (PBMCs) made publicly available by 10X Genomics. This tutorial implements the major components of a standard unsupervised clustering workflow including QC and data filtration, calculation of high-variance genes, dimensional reduction, graph-based clustering, and the identification of cluster markers. -We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x mulitome kit), or spatial datasets (e.g. from 10x visium or SLIDE-seq). +We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x multiome kit), or spatial datasets (e.g. 10x Visium or Vizgen MERFISH). ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 1) diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml index 9e7076f59..74d441f11 100644 --- a/vignettes/vignettes.yaml +++ b/vignettes/vignettes.yaml @@ -12,12 +12,18 @@ An introduction to working with multi-modal datasets in Seurat. image: citeseq_plot.jpg - - title: Analysis of spatial datasets + - title: Analysis of spatial datasets (Sequencing-based) name: spatial_vignette summary: | Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. image: spatial_vignette_ttr.jpg + - title: Analysis of spatial datasets (Imaging-based) + name: spatial_vignette_2 + summary: | + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Nanostring SMI, and CODEX. + image: spatial_vignette_2.png + - category: Data Integration vignettes: - title: Introduction to scRNA-seq integration @@ -88,6 +94,12 @@ Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. image: bridge_integration.png + - title: Atomic sketch integration for scRNA-seq + name: atomic_integration + summary: | + Perform community-scale integration of scRNA-seq datasets by atomic sketch integration. + image: atomic_integration.png + - category: Other vignettes: - title: Visualization From 4189b3c0e8260b52557607825a500f01bb7fdc98 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 18:58:14 -0400 Subject: [PATCH 128/979] Don't redefine %||% and %iff% Add new abbreviation utility --- R/reexports.R | 12 ++++++++++ R/utilities.R | 61 +++++++++++++++++++++--------------------------- man/reexports.Rd | 4 +++- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/R/reexports.R b/R/reexports.R index 29820e40b..8c6cec545 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -145,6 +145,18 @@ NULL # Functions and Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom SeuratObject %||% +#' @rdname reexports +#' @export +#' +SeuratObject::`%||%` + +#' @importFrom SeuratObject %iff% +#' @rdname reexports +#' @export +#' +SeuratObject::`%iff%` + #' @importFrom SeuratObject AddMetaData #' @rdname reexports #' @export diff --git a/R/utilities.R b/R/utilities.R index 6ae270517..7a6771ace 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1570,40 +1570,33 @@ as.data.frame.Matrix <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Set a default value if an object is null -# -# @param lhs An object to set if it's null -# @param rhs The value to provide if x is null -# -# @return rhs if lhs is null, else lhs -# -# @author Hadley Wickham -# @references https://adv-r.hadley.nz/functions.html#missing-arguments -# -`%||%` <- function(lhs, rhs) { - if (!is.null(x = lhs)) { - return(lhs) - } else { - return(rhs) - } -} - -# Set a default value if an object is NOT null -# -# @param lhs An object to set if it's NOT null -# @param rhs The value to provide if x is NOT null -# -# @return lhs if lhs is null, else rhs -# -# @author Hadley Wickham -# @references https://adv-r.hadley.nz/functions.html#missing-arguments -# -`%iff%` <- function(lhs, rhs) { - if (!is.null(x = lhs)) { - return(rhs) - } else { - return(lhs) - } +#' Create Abbreviations +#' +#' @param x A character vector +#' @param digits Include digits in the abbreviation +#' +#' @return Abbreviated versions of \code{x} +#' +#' @keywords internal +#' +#' @examples +#' .Abbrv(c('HelloWorld, 'LetsGo3', 'tomato')) +#' .Abbrv(c('HelloWorld, 'LetsGo3', 'tomato'), digits = FALSE) +#' .Abbrv('Wow3', digits = FALSE) +#' +#' @noRd +#' +.Abbrv <- function(x, digits = TRUE) { + pattern <- ifelse(test = isTRUE(x = digits), yes = '[A-Z0-9]+', no = '[A-Z]+') + y <- vapply( + X = regmatches(x = x, m = gregexec(pattern = pattern, text = x)), + FUN = paste, + FUN.VALUE = character(length = 1L), + collapse = '' + ) + na <- nchar(x = y) <= 1L + y[na] <- x[na] + return(tolower(x = y)) } # Generate chunk points diff --git a/man/reexports.Rd b/man/reexports.Rd index 2c9dbf1e5..ed66b98a1 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -3,6 +3,8 @@ \docType{import} \name{reexports} \alias{reexports} +\alias{\%||\%} +\alias{\%iff\%} \alias{AddMetaData} \alias{as.Graph} \alias{as.Neighbor} @@ -68,6 +70,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} }} From fa558e0d772f858866c568ab2a3d7e1842ead8e3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:00:10 -0400 Subject: [PATCH 129/979] Add new generics for LeverageScore and VST --- R/generics.R | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/R/generics.R b/R/generics.R index 13e9a43ff..f0c1099ca 100644 --- a/R/generics.R +++ b/R/generics.R @@ -300,6 +300,41 @@ IntegrateEmbeddings <- function(anchorset, ...) { UseMethod(generic = "IntegrateEmbeddings", object = anchorset) } +#' @export +#' +LeverageScore <- function(object, ...) { + UseMethod(generic = 'LeverageScore', object = object) +} + +#' Normalize Raw Data +#' +#' @param data Matrix with the raw count data +#' @param scale.factor Scale the data; default is \code{1e4} +#' @param verbose Print progress +#' +#' @return A matrix with the normalized and log-transformed data +#' +#' @template param-dotsm +#' +#' @export +#' @concept preprocessing +#' +#' @examples +#' mat <- matrix(data = rbinom(n = 25, size = 5, prob = 0.2), nrow = 5) +#' mat +#' mat_norm <- LogNormalize(data = mat) +#' mat_norm +#' +LogNormalize <- function( + data, + scale.factor = 1e4, + # margin = 2L, + verbose = TRUE, + ... +) { + UseMethod(generic = 'LogNormalize', object = data) +} + #' Metric for evaluating mapping success #' #' This metric was designed to help identify query cells that aren't well @@ -604,3 +639,46 @@ SCTResults <- function(object, ...) { "SCTResults<-" <- function(object, ..., value) { UseMethod(generic = 'SCTResults<-', object = object) } + +#' Variance Stabilizing Transformation +#' +#' Apply variance stabilizing transformation for selection of variable features +#' +#' @inheritParams stats::loess +#' @param data A matrix-like object +#' @param margin Unused +#' @param nselect Number of of features to select +#' @param clip Upper bound for values post-standardization; defaults to the +#' square root of the number of cells +#' @param verbose ... +#' +#' @template param-dotsm +#' +#' @return A data frame with the following columns: +#' \itemize{ +#' \item \dQuote{\code{mean}}: ... +#' \item \dQuote{\code{variance}}: ... +#' \item \dQuote{\code{variance.expected}}: ... +#' \item \dQuote{\code{variance.standardized}}: ... +#' \item \dQuote{\code{variable}}: \code{TRUE} if the feature selected as +#' variable, otherwise \code{FALSE} +#' \item \dQuote{\code{rank}}: If the feature is selected as variable, then how +#' it compares to other variable features with lower ranks as more variable; +#' otherwise, \code{NA} +#' } +#' +#' @rdname VST +#' @export VST +#' +#' @keywords internal +#' +VST <- function( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + ... +) { + UseMethod(generic = 'VST', object = data) +} From 90194dfd3003932c86b2bb74b6488a44e8c3a309 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:00:42 -0400 Subject: [PATCH 130/979] Add methods for LogNormalize Various other updates --- R/preprocessing.R | 173 +++++++++++++++++----------------------------- 1 file changed, 63 insertions(+), 110 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 09485de14..5142959d9 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -468,11 +468,8 @@ GetResidual <- function( #' @param slice Name for the stored image of the tissue slice #' @param filter.matrix Only keep spots that have been determined to be over #' tissue -#' @param to.upper Converts all feature names to upper case. This can provide an -#' approximate conversion of mouse to human gene names which can be useful in an -#' explorative analysis. For cross-species comparisons, orthologous genes should -#' be identified across species and used instead. -#' @param image An object of class VisiumV1. Typically, an output from \code{\link{Read10X_Image}} +#' @param to.upper Converts all feature names to upper case. Can be useful when +#' analyses require comparisons between human and mouse gene names for example. #' @param ... Arguments passed to \code{\link{Read10X_h5}} #' #' @return A \code{Seurat} object @@ -498,7 +495,6 @@ Load10X_Spatial <- function( slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE, - image = NULL, ... ) { if (length(x = data.dir) > 1) { @@ -510,15 +506,10 @@ Load10X_Spatial <- function( rownames(x = data) <- toupper(x = rownames(x = data)) } object <- CreateSeuratObject(counts = data, assay = assay) - if (is.null(x = image)) { - image <- Read10X_Image( - image.dir = file.path(data.dir, 'spatial'), - filter.matrix = filter.matrix - ) - } else { - if (!inherits(x = image, what = "VisiumV1")) - stop("Image must be an object of class 'VisiumV1'.") - } + image <- Read10X_Image( + image.dir = file.path(data.dir, 'spatial'), + filter.matrix = filter.matrix + ) image <- image[Cells(x = object)] DefaultAssay(object = image) <- assay object[[slice]] <- image @@ -596,45 +587,6 @@ LoadSTARmap <- function( return(starmap) } -#' Normalize raw data -#' -#' Normalize count data per cell and transform to log scale -#' -#' @param data Matrix with the raw count data -#' @param scale.factor Scale the data. Default is 1e4 -#' @param verbose Print progress -#' -#' @return Returns a matrix with the normalize and log transformed data -#' -#' @import Matrix -#' @importFrom methods as -#' -#' @export -#' @concept preprocessing -#' -#' @examples -#' mat <- matrix(data = rbinom(n = 25, size = 5, prob = 0.2), nrow = 5) -#' mat -#' mat_norm <- LogNormalize(data = mat) -#' mat_norm -#' -LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { - if (is.data.frame(x = data)) { - data <- as.matrix(x = data) - } - if (!inherits(x = data, what = 'dgCMatrix')) { - data <- as(object = data, Class = "dgCMatrix") - } - # call Rcpp function to normalize - if (verbose) { - cat("Performing log-normalization\n", file = stderr()) - } - norm.data <- LogNorm(data, scale_factor = scale.factor, display_progress = verbose) - colnames(x = norm.data) <- colnames(x = data) - rownames(x = norm.data) <- rownames(x = data) - return(norm.data) -} - #' Demultiplex samples based on classification method from MULTI-seq (McGinnis et al., bioRxiv 2018) #' #' Identify singlets, doublets and negative cells from multiplexing experiments. Annotate singlets by tags. @@ -992,8 +944,7 @@ Read10X_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) { #' Load a 10X Genomics Visium Image #' #' @param image.dir Path to directory with 10X Genomics visium image data; -#' should include files \code{tissue_lowres_image.png}, -#' @param image.name The file name of the image. Defaults to tissue_lowres_image.png. +#' should include files \code{tissue_lowres_iamge.png}, #' \code{scalefactors_json.json} and \code{tissue_positions_list.csv} #' @param filter.matrix Filter spot/feature matrix to only include spots that #' have been determined to be over tissue. @@ -1009,8 +960,8 @@ Read10X_h5 <- function(filename, use.names = TRUE, unique.features = TRUE) { #' @export #' @concept preprocessing #' -Read10X_Image <- function(image.dir, image.name = "tissue_lowres_image.png", filter.matrix = TRUE, ...) { - image <- readPNG(source = file.path(image.dir, image.name)) +Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) { + image <- readPNG(source = file.path(image.dir, 'tissue_lowres_image.png')) scale.factors <- fromJSON(txt = file.path(image.dir, 'scalefactors_json.json')) tissue.positions <- read.csv( file = file.path(image.dir, 'tissue_positions_list.csv'), @@ -1047,11 +998,8 @@ Read10X_Image <- function(image.dir, image.name = "tissue_lowres_image.png", fil #' @param features Name or remote URL of the features/genes file #' @param cell.column Specify which column of cells file to use for cell names; default is 1 #' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 -#' @param cell.sep Specify the delimiter in the cell name file -#' @param feature.sep Specify the delimiter in the feature name file #' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names #' @param skip.feature Number of lines to skip in the features file before beginning to gene names -#' @param mtx.transpose Transpose the matrix after reading in #' @param unique.features Make feature names unique (default TRUE) #' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. #' @@ -1090,11 +1038,8 @@ ReadMtx <- function( features, cell.column = 1, feature.column = 2, - cell.sep = "\t", - feature.sep = "\t", skip.cell = 0, skip.feature = 0, - mtx.transpose = FALSE, unique.features = TRUE, strip.suffix = FALSE ) { @@ -1104,11 +1049,11 @@ ReadMtx <- function( "feature list" = features ) for (i in seq_along(along.with = all.files)) { - uri <- normalizePath(all.files[[i]], mustWork = FALSE) + uri <- all.files[[i]] err <- paste("Cannot find", names(x = all.files)[i], "at", uri) uri <- build_url(url = parse_url(url = uri)) if (grepl(pattern = '^:///', x = uri)) { - uri <- gsub(pattern = '^://', replacement = '', x = uri) + uri <- gsub(pattern = '^:///', replacement = '', x = uri) if (!file.exists(uri)) { stop(err, call. = FALSE) } @@ -1126,14 +1071,14 @@ ReadMtx <- function( cell.barcodes <- read.table( file = all.files[['barcode list']], header = FALSE, - sep = cell.sep, + sep = '\t', row.names = NULL, skip = skip.cell ) feature.names <- read.table( file = all.files[['feature list']], header = FALSE, - sep = feature.sep, + sep = '\t', row.names = NULL, skip = skip.feature ) @@ -1204,9 +1149,6 @@ ReadMtx <- function( feature.names <- make.unique(names = feature.names) } data <- readMM(file = all.files[['expression matrix']]) - if (mtx.transpose) { - data <- t(x = data) - } if (length(x = cell.names) != ncol(x = data)) { stop( "Matrix has ", @@ -1911,7 +1853,6 @@ SubsetByBarcodeInflections <- function(object) { #' #' @rdname FindVariableFeatures #' @concept preprocessing -#' @method FindVariableFeatures V3Matrix #' @export #' FindVariableFeatures.V3Matrix <- function( @@ -1977,6 +1918,7 @@ FindVariableFeatures.V3Matrix <- function( EXPR = binning.method, 'equal_width' = num.bin, 'equal_frequency' = c( + -1, quantile( x = feature.mean[feature.mean > 0], probs = seq.int(from = 0, to = 1, length.out = num.bin) @@ -1984,8 +1926,7 @@ FindVariableFeatures.V3Matrix <- function( ), stop("Unknown binning method: ", binning.method) ) - data.x.bin <- cut(x = feature.mean, breaks = data.x.breaks, - include.lowest = TRUE) + data.x.bin <- cut(x = feature.mean, breaks = data.x.breaks) names(x = data.x.bin) <- names(x = feature.mean) mean.y <- tapply(X = feature.dispersion, INDEX = data.x.bin, FUN = mean) sd.y <- tapply(X = feature.dispersion, INDEX = data.x.bin, FUN = sd) @@ -2074,7 +2015,7 @@ FindVariableFeatures.Assay <- function( }, 'dispersion' = head(x = rownames(x = hvf.info), n = nfeatures), 'vst' = head(x = rownames(x = hvf.info), n = nfeatures), - stop("Unknown selection method: ", selection.method) + stop("Unkown selection method: ", selection.method) ) VariableFeatures(object = object) <- top.features vf.name <- ifelse( @@ -2253,35 +2194,8 @@ FindSpatiallyVariableFeatures.Assay <- function( features <- features[! features %in% features.computed] } data <- GetAssayData(object = object, slot = slot) - missing.features <- which(x = ! features %in% rownames(x = data)) - if (length(x = missing.features) > 0) { - remaining.features <- length(x = features) - length(x = missing.features) - if (length(x = remaining.features) > 0) { - warning("Not all requested features are present in the requested slot (", - slot, "). Removing ", length(x = missing.features), - " missing features and continuing with ", remaining.features, - " remaining features.", immediate. = TRUE, call. = FALSE) - features <- features[features %in% rownames(x = data)] - } else { - stop("None of the requested features are present in the requested slot (", - slot, ").", call. = FALSE) - } - } - image.cells <- rownames(x = spatial.location) - data <- as.matrix(x = data[features, image.cells, drop = FALSE]) - rv <- RowVar(x = data) - rv.small <- which(x = rv < 1e-16) - rv.remove <- c() - if (length(x = rv.small) > 0) { - for (i in rv.small) { - if (var(x = data[i, ]) == 0) { - rv.remove <- c(rv.remove, i) - } - } - } - if (length(x = rv.remove) > 0) { - data <- data[-c(rv.remove), , drop = FALSE] - } + data <- as.matrix(x = data[features, ]) + data <- data[RowVar(x = data) > 0, ] if (nrow(x = data) != 0) { svf.info <- FindSpatiallyVariableFeatures( object = data, @@ -2362,6 +2276,50 @@ FindSpatiallyVariableFeatures.Seurat <- function( object <- LogSeuratCommand(object = object) } +#' @rdname LogNormalize +#' @method LogNormalize data.frame +#' @export +#' +LogNormalize.data.frame <- function( + data, + scale.factor = 1e4, + verbose = TRUE, + ... +) { + return(LogNormalize( + data = as.matrix(x = data), + scale.factor = scale.factor, + verbose = verbose, + ... + )) +} + +#' @rdname LogNormalize +#' @method LogNormalize V3Matrix +#' @export +#' +LogNormalize.V3Matrix <- function( + data, + scale.factor = 1e4, + verbose = TRUE, + ... +) { + # if (is.data.frame(x = data)) { + # data <- as.matrix(x = data) + # } + if (!inherits(x = data, what = 'dgCMatrix')) { + data <- as(object = data, Class = "dgCMatrix") + } + # call Rcpp function to normalize + if (verbose) { + cat("Performing log-normalization\n", file = stderr()) + } + norm.data <- LogNorm(data, scale_factor = scale.factor, display_progress = verbose) + colnames(x = norm.data) <- colnames(x = data) + rownames(x = norm.data) <- rownames(x = data) + return(norm.data) +} + #' @importFrom future.apply future_lapply #' @importFrom future nbrOfWorkers #' @@ -2383,8 +2341,6 @@ FindSpatiallyVariableFeatures.Seurat <- function( #' #' @rdname NormalizeData #' @concept preprocessing -#' -#' @method NormalizeData V3Matrix #' @export #' NormalizeData.V3Matrix <- function( @@ -2483,7 +2439,7 @@ NormalizeData.V3Matrix <- function( scale.factor = scale.factor, verbose = verbose ), - stop("Unknown normalization method: ", normalization.method) + stop("Unkown normalization method: ", normalization.method) ) } return(normalized.data) @@ -3067,9 +3023,6 @@ ClassifyCells <- function(data, q) { # # ComputeRMetric <- function(mv, r.metric = 5) { - if (!inherits(x = mv, what = "list")) { - mv <- list(mv) - } r.metric.results <- unlist(x = lapply( X = mv, FUN = function(x) { From 2f14b1384d6be6484ddddd0b849d841391ad1a5c Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:00:55 -0400 Subject: [PATCH 131/979] Enable preprocessing workflow for v5 --- R/preprocessing5.R | 363 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 280 insertions(+), 83 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 604c3d14c..4dd4d6882 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -12,12 +12,6 @@ hvf.methods <- list() # Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @export -#' -LogNormalize5 <- function(data, scale.factor = 1e4, margin = 2L, verbose = TRUE) { - UseMethod(generic = 'LogNormalize5', object = data) -} - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -26,42 +20,54 @@ LogNormalize5 <- function(data, scale.factor = 1e4, margin = 2L, verbose = TRUE) # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom rlang is_quosure quo_get_env quo_get_expr #' @method FindVariableFeatures default #' @export #' FindVariableFeatures.default <- function( object, - method = 'vst', + method = VST, nselect = 2000L, - fmargin = 1L, verbose = TRUE, ... ) { + if (is_quosure(x = method)) { + method <- eval( + expr = quo_get_expr(quo = method), + envir = quo_get_env(quo = method) + ) + } if (is.character(x = method)) { - method <- method[1L] - method <- match.arg(arg = method, choices = names(x = hvf.methods)) - method <- hvf.methods[[method]] + method <- get(x = method) } if (!is.function(x = method)) { - stop("'method' must be a function for calculating highly variable features") + stop( + "'method' must be a function for calculating highly variable features", + call. = FALSE + ) } return(method( data = object, - fmargin = fmargin, nselect = nselect, verbose = verbose, ... )) } -#' @importFrom SeuratObject DefaultLayer Features Key +g <- function(x, method = VST) { + method <- enquo(arg = method) + FindVariableFeatures(object = x, method = method, layer = 'counts') +} + +#' @importFrom rlang as_name enquo is_quosure +#' @importFrom SeuratObject DefaultLayer Features Key Layers #' #' @method FindVariableFeatures StdAssay #' @export #' FindVariableFeatures.StdAssay <- function( object, - method = 'vst', + method = VST, nselect = 2000L, layer = NULL, span = 0.3, @@ -70,61 +76,122 @@ FindVariableFeatures.StdAssay <- function( verbose = TRUE, ... ) { - layer <- layer %||% DefaultLayer(object = object) - data <- LayerData(object = object, layer = layer, fast = TRUE) - f <- if (inherits(x = data, what = 'V3Matrix')) { - FindVariableFeatures.default - } else { - FindVariableFeatures + layer <- unique(x = layer) %||% DefaultLayer(object = object) + layer <- Layers(object = object, search = layer) + if (is.null(x = key)) { + false <- function(...) { + return(FALSE) + } + key <- if (tryCatch(expr = is_quosure(x = method), error = false)) { + method + } else if (is.function(x = method)) { + substitute(expr = method) + } else if (is.call(x = enquo(arg = method))) { + enquo(arg = method) + } else if (is.character(x = method)) { + method + } else { + parse(text = method) + } + key <- .Abbrv(x = as_name(x = key)) } - hvf.info <- f( - object = data, + warn.var <- warn.rank <- TRUE + for (i in seq_along(along.with = layer)) { + if (isTRUE(x = verbose)) { + message("Finding variable features for layer ", layer[i]) + } + data <- LayerData(object = object, layer = layer[i], fast = TRUE) + f <- if (inherits(x = data, what = 'V3Matrix')) { + FindVariableFeatures.default + } else { + FindVariableFeatures + } + hvf.info <- f( + object = data, + method = method, + nselect = nselect, + span = span, + clip = clip, + verbose = verbose, + ... + ) + if (warn.var) { + if (!'variable' %in% colnames(x = hvf.info) || !is.logical(x = hvf.info$variable)) { + warning( + "No variable feature indication in HVF info for method ", + key, + ", `VariableFeatures` will not work", + call. = FALSE, + immediate. = TRUE + ) + warn.var <- FALSE + } + } else if (warn.rank && !'rank' %in% colnames(x = hvf.info)) { + warning( + "No variable feature rank in HVF info for method ", + key, + ", `VariableFeatures` will return variable features in assay order", + call. = FALSE, + immediate. = TRUE + ) + warn.rank <- FALSE + } + colnames(x = hvf.info) <- paste( + 'vf', + key, + layer[i], + colnames(x = hvf.info), + sep = '_' + ) + rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) + object[[colnames(x = hvf.info)]] <- hvf.info + } + return(object) +} + +#' @importFrom rlang enquo +#' @method FindVariableFeatures Seurat5 +#' @export +#' +FindVariableFeatures.Seurat5 <- function( + object, + assay = NULL, + method = VST, + nselect = 2000L, + layer = NULL, + span = 0.3, + clip = NULL, + key = NULL, + verbose = TRUE, + ... +) { + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + method <- enquo(arg = method) + object[[assay]] <- FindVariableFeatures( + object = object[[assay]], method = method, nselect = nselect, - fmargin = .MARGIN(object = object, type = 'features'), + layer = layer, span = span, clip = clip, + key = key, verbose = verbose, ... ) - rownames(x = hvf.info) <- Features(x = object, layer = layer) - pattern <- '^[[:alnum:]]+_' - if (!any(grepl(pattern = pattern, x = colnames(x = hvf.info)))) { - key <- key %||% if (is.character(x = method)) { - if (grepl(pattern = '\\.', x = method)) { - x <- vapply( - X = unlist(x = strsplit(x = method, split = '\\.')), - FUN = substr, - FUN.VALUE = character(length = 1L), - start = 1L, - stop = 1L, - USE.NAMES = FALSE - ) - paste(x, collapse = '') - } else { - method - } - } else { - SeuratObject:::RandomKey() - } - key <- suppressWarnings(expr = Key(object = key)) - colnames(x = hvf.info) <- paste0(key, colnames(x = hvf.info)) - } #else if (!all(grepl(pattern = pattern, x = colnames(x = hvf.info)))) { - # '' - # } - object[[]] <- hvf.info return(object) } -#' -#' @method LogNormalize5 default +#' @rdname LogNormalize +#' @method LogNormalize default #' @export #' -LogNormalize5.default <- function( +LogNormalize.default <- function( data, scale.factor = 1e4, margin = 2L, - verbose = TRUE + verbose = TRUE, + ... ) { margin <- SeuratObject:::.CheckFmargin(fmargin = margin) ncells <- dim(x = data)[margin] @@ -164,7 +231,6 @@ NormalizeData.default <- function( scale.factor = 1e4, cmargin = 2L, margin = 1L, - block.size = NULL, verbose = TRUE, ... ) { @@ -181,7 +247,7 @@ NormalizeData.default <- function( verbose = verbose ) } else { - LogNormalize5( + LogNormalize( data = object, scale.factor = scale.factor, margin = cmargin, @@ -204,46 +270,173 @@ NormalizeData.StdAssay <- function( method = 'LogNormalize', scale.factor = 1e4, margin = 1L, - block.size = NULL, layer = NULL, - save = 'normalized', + save = 'data', default = TRUE, verbose = TRUE, ... ) { - layer <- layer %||% DefaultLayer(object = object) + olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) + layer <- Layers(object = object, search = layer) if (save == DefaultLayer(object = object)) { default <- FALSE } - data <- LayerData(object = object, layer = layer, fast = TRUE) - f <- if (inherits(x = data, what = 'V3Matrix')) { - NormalizeData.default - } else { - NormalizeData + if (length(x = save) != length(x = layer)) { + save <- make.unique(names = gsub( + pattern = olayer, + replacement = save, + x = layer + )) + } + for (i in seq_along(along.with = layer)) { + l <- layer[i] + if (isTRUE(x = verbose)) { + message("Normalizing layer: ", l) + } + LayerData( + object = object, + layer = save[i], + features = Features(x = object, layer = l), + cells = Cells(x = object, layer = l) + ) <- NormalizeData( + object = LayerData(object = object, layer = l, fast = TRUE), + method = method, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + ... + ) } - data <- f( - object = LayerData(object = object, layer = layer, fast = TRUE), + if (isTRUE(x = default)) { + DefaultLayer(object = object) <- save[1L] + } + gc(verbose = FALSE) + return(object) +} + +#' @importFrom SeuratObject DefaultAssay +#' +#' @method NormalizeData Seurat5 +#' @export +#' +NormalizeData.Seurat5 <- function( + object, + assay = NULL, + method = 'LogNormalize', + scale.factor = 1e4, + margin = 1L, + layer = NULL, + save = 'data', + default = TRUE, + verbose = TRUE, + ... +) { + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + object[[assay]] <- NormalizeData( + object = object[[assay]], method = method, - scale.factor = 1e4, - cmargin = .MARGIN(object = object, type = 'cells'), + scale.factor = scale.factor, margin = margin, - block.size = block.size, + layer = layer, + save = save, + default = default, verbose = verbose, ... ) - LayerData( - object = object, - layer = save, - features = Features(x = object, layer = layer), - cells = Cells(x = object, layer = layer) - ) <- data - if (isTRUE(x = default)) { - DefaultLayer(object = object) <- save - } - gc(verbose = FALSE) return(object) } +#' @rdname VST +#' @method VST default +#' @export +#' +VST.default <- function( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + ... +) { + .NotYetImplemented() +} + +#' @importFrom stats loess +#' @importFrom Matrix rowMeans +#' +#' @rdname VST +#' @method VST dgCMatrix +#' @export +#' +VST.dgCMatrix <- function( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + verbose = TRUE, + ... +) { + nfeatures <- nrow(x = data) + hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) + # Calculate feature menas + hvf.info$mean <- Matrix::rowMeans(x = data) + # Calculate feature variance + hvf.info$variance <- SparseRowVar2( + mat = data, + mu = hvf.info$mean, + display_progress = verbose + ) + hvf.info$variance.expected <- 0L + not.const <- hvf.info$variance > 0 + fit <- loess( + formula = log10(x = variance) ~ log10(x = mean), + data = hvf.info[not.const, , drop = TRUE], + span = span + ) + hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + hvf.info$variance.standardized <- SparseRowVarStd( + mat = data, + mu = hvf.info$mean, + sd = sqrt(x = hvf.info$variance.expected), + vmax = clip %||% sqrt(x = ncol(x = data)), + display_progress = TRUE + ) + # Set variable features + hvf.info$variable <- FALSE + hvf.info$rank <- NA + vf <- head( + x = order(hvf.info$variance.standardized, decreasing = TRUE), + n = nselect + ) + hvf.info$variable[vf] <- TRUE + hvf.info$rank[vf] <- seq_along(along.with = vf) + return(hvf.info) +} + +#' @rdname VST +#' @method VST matrix +#' @export +#' +VST.matrix <- function( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + ... +) { + return(VST( + data = as.sparse(x = data), + margin = margin, + nselect = nselect, + span = span, + clip = clip, + ... + )) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for R-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -459,18 +652,19 @@ NormalizeData.StdAssay <- function( #' @inheritParams stats::loess #' @param data A matrix #' @param fmargin Feature margin -#' @param nfeatures Number of features to select +#' @param nselect Number of features to select #' @param clip After standardization values larger than \code{clip} will be set #' to \code{clip}; default is \code{NULL} which sets this value to the square #' root of the number of cells #' #' @importFrom stats loess +#' @importFrom Matrix rowMeans #' #' @keywords internal #' #' @noRd #' -VST <- function( +.VST <- function( data, fmargin = 1L, nselect = 2000L, @@ -481,6 +675,8 @@ VST <- function( ) { fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) nfeatures <- dim(x = data)[fmargin] + # TODO: Support transposed matrices + # nfeatures <- nrow(x = data) if (IsSparse(x = data)) { mean.func <- .SparseMean var.func <- .SparseFeatureVar @@ -489,7 +685,8 @@ VST <- function( var.func <- .FeatureVar } hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) - hvf.info$mean <- mean.func(data = data, margin = 1L) + # hvf.info$mean <- mean.func(data = data, margin = fmargin) + hvf.info$mean <- rowMeans(x = data) hvf.info$variance <- var.func( data = data, mu = hvf.info$mean, @@ -526,7 +723,7 @@ VST <- function( return(hvf.info) } -hvf.methods$vst <- VST +# hvf.methods$vst <- VST #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # S4 Methods From 3c5d058e2fc02764b2f4fa0c30dfdedc343c8527 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:01:09 -0400 Subject: [PATCH 132/979] Add LeverageScore and associated sketching functions --- R/sketching.R | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 R/sketching.R diff --git a/R/sketching.R b/R/sketching.R new file mode 100644 index 000000000..6ff65319f --- /dev/null +++ b/R/sketching.R @@ -0,0 +1,290 @@ +#' @include zzz.R +#' @include generics.R +#' @importFrom rlang enquo is_quosure quo_get_env quo_get_expr +#' +NULL + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Functions +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Methods for Seurat-defined generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @importFrom Matrix qrR +#' +#' @method LeverageScore default +#' @export +#' +LeverageScore.default <- function( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) { + # Check the dimensions of the object, nsketch, and ndims + ncells <- ncol(x = object) + if (nrow(x = object) > 5000L) { + stop("too slow", call. = FALSE) + } else if (nrow(x = object) > (ncells / 1.1)) { + stop("too square", call. = FALSE) + } + ndims <- ndims %||% ncells + if (nsketch < (1.1 * nrow(x = object))) { + nsketch <- 1.1 * nrow(x = object) + warning( + "'nsketch' is too close to the number of features, setting to ", + round(x = nsketch, digits = 2L), + call. = FALSE, + immediate. = TRUE + ) + } + nsketch <- min(nsketch, ndims) + # Check the method + if (is_quosure(x = method)) { + method <- eval( + expr = quo_get_expr(quo = method), + envir = quo_get_env(quo = method) + ) + } + if (is.character(x = method)) { + method <- get(x = method) + } + stopifnot(is.function(x = method)) + # Run the sketching + if (isTRUE(x = verbose)) { + message("sampling ", nsketch, " cells") + } + S <- method(nsketch = nsketch, ncells = ncells, seed = seed, ...) + object <- t(x = object) + if (isTRUE(x = verbose)) { + message("Performing QR decomposition") + } + sa <- S %*% object + qr.sa <- base::qr(x = sa) + R <- if (inherits(x = qr.sa, what = 'sparseQR')) { + qrR(qr = qr.sa) + } else { + base::qr.R(qr = qr.sa) + } + R.inv <- as.sparse(x = backsolve(r = R, x = diag(x = ncol(x = R)))) + if (isTRUE(x = verbose)) { + message("Performing random projection") + } + JL <- as.sparse(x = JLEmbed( + nrow = ncol(x = R.inv), + ncol = ndims, + eps = eps, + seed = seed + )) + Z <- object %*% (R.inv %*% JL) + return(rowSums(x = Z ^ 2)) +} + +#' @method LeverageScore StdAssay +#' @export +#' +LeverageScore.StdAssay <- function( + object, + features = NULL, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + layer = 'data', + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) { + layer <- unique(x = layer) %||% DefaultLayer(object = object) + layer <- Layers(object = object, search = layer) + if (!is_quosure(x = method)) { + method <- enquo(arg = method) + } + scores <- SeuratObject:::EmptyDF(n = ncol(x = object)) + row.names(x = scores) <- colnames(x = object) + scores[, layer] <- NA_real_ + for (i in seq_along(along.with = layer)) { + l <- layer[i] + if (isTRUE(x = verbose)) { + message("Running LeverageScore for layer ", l) + } + scores[Cells(x = object, layer = l), l] <- LeverageScore( + object = LayerData( + object = object, + layer = l, + features = features %||% VariableFeatures(object = object, layer = l), + fast = TRUE + ), + nsketch = nsketch, + ndims = ndims %||% ncol(x = object), + method = method, + eps = eps, + seed = seed, + verbose = verbose, + ... + ) + } + names(x = scores) <- paste0('leverage_score_', names(x = scores)) + return(scores) +} + +#' @method LeverageScore Seurat5 +#' @export +#' +LeverageScore.Seurat5 <- function( + object, + assay = NULL, + features = NULL, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + layer = 'data', + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) { + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + method <- enquo(arg = method) + scores <- LeverageScore( + object = object[[assay]], + features = features, + nsketch = nsketch, + ndims = ndims, + method = method, + layer = layer, + eps = eps, + seed = seed, + verbose = verbose, + ... + ) + names(x = scores) <- paste0("seurat_", names(x = scores)) + object[[]] <- scores + return(object) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Methods for R-defined generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Internal +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' Generate CountSketch random matrix +#' +#' @inheritParams base::set.seed +#' @param nsketch Number of sketching random cells +#' @param ncells Number of cells in the original data +#' @param ... Ignored +#' +#' @return ... +#' +#' @importFrom Matrix sparseMatrix +#' +#' @export +#' +#' @keywords internal +#' +#' @references Clarkson, KL. & Woodruff, DP. +#' Low-rank approximation and regression in input sparsity time. +#' Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. +#' \url{https://dl.acm.org/doi/abs/10.1145/3019134}; + +CountSketch <- function(nsketch, ncells, seed = NA_integer_, ...) { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + iv <- xv <- vector(mode = "numeric", length = ncells) + jv <- seq_len(length.out = ncells) + for (i in jv) { + iv[i] <- sample(x = seq_len(length.out = nsketch), size = 1L) + xv[i] <- sample(x = c(-1L, 1L), size = 1L) + } + return(sparseMatrix( + i = iv, + j = jv, + x = xv + )) +} + +#' Gaussian sketching +#' +#' @inheritParams CountSketch +#' +#' @return ... +#' +#' @export +#' +#' @keywords internal +#' +GaussianSketch <- function(nsketch, ncells, seed = NA_integer_, ...) { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + return(matrix( + data = rnorm(n = nsketch * ncells, mean = 0L, sd = 1 / (ncells ^ 2)), + nrow = nsketch, + ncol = ncells + )) +} + +#' Generate JL random projection embeddings +#' +#' @keywords internal +#' +#' @references Aghila G and Siddharth R (2020). +#' RandPro: Random Projection with Classification. R package version 0.2.2. +#' \url{https://CRAN.R-project.org/package=RandPro} +#' +#' @noRd +# +JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA_integer_, method = "li") { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + method <- method[1L] + method <- match.arg(arg = method) + if (!is.null(x = eps)) { + if (eps > 1 || eps <= 0) { + stop("'eps' must be 0 < eps <= 1") + } + ncol <- floor(x = 4 * log(x = ncol) / ((eps ^ 2) / 2 - (eps ^ 3 / 3))) + } + m <- switch( + EXPR = method, + "li" = { + s <- ceiling(x = sqrt(x = ncol)) + prob <- c( + 1 / (2 * s), + 1 - (1 / s), + 1 / (2 * s) + ) + matrix( + data = sample( + x = seq.int(from = -1L, to = 1L), + size = nrow * ncol, + replace = TRUE, + prob = prob + ), + nrow = nrow + ) + } + ) + return(m) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# S4 Methods +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From ccb7cfdd780ae228da4a72401b886f89d82115e9 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:01:33 -0400 Subject: [PATCH 133/979] Update docs --- man/CountSketch.Rd | 31 +++++++++++++++ man/GaussianSketch.Rd | 25 ++++++++++++ man/Load10X_Spatial.Rd | 9 +---- man/LogNormalize.Rd | 24 +++++++++--- man/Read10X_Image.Rd | 11 +----- man/ReadMtx.Rd | 9 ----- man/VST.Rd | 60 +++++++++++++++++++++++++++++ man/roxygen/templates/param-dotsi.R | 1 + man/roxygen/templates/param-dotsm.R | 1 + 9 files changed, 140 insertions(+), 31 deletions(-) create mode 100644 man/CountSketch.Rd create mode 100644 man/GaussianSketch.Rd create mode 100644 man/VST.Rd create mode 100644 man/roxygen/templates/param-dotsi.R create mode 100644 man/roxygen/templates/param-dotsm.R diff --git a/man/CountSketch.Rd b/man/CountSketch.Rd new file mode 100644 index 000000000..b1735c148 --- /dev/null +++ b/man/CountSketch.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{CountSketch} +\alias{CountSketch} +\title{Generate CountSketch random matrix} +\usage{ +CountSketch(nsketch, ncells, seed = NA_integer_, ...) +} +\arguments{ +\item{nsketch}{Number of sketching random cells} + +\item{ncells}{Number of cells in the original data} + +\item{seed}{a single value, interpreted as an integer, or \code{NULL} + (see \sQuote{Details}).} + +\item{...}{Ignored} +} +\value{ +... +} +\description{ +Generate CountSketch random matrix +} +\references{ +Clarkson, KL. & Woodruff, DP. +Low-rank approximation and regression in input sparsity time. +Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. +\url{https://dl.acm.org/doi/abs/10.1145/3019134}; +} +\keyword{internal} diff --git a/man/GaussianSketch.Rd b/man/GaussianSketch.Rd new file mode 100644 index 000000000..ba26f7567 --- /dev/null +++ b/man/GaussianSketch.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{GaussianSketch} +\alias{GaussianSketch} +\title{Gaussian sketching} +\usage{ +GaussianSketch(nsketch, ncells, seed = NA_integer_, ...) +} +\arguments{ +\item{nsketch}{Number of sketching random cells} + +\item{ncells}{Number of cells in the original data} + +\item{seed}{a single value, interpreted as an integer, or \code{NULL} + (see \sQuote{Details}).} + +\item{...}{Ignored} +} +\value{ +... +} +\description{ +Gaussian sketching +} +\keyword{internal} diff --git a/man/Load10X_Spatial.Rd b/man/Load10X_Spatial.Rd index cfe5c63d6..84c8c0ec8 100644 --- a/man/Load10X_Spatial.Rd +++ b/man/Load10X_Spatial.Rd @@ -11,7 +11,6 @@ Load10X_Spatial( slice = "slice1", filter.matrix = TRUE, to.upper = FALSE, - image = NULL, ... ) } @@ -28,12 +27,8 @@ and the image data in a subdirectory called \code{spatial}} \item{filter.matrix}{Only keep spots that have been determined to be over tissue} -\item{to.upper}{Converts all feature names to upper case. This can provide an -approximate conversion of mouse to human gene names which can be useful in an -explorative analysis. For cross-species comparisons, orthologous genes should -be identified across species and used instead.} - -\item{image}{An object of class VisiumV1. Typically, an output from \code{\link{Read10X_Image}}} +\item{to.upper}{Converts all feature names to upper case. Can be useful when +analyses require comparisons between human and mouse gene names for example.} \item{...}{Arguments passed to \code{\link{Read10X_h5}}} } diff --git a/man/LogNormalize.Rd b/man/LogNormalize.Rd index 468b37a45..ab1917897 100644 --- a/man/LogNormalize.Rd +++ b/man/LogNormalize.Rd @@ -1,23 +1,35 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/preprocessing.R +% Please edit documentation in R/generics.R, R/preprocessing.R, +% R/preprocessing5.R \name{LogNormalize} \alias{LogNormalize} -\title{Normalize raw data} +\alias{LogNormalize.data.frame} +\alias{LogNormalize.V3Matrix} +\alias{LogNormalize.default} +\title{Normalize Raw Data} \usage{ -LogNormalize(data, scale.factor = 10000, verbose = TRUE) +LogNormalize(data, scale.factor = 10000, verbose = TRUE, ...) + +\method{LogNormalize}{data.frame}(data, scale.factor = 10000, verbose = TRUE, ...) + +\method{LogNormalize}{V3Matrix}(data, scale.factor = 10000, verbose = TRUE, ...) + +\method{LogNormalize}{default}(data, scale.factor = 10000, margin = 2L, verbose = TRUE, ...) } \arguments{ \item{data}{Matrix with the raw count data} -\item{scale.factor}{Scale the data. Default is 1e4} +\item{scale.factor}{Scale the data; default is \code{1e4}} \item{verbose}{Print progress} + +\item{...}{Arguments passed to other methods} } \value{ -Returns a matrix with the normalize and log transformed data +A matrix with the normalized and log-transformed data } \description{ -Normalize count data per cell and transform to log scale +Normalize Raw Data } \examples{ mat <- matrix(data = rbinom(n = 25, size = 5, prob = 0.2), nrow = 5) diff --git a/man/Read10X_Image.Rd b/man/Read10X_Image.Rd index a42388e80..99a9e38cc 100644 --- a/man/Read10X_Image.Rd +++ b/man/Read10X_Image.Rd @@ -4,18 +4,11 @@ \alias{Read10X_Image} \title{Load a 10X Genomics Visium Image} \usage{ -Read10X_Image( - image.dir, - image.name = "tissue_lowres_image.png", - filter.matrix = TRUE, - ... -) +Read10X_Image(image.dir, filter.matrix = TRUE, ...) } \arguments{ \item{image.dir}{Path to directory with 10X Genomics visium image data; -should include files \code{tissue_lowres_image.png},} - -\item{image.name}{The file name of the image. Defaults to tissue_lowres_image.png. +should include files \code{tissue_lowres_iamge.png}, \code{scalefactors_json.json} and \code{tissue_positions_list.csv}} \item{filter.matrix}{Filter spot/feature matrix to only include spots that diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd index 2c78d722f..e24620851 100644 --- a/man/ReadMtx.Rd +++ b/man/ReadMtx.Rd @@ -10,11 +10,8 @@ ReadMtx( features, cell.column = 1, feature.column = 2, - cell.sep = "\\t", - feature.sep = "\\t", skip.cell = 0, skip.feature = 0, - mtx.transpose = FALSE, unique.features = TRUE, strip.suffix = FALSE ) @@ -30,16 +27,10 @@ ReadMtx( \item{feature.column}{Specify which column of features files to use for feature/gene names; default is 2} -\item{cell.sep}{Specify the delimiter in the cell name file} - -\item{feature.sep}{Specify the delimiter in the feature name file} - \item{skip.cell}{Number of lines to skip in the cells file before beginning to read cell names} \item{skip.feature}{Number of lines to skip in the features file before beginning to gene names} -\item{mtx.transpose}{Transpose the matrix after reading in} - \item{unique.features}{Make feature names unique (default TRUE)} \item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.} diff --git a/man/VST.Rd b/man/VST.Rd new file mode 100644 index 000000000..646e4e882 --- /dev/null +++ b/man/VST.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/preprocessing5.R +\name{VST} +\alias{VST} +\alias{VST.default} +\alias{VST.dgCMatrix} +\alias{VST.matrix} +\title{Variance Stabilizing Transformation} +\usage{ +VST(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) + +\method{VST}{default}(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) + +\method{VST}{dgCMatrix}( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + verbose = TRUE, + ... +) + +\method{VST}{matrix}(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) +} +\arguments{ +\item{data}{A matrix-like object} + +\item{margin}{Unused} + +\item{nselect}{Number of of features to select} + +\item{span}{the parameter \eqn{\alpha} which controls the degree of + smoothing.} + +\item{clip}{Upper bound for values post-standardization; defaults to the +square root of the number of cells} + +\item{...}{Arguments passed to other methods} + +\item{verbose}{...} +} +\value{ +A data frame with the following columns: +\itemize{ + \item \dQuote{\code{mean}}: ... + \item \dQuote{\code{variance}}: ... + \item \dQuote{\code{variance.expected}}: ... + \item \dQuote{\code{variance.standardized}}: ... + \item \dQuote{\code{variable}}: \code{TRUE} if the feature selected as + variable, otherwise \code{FALSE} + \item \dQuote{\code{rank}}: If the feature is selected as variable, then how + it compares to other variable features with lower ranks as more variable; + otherwise, \code{NA} +} +} +\description{ +Apply variance stabilizing transformation for selection of variable features +} +\keyword{internal} diff --git a/man/roxygen/templates/param-dotsi.R b/man/roxygen/templates/param-dotsi.R new file mode 100644 index 000000000..4fd71cedb --- /dev/null +++ b/man/roxygen/templates/param-dotsi.R @@ -0,0 +1 @@ +#' @param ... Ignored diff --git a/man/roxygen/templates/param-dotsm.R b/man/roxygen/templates/param-dotsm.R new file mode 100644 index 000000000..17d5b6da6 --- /dev/null +++ b/man/roxygen/templates/param-dotsm.R @@ -0,0 +1 @@ +#' @param ... Arguments passed to other methods From 2a21ae2f32625ce278f64085228f4c92af15911d Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:01:49 -0400 Subject: [PATCH 134/979] Update imports, exports, v5 version --- DESCRIPTION | 11 ++++++----- NAMESPACE | 28 ++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 90741eff2..933130692 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9003 -Date: 2021-09-29 +Version: 4.0.4.9004 +Date: 2022-04-13 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -67,7 +67,7 @@ Imports: scales, scattermore (>= 0.7), sctransform (>= 0.3.2), - SeuratObject (>= 4.9.9.9001), + SeuratObject (>= 4.9.9.9013), shiny, spatstat.core, spatstat.geom, @@ -94,10 +94,11 @@ Collate: 'objects.R' 'preprocessing.R' 'preprocessing5.R' + 'zzz.R' + 'sketching.R' 'tree.R' 'utilities.R' - 'zzz.R' -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Encoding: UTF-8 Suggests: ape, diff --git a/NAMESPACE b/NAMESPACE index d4c6af611..42dea5ffd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ S3method(FindSpatiallyVariableFeatures,default) S3method(FindVariableFeatures,Assay) S3method(FindVariableFeatures,SCTAssay) S3method(FindVariableFeatures,Seurat) +S3method(FindVariableFeatures,Seurat5) S3method(FindVariableFeatures,StdAssay) S3method(FindVariableFeatures,V3Matrix) S3method(FindVariableFeatures,default) @@ -45,11 +46,17 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) -S3method(LogNormalize5,default) +S3method(LeverageScore,Seurat5) +S3method(LeverageScore,StdAssay) +S3method(LeverageScore,default) +S3method(LogNormalize,V3Matrix) +S3method(LogNormalize,data.frame) +S3method(LogNormalize,default) S3method(MappingScore,AnchorSet) S3method(MappingScore,default) S3method(NormalizeData,Assay) S3method(NormalizeData,Seurat) +S3method(NormalizeData,Seurat5) S3method(NormalizeData,StdAssay) S3method(NormalizeData,V3Matrix) S3method(NormalizeData,default) @@ -95,6 +102,9 @@ S3method(ScaleFactors,VisiumV1) S3method(ScoreJackStraw,DimReduc) S3method(ScoreJackStraw,JackStrawData) S3method(ScoreJackStraw,Seurat) +S3method(VST,default) +S3method(VST,dgCMatrix) +S3method(VST,matrix) S3method(as.CellDataSet,Seurat) S3method(as.Seurat,CellDataSet) S3method(as.Seurat,SingleCellExperiment) @@ -111,6 +121,8 @@ S3method(subset,SCTAssay) S3method(subset,STARmap) S3method(subset,SlideSeq) S3method(subset,VisiumV1) +export("%iff%") +export("%||%") export("DefaultAssay<-") export("Idents<-") export("Index<-") @@ -150,6 +162,7 @@ export(CollapseSpeciesExpressionMatrix) export(ColorDimSplit) export(CombinePlots) export(Command) +export(CountSketch) export(CreateAssayObject) export(CreateDimReducObject) export(CreateSCTAssayObject) @@ -190,6 +203,7 @@ export(FindTransferAnchors) export(FindVariableFeatures) export(FoldChange) export(FontSize) +export(GaussianSketch) export(GeneSymbolThesarus) export(GetAssay) export(GetAssayData) @@ -223,6 +237,7 @@ export(L2CCA) export(L2Dim) export(LabelClusters) export(LabelPoints) +export(LeverageScore) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) @@ -231,7 +246,6 @@ export(LoadSTARmap) export(Loadings) export(LocalStruct) export(LogNormalize) -export(LogNormalize5) export(LogSeuratCommand) export(LogVMR) export(Luminance) @@ -326,6 +340,7 @@ export(UMAPPlot) export(UpdateSCTAssays) export(UpdateSeuratObject) export(UpdateSymbolList) +export(VST) export(VariableFeaturePlot) export(VariableFeatures) export(VizDimLoadings) @@ -370,6 +385,7 @@ importFrom(Matrix,as.matrix) importFrom(Matrix,colMeans) importFrom(Matrix,colSums) importFrom(Matrix,crossprod) +importFrom(Matrix,qrR) importFrom(Matrix,readMM) importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) @@ -386,6 +402,8 @@ importFrom(RcppAnnoy,AnnoyEuclidean) importFrom(RcppAnnoy,AnnoyHamming) importFrom(RcppAnnoy,AnnoyManhattan) importFrom(Rtsne,Rtsne) +importFrom(SeuratObject,"%iff%") +importFrom(SeuratObject,"%||%") importFrom(SeuratObject,"DefaultAssay<-") importFrom(SeuratObject,"DefaultLayer<-") importFrom(SeuratObject,"Idents<-") @@ -427,6 +445,7 @@ importFrom(SeuratObject,IsSparse) importFrom(SeuratObject,JS) importFrom(SeuratObject,Key) importFrom(SeuratObject,LayerData) +importFrom(SeuratObject,Layers) importFrom(SeuratObject,Loadings) importFrom(SeuratObject,LogSeuratCommand) importFrom(SeuratObject,Misc) @@ -631,6 +650,11 @@ importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") importFrom(rlang,as_label) +importFrom(rlang,as_name) +importFrom(rlang,enquo) +importFrom(rlang,is_quosure) +importFrom(rlang,quo_get_env) +importFrom(rlang,quo_get_expr) importFrom(scales,brewer_pal) importFrom(scales,hue_pal) importFrom(scales,rescale) From 469312178b04357471275e3c5ab7beed9a96fb40 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:02:24 -0400 Subject: [PATCH 135/979] update lockfile --- renv.lock | 89 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/renv.lock b/renv.lock index 62fe30bb5..40cd8570e 100644 --- a/renv.lock +++ b/renv.lock @@ -35,18 +35,18 @@ }, "MASS": { "Package": "MASS", - "Version": "7.3-55", + "Version": "7.3-56", "Source": "Repository", "Repository": "CRAN", - "Hash": "c5232ffb549f6d7a04a152c34ca1353d", + "Hash": "af0e1955cb80bb36b7988cc657db261e", "Requirements": [] }, "Matrix": { "Package": "Matrix", - "Version": "1.4-0", + "Version": "1.4-1", "Source": "Repository", "Repository": "CRAN", - "Hash": "130c0caba175739d98f2963c6a407cf6", + "Hash": "699c47c606293bdfbc9fd78a93c9c8fe", "Requirements": [ "lattice" ] @@ -166,15 +166,15 @@ }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9010", + "Version": "4.9.9.9012", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", - "RemoteRepo": "seurat-object", "RemoteUsername": "mojaveazure", + "RemoteRepo": "seurat-object", "RemoteRef": "feat/standard", - "RemoteSha": "3114047de4187bfb1d532293ce8513f0732c1a17", - "Hash": "d2d746377b67b152f40d5166451b6d9b", + "RemoteSha": "23738d0d1461d6e41bf71bee90bb09dd36914b63", + "Hash": "1bc149a60ab86ec92c5b538e23ee83e7", "Requirements": [ "Matrix", "Rcpp", @@ -283,10 +283,10 @@ }, "cluster": { "Package": "cluster", - "Version": "2.1.2", + "Version": "2.1.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "ce49bfe5bc0b3ecd43a01fe1b01c2243", + "Hash": "c5f8447373ec2a0f593c694024e5b7ee", "Requirements": [] }, "codetools": { @@ -671,6 +671,16 @@ "rprojroot" ] }, + "highr": { + "Package": "highr", + "Version": "0.9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8eb36c8125038e648e5d111c0d7b2ed4", + "Requirements": [ + "xfun" + ] + }, "htmltools": { "Package": "htmltools", "Version": "0.5.2", @@ -779,6 +789,20 @@ "Hash": "d07e729b27b372429d42d24d503613a0", "Requirements": [] }, + "knitr": { + "Package": "knitr", + "Version": "1.38", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "10b3dc3c6acb925910edda5d0543b3a2", + "Requirements": [ + "evaluate", + "highr", + "stringr", + "xfun", + "yaml" + ] + }, "labeling": { "Package": "labeling", "Version": "0.4.2", @@ -873,10 +897,10 @@ }, "mgcv": { "Package": "mgcv", - "Version": "1.8-39", + "Version": "1.8-40", "Source": "Repository", "Repository": "CRAN", - "Hash": "055265005c238024e306fe0b600c89ff", + "Hash": "c6b2fdb18cf68ab613bd564363e1ba0d", "Requirements": [ "Matrix", "nlme" @@ -913,10 +937,10 @@ }, "nlme": { "Package": "nlme", - "Version": "3.1-155", + "Version": "3.1-157", "Source": "Repository", "Repository": "CRAN", - "Hash": "74ad940dccc9e977189a5afe5fcdb7ba", + "Hash": "dbca60742be0c9eddc5205e5c7ca1f44", "Requirements": [ "lattice" ] @@ -1171,6 +1195,25 @@ "Hash": "04884d9a75d778aca22c7154b8333ec9", "Requirements": [] }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "ac78f4d2e0289d4cba73b88af567b8b1", + "Requirements": [ + "bslib", + "evaluate", + "htmltools", + "jquerylib", + "jsonlite", + "knitr", + "stringr", + "tinytex", + "xfun", + "yaml" + ] + }, "rpart": { "Package": "rpart", "Version": "4.1.16", @@ -1518,6 +1561,16 @@ "vctrs" ] }, + "tinytex": { + "Package": "tinytex", + "Version": "0.38", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "759d047596ac173433985deddf313450", + "Requirements": [ + "xfun" + ] + }, "utf8": { "Package": "utf8", "Version": "1.2.2", @@ -1587,6 +1640,14 @@ "Hash": "c0e49a9760983e81e55cdd9be92e7182", "Requirements": [] }, + "xfun": { + "Package": "xfun", + "Version": "0.30", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e83f48136b041845e50a6658feffb197", + "Requirements": [] + }, "xtable": { "Package": "xtable", "Version": "1.8-4", From bbbba0bfd68c0075fa18ed1c9912d9ab10bb829a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 13 Apr 2022 19:03:35 -0400 Subject: [PATCH 136/979] Update lockfile --- renv.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/renv.lock b/renv.lock index 40cd8570e..4b7128e2c 100644 --- a/renv.lock +++ b/renv.lock @@ -166,15 +166,15 @@ }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9012", + "Version": "4.9.9.9013", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", - "RemoteUsername": "mojaveazure", "RemoteRepo": "seurat-object", + "RemoteUsername": "mojaveazure", "RemoteRef": "feat/standard", - "RemoteSha": "23738d0d1461d6e41bf71bee90bb09dd36914b63", - "Hash": "1bc149a60ab86ec92c5b538e23ee83e7", + "RemoteSha": "c9e4d752750f5fc29a184f2b072cf237363ccf5d", + "Hash": "beafc05491c07a69e3014ae454830eeb", "Requirements": [ "Matrix", "Rcpp", From 3908000d1cd54bbd833922e932e3f0297c415651 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 14 Apr 2022 18:05:40 -0400 Subject: [PATCH 137/979] Add LeverageScoreSampling Update SeuratObject version --- DESCRIPTION | 6 ++--- NAMESPACE | 1 + R/sketching.R | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ renv.lock | 8 +++---- 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 933130692..8dac46d0f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9004 -Date: 2022-04-13 +Version: 4.0.4.9005 +Date: 2022-04-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -67,7 +67,7 @@ Imports: scales, scattermore (>= 0.7), sctransform (>= 0.3.2), - SeuratObject (>= 4.9.9.9013), + SeuratObject (>= 4.9.9.9014), shiny, spatstat.core, spatstat.geom, diff --git a/NAMESPACE b/NAMESPACE index 42dea5ffd..46eb8f017 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -238,6 +238,7 @@ export(L2Dim) export(LabelClusters) export(LabelPoints) export(LeverageScore) +export(LeverageScoreSampling) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) diff --git a/R/sketching.R b/R/sketching.R index 6ff65319f..47556040f 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -12,6 +12,70 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom SeuratObject Key Key<- Layers +#' +#' @export +#' +LeverageScoreSampling <- function( + object, + assay = NULL, + ncells = 5000, + save = 'sketch', + default = TRUE, + seed = NA_integer_, + ... +) { + # browser() + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + # TODO: fix this in [[<-,Seurat5 + if (save == assay) { + stop("Cannot overwrite existing assays", call. = FALSE) + } + if (save %in% Assays(object = object)) { + if (save == DefaultAssay(object = object)) { + DefaultAssay(object = object) <- assay + } + object[[save]] <- NULL + } + vars <- grep( + pattern = '^seurat_leverage_score_', + x = names(x = object[[]]), + value = TRUE + ) + names(x = vars) <- vars + vars <- gsub(pattern = '^seurat_leverage_score_', replacement = '', x = vars) + vars <- vars[vars %in% Layers(object = object[[assay]])] + if (!length(x = vars)) { + stop("No leverage scores found for assay ", assay, call. = FALSE) + } + cells <- lapply( + X = seq_along(along.with = vars), + FUN = function(i, seed) { + if (!is.na(x = seed)) { + set.seed(seed = seed) + } + return(sample( + x = Cells(x = object[[assay]], layer = vars[i]), + size = ncells, + prob = object[[names(x = vars)[i], drop = TRUE, na.rm = TRUE]] + )) + }, + seed = seed + ) + sketched <- suppressWarnings(expr = subset( + x = object[[assay]], + cells = Reduce(f = union, x = cells), + layers = vars + )) + Key(object = sketched) <- Key(object = save, quiet = TRUE) + object[[save]] <- sketched + if (isTRUE(x = default)) { + DefaultAssay(object = object) <- save + } + return(object) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/renv.lock b/renv.lock index 4b7128e2c..0fc772390 100644 --- a/renv.lock +++ b/renv.lock @@ -166,15 +166,15 @@ }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9013", + "Version": "4.9.9.9014", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", - "RemoteRepo": "seurat-object", "RemoteUsername": "mojaveazure", + "RemoteRepo": "seurat-object", "RemoteRef": "feat/standard", - "RemoteSha": "c9e4d752750f5fc29a184f2b072cf237363ccf5d", - "Hash": "beafc05491c07a69e3014ae454830eeb", + "RemoteSha": "7a6c3222b5220191a841f11fb98fc9b4e9548d57", + "Hash": "494398a3f9dddd678cd0fadabdc4b8b2", "Requirements": [ "Matrix", "Rcpp", From 61a6ee7e6213c9b7c3e544f6ad9e41149f3d8cbd Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 28 Apr 2022 15:03:16 -0400 Subject: [PATCH 138/979] update lockfile --- renv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/renv.lock b/renv.lock index 0fc772390..87e8b6740 100644 --- a/renv.lock +++ b/renv.lock @@ -166,15 +166,15 @@ }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9014", + "Version": "4.9.9.9015", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "mojaveazure", "RemoteRepo": "seurat-object", "RemoteRef": "feat/standard", - "RemoteSha": "7a6c3222b5220191a841f11fb98fc9b4e9548d57", - "Hash": "494398a3f9dddd678cd0fadabdc4b8b2", + "RemoteSha": "a3a604e47e3ad026520eec0096b5dc31a2211a82", + "Hash": "437a8f6c9e8e877cd9d814f02345459f", "Requirements": [ "Matrix", "Rcpp", From b787ed86e507d125e991fee3e0b88ec4898da12f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 8 May 2022 14:36:17 -0400 Subject: [PATCH 139/979] optimize Pseudobulk --- R/utilities.R | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 65741c2a2..48329c80a 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1281,7 +1281,7 @@ PseudobulkExpression <- function( category.matrix <- category.matrix[, colsums > 0] colsums <- colsums[colsums > 0] if (pb.method == 'average') { - category.matrix <- Sweep( + category.matrix <- SweepSparse( x = category.matrix, MARGIN = 2, STATS = colsums, @@ -1332,12 +1332,12 @@ PseudobulkExpression <- function( warning("Exponentiation yielded infinite values. `data` may not be log-normed.") } } - data.return[[i]] <- as.matrix(x = (data.use %*% category.matrix)) + data.return[[i]] <- data.use %*% category.matrix names(x = data.return)[i] <- assays[[i]] } if (return.seurat) { if (slot[1] == 'scale.data') { - na.matrix <- data.return[[1]] + na.matrix <- as.matrix(x = as.madata.return[[1]]) na.matrix[1:length(x = na.matrix)] <- NA # TODO: restore once check.matrix is in SeuratObject # toRet <- CreateSeuratObject( @@ -1397,7 +1397,7 @@ PseudobulkExpression <- function( if (length(x = data.return) > 1) { for (i in 2:length(x = data.return)) { if (slot[i] == 'scale.data') { - na.matrix <- data.return[[i]] + na.matrix <- as.matrix(x = data.return[[i]]) na.matrix[1:length(x = na.matrix)] <- NA # TODO: restore once check.matrix is in SeuratObject # toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = na.matrix, check.matrix = FALSE) @@ -1441,10 +1441,11 @@ PseudobulkExpression <- function( } } if ('ident' %in% group.by) { - first.cells <- c() - for (i in 1:ncol(x = category.matrix)) { - first.cells <- c(first.cells, Position(x = category.matrix[,i], f = function(x) {x > 0})) + first.cells <- sapply(X = 1:ncol(x = category.matrix), + FUN = function(x) { + return(category.matrix[,x, drop = FALSE ]@i[1] + 1) } + ) Idents(object = toRet) <- Idents(object = object)[first.cells] } return(toRet) @@ -2464,3 +2465,27 @@ ToNumeric <- function(x){ } return(x) } + + +# sparse version of sweep +SweepSparse <- function( + x, + MARGIN, + STATS, + FUN = "/" + ) { + if (!inherits(x = x, what = 'dgCMatrix')) { + stop('input should be dgCMatrix. eg: x <- as(x, "CsparseMatrix")') + } + fun <- match.fun(FUN) + if (MARGIN == 1) { + idx <- x@i + 1 + x@x <- fun(x@x, STATS[idx]) + } else if (MARGIN == 2) { + x <- as(x, "RsparseMatrix") + idx <- x@j + 1 + x@x <- fun(x@x, STATS[idx]) + x <- as(x, "CsparseMatrix") + } + return(x) +} \ No newline at end of file From 3d319a0a4d6ae7fe6d90fd9be5ee2ad624faed00 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 17 May 2022 17:04:38 -0400 Subject: [PATCH 140/979] Minor updates --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 4dd4d6882..3a2c70fba 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -308,7 +308,7 @@ NormalizeData.StdAssay <- function( ) } if (isTRUE(x = default)) { - DefaultLayer(object = object) <- save[1L] + DefaultLayer(object = object) <- save } gc(verbose = FALSE) return(object) From 87ba00b4abe94eec862772588d6abbdf4c4d19f5 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 17 May 2022 17:04:46 -0400 Subject: [PATCH 141/979] Minor fixes --- R/visualization.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index a06d64798..8a60bdecd 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -5604,6 +5604,7 @@ ExIPlot <- function( ) { assay <- assay %||% DefaultAssay(object = object) DefaultAssay(object = object) <- assay + cells <- Cells(x = object, assay = NULL) if (isTRUE(x = stack)) { if (!is.null(x = ncol)) { warning( @@ -5626,14 +5627,15 @@ ExIPlot <- function( no = min(length(x = features), 3) ) } - data <- FetchData(object = object, vars = features, slot = slot) + if (!is.null(x = idents)) { + cells <- intersect( + x = names(x = Idents(object = object)[Idents(object = object) %in% idents]), + y = cells + ) + } + data <- FetchData(object = object, vars = features, slot = slot, cells = cells) pt.size <- pt.size %||% AutoPointSize(data = object) features <- colnames(x = data) - if (is.null(x = idents)) { - cells <- colnames(x = object) - } else { - cells <- names(x = Idents(object = object)[Idents(object = object) %in% idents]) - } data <- data[cells, , drop = FALSE] idents <- if (is.null(x = group.by)) { Idents(object = object)[cells] From 0a0b57c2768c621f7fee46031dab3122608c4298 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 8 Jun 2022 11:58:43 -0400 Subject: [PATCH 142/979] Add ScaleData.v5, SelectIntegrationFeatures5 --- NAMESPACE | 4 + R/integration.R | 57 ++++++ R/preprocessing5.R | 125 +++++++++++++ R/sketching.R | 8 +- renv.lock | 446 +++++++++++++++++++++++++++++++++++++++------ 5 files changed, 581 insertions(+), 59 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 46eb8f017..444a73db3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -97,6 +97,8 @@ S3method(SCTResults,SCTModel) S3method(SCTResults,Seurat) S3method(ScaleData,Assay) S3method(ScaleData,Seurat) +S3method(ScaleData,Seurat5) +S3method(ScaleData,StdAssay) S3method(ScaleData,default) S3method(ScaleFactors,VisiumV1) S3method(ScoreJackStraw,DimReduc) @@ -464,6 +466,7 @@ importFrom(SeuratObject,SetIdent) importFrom(SeuratObject,SpatiallyVariableFeatures) importFrom(SeuratObject,StashIdent) importFrom(SeuratObject,Stdev) +importFrom(SeuratObject,StitchMatrix) importFrom(SeuratObject,Tool) importFrom(SeuratObject,UpdateSeuratObject) importFrom(SeuratObject,VariableFeatures) @@ -654,6 +657,7 @@ importFrom(rlang,as_label) importFrom(rlang,as_name) importFrom(rlang,enquo) importFrom(rlang,is_quosure) +importFrom(rlang,is_scalar_character) importFrom(rlang,quo_get_env) importFrom(rlang,quo_get_expr) importFrom(scales,brewer_pal) diff --git a/R/integration.R b/R/integration.R index 81c34006b..7d3f80fa4 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2701,6 +2701,63 @@ SelectIntegrationFeatures <- function( return(features) } +.FeatureRank <- function(features, flist, ranks = FALSE) { + franks <- vapply( + X = features, + FUN = function(x) { + return(median(x = unlist(x = lapply( + X = flist, + FUN = function(fl) { + if (x %in% fl) { + return(which(x = x == fl)) + } + return(NULL) + } + )))) + }, + FUN.VALUE = numeric(length = 1L) + ) + franks <- sort(x = franks) + if (!isTRUE(x = ranks)) { + franks <- names(x = franks) + } + return(franks) +} + +SelectIntegrationFeatures5 <- function( + object, + nfeatures = 2000, + assay = NULL, + layers = NULL, + verbose = TRUE, + # fvf.nfeatures = 2000, + ... +) { + assay <- assay %||% DefaultAssay(object = object) + layers <- Layers(object = object[[assay]], search = layers) + vf.list <- lapply( + X = layers, + FUN = function(x) { + return(VariableFeatures(object = object, assay = assay, layer = x)) + } + ) + var.features <- unname(obj = unlist(x = vf.list)) + fmat <- slot(object = object[[assay]], name = 'features')[, layers] + fmat <- droplevels(x = fmat) + var.features <- var.features[var.features %in% rownames(x = fmat)] + var.features <- sort(x = table(var.features), decreasing = TRUE) + tie.val <- var.features[min(nfeatures, length(x = var.features))] + features <- names(x = var.features[which(x = var.features > tie.val)]) + if (length(x = features)) { + features <- .FeatureRank(features = features, flist = vf.list) + } + features.tie <- .FeatureRank( + features = names(x = var.features[which(x = var.features == tie.val)]), + flist = vf.list + ) + return(head(x = c(features, features.tie), n = nfeatures)) +} + #' Transfer data #' #' Transfer categorical or continuous data across single-cell datasets. For diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 3a2c70fba..df044731b 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -347,6 +347,131 @@ NormalizeData.Seurat5 <- function( return(object) } +#' @importFrom SeuratObject StitchMatrix +#' +#' @method ScaleData StdAssay +#' @export +#' +ScaleData.StdAssay <- function( + object, + features = NULL, + layer = NULL, + vars.to.regress = NULL, + latent.data = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale= TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + save = 'scale.data', + verbose = TRUE, + ... +) { + use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) + layer <- Layers(object = object, search = layer) + if (isTRUE(x = use.umi)) { + message("'use.umi' is TRUE, please make sure 'layer' specifies raw counts") + } + features <- features %||% Reduce( + f = union, + x = lapply( + X = layer, + FUN = function(x) { + return(VariableFeatures(object = object, layer = layer)) + } + ) + ) + if (!length(x = features)) { + features <- Reduce(f = union, x = lapply(X = layer, FUN = Features, x = object)) + } + ldata <- if (length(x = layer) > 1L) { + StitchMatrix( + x = LayerData(object = object, layer = layer[1L], features = features), + y = lapply( + X = layer[2:length(x = layer)], + FUN = LayerData, + object = object, + features = features + ), + rowmap = slot(object = object, name = 'features')[features, layer], + colmap = slot(object = object, name = 'cells')[, layer] + ) + } else { + LayerData(object = object, layer = layer, features = features) + } + LayerData(object = object, layer = save, features = features) <- ScaleData( + object = ldata, + features = features, + vars.to.regress = vars.to.regress, + latent.data = latent.data, + split.by = split.by, + model.use = model.use, + use.umi = use.umi, + do.scale = do.scale, + do.center = do.center, + scale.max = scale.max, + block.size = block.size, + min.cells.to.block = min.cells.to.block, + verbose = verbose, + ... + ) + return(object) +} + +#' @importFrom rlang is_scalar_character +#' +#' @method ScaleData Seurat5 +#' @export +#' +ScaleData.Seurat5 <- function( + object, + features = NULL, + assay = NULL, + layer = NULL, + vars.to.regress = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... +) { + assay <- assay %||% DefaultAssay(object = object) + vars.to.regress <- intersect(x = vars.to.regress, y = names(x = object[[]])) + latent.data <- if (length(x = vars.to.regress)) { + object[[vars.to.regress]] + } else { + NULL + } + if (is_scalar_character(x = split.by)) { + split.by <- object[[split.by]] + } + object[[assay]] <- ScaleData( + object = object[[assay]], + features = features, + layer = layer, + vars.to.regress = vars.to.regress, + latent.data = latent.data, + split.by = split.by, + model.use = model.use, + use.umi = use.umi, + do.scale = do.scale, + do.center = do.center, + scale.max = scale.max, + min.cells.to.block = min.cells.to.block, + verbose = verbose, + ... + ) + return(object) +} + #' @rdname VST #' @method VST default #' @export diff --git a/R/sketching.R b/R/sketching.R index 47556040f..8987c5215 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -25,7 +25,6 @@ LeverageScoreSampling <- function( seed = NA_integer_, ... ) { - # browser() assay <- assay[1L] %||% DefaultAssay(object = object) assay <- match.arg(arg = assay, choices = Assays(object = object)) # TODO: fix this in [[<-,Seurat5 @@ -68,6 +67,13 @@ LeverageScoreSampling <- function( cells = Reduce(f = union, x = cells), layers = vars )) + for (lyr in vars) { + try( + expr = VariableFeatures(object = sketched, selection.method = "sketch", layer = lyr) <- + VariableFeatures(object = object[[assay]], layer = lyr), + silent = TRUE + ) + } Key(object = sketched) <- Key(object = save, quiet = TRUE) object[[save]] <- sketched if (isTRUE(x = default)) { diff --git a/renv.lock b/renv.lock index 87e8b6740..b1a66d87b 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.1.3", + "Version": "4.2.0", "Repositories": [ { "Name": "CRAN", @@ -8,6 +8,9 @@ } ] }, + "Bioconductor": { + "Version": "3.15" + }, "Packages": { "BH": { "Package": "BH", @@ -17,6 +20,42 @@ "Hash": "4e348572ffcaa2fb1e610e7a941f6f3a", "Requirements": [] }, + "BiocGenerics": { + "Package": "BiocGenerics", + "Version": "0.42.0", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/BiocGenerics", + "git_branch": "RELEASE_3_15", + "git_last_commit": "3582d47", + "git_last_commit_date": "2022-04-26", + "Hash": "37348ee784e82c0a6b650146275b459d", + "Requirements": [] + }, + "BiocManager": { + "Package": "BiocManager", + "Version": "1.30.17", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0ab5f6e14502755ca875f938781909af", + "Requirements": [] + }, + "DelayedArray": { + "Package": "DelayedArray", + "Version": "0.22.0", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/DelayedArray", + "git_branch": "RELEASE_3_15", + "git_last_commit": "4a5afd1", + "git_last_commit_date": "2022-04-26", + "Hash": "c83a6e282a9669d4e04eb2d2ce8660a8", + "Requirements": [ + "BiocGenerics", + "IRanges", + "Matrix", + "MatrixGenerics", + "S4Vectors" + ] + }, "FNN": { "Package": "FNN", "Version": "1.1.3", @@ -25,6 +64,20 @@ "Hash": "b56998fff55e4a4b4860ad6e8c67e0f9", "Requirements": [] }, + "IRanges": { + "Package": "IRanges", + "Version": "2.30.0", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/IRanges", + "git_branch": "RELEASE_3_15", + "git_last_commit": "9b5f3ca", + "git_last_commit_date": "2022-04-26", + "Hash": "27d19cab80fa238e21abfbd2892120f4", + "Requirements": [ + "BiocGenerics", + "S4Vectors" + ] + }, "KernSmooth": { "Package": "KernSmooth", "Version": "2.23-20", @@ -35,10 +88,10 @@ }, "MASS": { "Package": "MASS", - "Version": "7.3-56", + "Version": "7.3-57", "Source": "Repository", "Repository": "CRAN", - "Hash": "af0e1955cb80bb36b7988cc657db261e", + "Hash": "71476c1d88d1ebdf31580e5a257d5d31", "Requirements": [] }, "Matrix": { @@ -51,6 +104,19 @@ "lattice" ] }, + "MatrixGenerics": { + "Package": "MatrixGenerics", + "Version": "1.8.0", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/MatrixGenerics", + "git_branch": "RELEASE_3_15", + "git_last_commit": "e4cc34d", + "git_last_commit_date": "2022-04-26", + "Hash": "68a6ed57fd9a1d0d6beea73507b3ff59", + "Requirements": [ + "matrixStats" + ] + }, "R6": { "Package": "R6", "Version": "2.5.1", @@ -87,10 +153,10 @@ }, "RSpectra": { "Package": "RSpectra", - "Version": "0.16-0", + "Version": "0.16-1", "Source": "Repository", "Repository": "CRAN", - "Hash": "a41329d24d5a98eaed2bd0159adb1b5f", + "Hash": "6b5ab997fd5ff6d46a5f1d9f8b76961c", "Requirements": [ "Matrix", "Rcpp", @@ -125,17 +191,45 @@ "Rcpp" ] }, + "RcppCCTZ": { + "Package": "RcppCCTZ", + "Version": "0.2.10", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f41ba6c0b1b62d8f3610e70029d6fb9e", + "Requirements": [ + "Rcpp" + ] + }, + "RcppDate": { + "Package": "RcppDate", + "Version": "0.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "08cc427d6fe7a63e604cfa11aad31006", + "Requirements": [] + }, "RcppEigen": { "Package": "RcppEigen", - "Version": "0.3.3.9.1", + "Version": "0.3.3.9.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "ddfa72a87fdf4c80466a20818be91d00", + "Hash": "4c86baed78388ceb06f88e3e9a1d87f5", "Requirements": [ "Matrix", "Rcpp" ] }, + "RcppGSL": { + "Package": "RcppGSL", + "Version": "0.3.11", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "af9a4cf1669ff630aa4c5d2aa32c9732", + "Requirements": [ + "Rcpp" + ] + }, "RcppProgress": { "Package": "RcppProgress", "Version": "0.4.2", @@ -154,35 +248,113 @@ "Rcpp" ] }, + "RcppZiggurat": { + "Package": "RcppZiggurat", + "Version": "0.1.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "75b4a36aeeed440ad03b996081190703", + "Requirements": [ + "Rcpp", + "RcppGSL" + ] + }, + "Rfast": { + "Package": "Rfast", + "Version": "2.0.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "330d7a6d1f7062cc49e68c7d4797bb75", + "Requirements": [ + "Rcpp", + "RcppArmadillo", + "RcppZiggurat" + ] + }, + "Rfast2": { + "Package": "Rfast2", + "Version": "0.1.3", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "28900a0181cd8a68bc65b82ba67770bd", + "Requirements": [ + "RANN", + "Rcpp", + "RcppArmadillo", + "Rfast" + ] + }, "Rtsne": { "Package": "Rtsne", - "Version": "0.15", + "Version": "0.16", "Source": "Repository", "Repository": "CRAN", - "Hash": "f153432c4ca15b937ccfaa40f167c892", + "Hash": "e921b89ef921905fc89b95886675706d", "Requirements": [ "Rcpp" ] }, + "S4Vectors": { + "Package": "S4Vectors", + "Version": "0.34.0", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/S4Vectors", + "git_branch": "RELEASE_3_15", + "git_last_commit": "f590de3", + "git_last_commit_date": "2022-04-26", + "Hash": "90677cc888563927cecacd4067ccad45", + "Requirements": [ + "BiocGenerics" + ] + }, "SeuratObject": { "Package": "SeuratObject", - "Version": "4.9.9.9015", + "Version": "4.9.9.9027", "Source": "GitHub", "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "mojaveazure", "RemoteRepo": "seurat-object", "RemoteRef": "feat/standard", - "RemoteSha": "a3a604e47e3ad026520eec0096b5dc31a2211a82", - "Hash": "437a8f6c9e8e877cd9d814f02345459f", + "RemoteSha": "6d88922df63ec4a90d79a9f979b1a7c34ff6abc9", + "Hash": "59b9fdbdffbbfdebcfec27bb42725c76", "Requirements": [ "Matrix", "Rcpp", "RcppEigen", + "future", + "future.apply", + "progressr", + "rgeos", "rlang", + "sp", "spam" ] }, + "TileDBArray": { + "Package": "TileDBArray", + "Version": "1.6.0", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/TileDBArray", + "git_branch": "RELEASE_3_15", + "git_last_commit": "9cda8da", + "git_last_commit_date": "2022-04-26", + "Hash": "2bdefe19c80ca24aeccfee96f30948de", + "Requirements": [ + "DelayedArray", + "Rcpp", + "S4Vectors", + "tiledb" + ] + }, + "VGAM": { + "Package": "VGAM", + "Version": "1.1-6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3a3f2e54570b48d0670c26363b6728c4", + "Requirements": [] + }, "abind": { "Package": "abind", "Version": "1.4-5", @@ -191,6 +363,18 @@ "Hash": "4f57884290cc75ab22f4af9e9d4ca862", "Requirements": [] }, + "ape": { + "Package": "ape", + "Version": "5.6-2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "894108412a7ec23d5de85cdcce871c8b", + "Requirements": [ + "Rcpp", + "lattice", + "nlme" + ] + }, "askpass": { "Package": "askpass", "Version": "1.1", @@ -209,6 +393,24 @@ "Hash": "543776ae6848fde2f48ff3816d0628bc", "Requirements": [] }, + "bit": { + "Package": "bit", + "Version": "4.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f36715f14d94678eea9933af927bc15d", + "Requirements": [] + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "9fe98599ca456d6552421db0d6772d8f", + "Requirements": [ + "bit" + ] + }, "bitops": { "Package": "bitops", "Version": "1.0-7", @@ -273,10 +475,10 @@ }, "cli": { "Package": "cli", - "Version": "3.2.0", + "Version": "3.3.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "1bdb126893e9ce6aae50ad1d6fc32faf", + "Hash": "23abf173c2b783dcc43379ab9bba00ee", "Requirements": [ "glue" ] @@ -419,10 +621,10 @@ }, "dplyr": { "Package": "dplyr", - "Version": "1.0.8", + "Version": "1.0.9", "Source": "Repository", "Repository": "CRAN", - "Hash": "ef47665e64228a17609d6df877bf86f2", + "Hash": "f0bda1627a7f5d3f9a0b5add931596ac", "Requirements": [ "R6", "generics", @@ -458,6 +660,18 @@ "rlang" ] }, + "enrichR": { + "Package": "enrichR", + "Version": "3.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "8cdcd06e4fa171911dc4d83feaa26467", + "Requirements": [ + "ggplot2", + "httr", + "rjson" + ] + }, "evaluate": { "Package": "evaluate", "Version": "0.15", @@ -522,10 +736,10 @@ }, "future": { "Package": "future", - "Version": "1.24.0", + "Version": "1.25.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "5cc7addaa73372fbee0a7d06c880068e", + "Hash": "877024e372cf61e41f5d13eafd8d4bac", "Requirements": [ "digest", "globals", @@ -535,10 +749,10 @@ }, "future.apply": { "Package": "future.apply", - "Version": "1.8.1", + "Version": "1.9.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "f568ce73d3d59582b0f7babd0eb33d07", + "Hash": "8ec2bd333ccd4df6bf70e68dded6c364", "Requirements": [ "future", "globals" @@ -554,10 +768,10 @@ }, "ggplot2": { "Package": "ggplot2", - "Version": "3.3.5", + "Version": "3.3.6", "Source": "Repository", "Repository": "CRAN", - "Hash": "d7566c471c7b17e095dd023b9ef155ad", + "Hash": "0fb26d0674c82705c6b701d1a61e02ea", "Requirements": [ "MASS", "digest", @@ -625,10 +839,10 @@ }, "gplots": { "Package": "gplots", - "Version": "3.1.1", + "Version": "3.1.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "e65e5d5dea4cbb9ba822dcd782b2ee1f", + "Hash": "75437dd4c43599f6e9418ea249495fda", "Requirements": [ "KernSmooth", "caTools", @@ -661,6 +875,17 @@ "Hash": "2ace6c4a06297d0b364e0444384a2b82", "Requirements": [] }, + "hdf5r": { + "Package": "hdf5r", + "Version": "1.3.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0870af1f511f6d6d1230e71856801f86", + "Requirements": [ + "R6", + "bit64" + ] + }, "here": { "Package": "here", "Version": "1.0.1", @@ -721,10 +946,10 @@ }, "httr": { "Package": "httr", - "Version": "1.4.2", + "Version": "1.4.3", "Source": "Repository", "Repository": "CRAN", - "Hash": "a525aba14184fec243f9eaec62fbed43", + "Hash": "88d1b310583777edf01ccd1216fb0b2b", "Requirements": [ "R6", "curl", @@ -743,10 +968,10 @@ }, "igraph": { "Package": "igraph", - "Version": "1.3.0", + "Version": "1.3.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "bf0f6f80c2eaf5c4485ecbe710dd0a30", + "Hash": "820595efd173f9bac4ea76c1be007299", "Requirements": [ "Matrix", "magrittr", @@ -789,12 +1014,20 @@ "Hash": "d07e729b27b372429d42d24d503613a0", "Requirements": [] }, + "kernlab": { + "Package": "kernlab", + "Version": "0.9-30", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "911c5e7638c514aac7d0b46840ce6a91", + "Requirements": [] + }, "knitr": { "Package": "knitr", - "Version": "1.38", + "Version": "1.39", "Source": "Repository", "Repository": "CRAN", - "Hash": "10b3dc3c6acb925910edda5d0543b3a2", + "Hash": "029ab7c4badd3cf8af69016b2ba27493", "Requirements": [ "evaluate", "highr", @@ -840,10 +1073,10 @@ }, "leiden": { "Package": "leiden", - "Version": "0.3.9", + "Version": "0.3.10", "Source": "Repository", "Repository": "CRAN", - "Hash": "d6768920a499f996e6025c5daecf33fb", + "Hash": "f0b75ad9505081a5a91b1c2947d70cdb", "Requirements": [ "Matrix", "igraph", @@ -889,10 +1122,10 @@ }, "matrixStats": { "Package": "matrixStats", - "Version": "0.61.0", + "Version": "0.62.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "b8e6221fc11247b12ab1b055a6f66c27", + "Hash": "36ad89a805c436c5316c22490079da67", "Requirements": [] }, "mgcv": { @@ -925,6 +1158,19 @@ "shiny" ] }, + "mixtools": { + "Package": "mixtools", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4999c07351f4c5e2f323c1d6e88652b9", + "Requirements": [ + "MASS", + "kernlab", + "segmented", + "survival" + ] + }, "munsell": { "Package": "munsell", "Version": "0.5.0", @@ -935,6 +1181,20 @@ "colorspace" ] }, + "nanotime": { + "Package": "nanotime", + "Version": "0.3.6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "609658c0d87106c130e1101c3c37c4dd", + "Requirements": [ + "Rcpp", + "RcppCCTZ", + "RcppDate", + "bit64", + "zoo" + ] + }, "nlme": { "Package": "nlme", "Version": "3.1-157", @@ -957,10 +1217,10 @@ }, "parallelly": { "Package": "parallelly", - "Version": "1.30.0", + "Version": "1.31.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "67db13907a9cea89c118cf82d448799f", + "Hash": "115faaa1a50897c3e2339d1cb7d3d493", "Requirements": [] }, "patchwork": { @@ -1026,10 +1286,10 @@ }, "plotly": { "Package": "plotly", - "Version": "4.10.0", + "Version": "4.10.0.9001", "Source": "Repository", "Repository": "CRAN", - "Hash": "fbb11e44d057996ca5fe40d959cacfb0", + "Hash": "166e09e58492fc26d2293c67a99a0e5b", "Requirements": [ "RColorBrewer", "base64enc", @@ -1099,6 +1359,16 @@ "ps" ] }, + "progressr": { + "Package": "progressr", + "Version": "0.10.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7df448f5ae46ab0a1af0fb619349d3fd", + "Requirements": [ + "digest" + ] + }, "promises": { "Package": "promises", "Version": "1.2.0.1", @@ -1115,10 +1385,10 @@ }, "ps": { "Package": "ps", - "Version": "1.6.0", + "Version": "1.7.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "32620e2001c1dce1af49c49dccbb9420", + "Hash": "eef74b13f32cae6bb0d495e53317c44c", "Requirements": [] }, "purrr": { @@ -1187,6 +1457,24 @@ "withr" ] }, + "rgeos": { + "Package": "rgeos", + "Version": "0.5-9", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "7cfa4f73a070042ecf141d5c1d1bfcae", + "Requirements": [ + "sp" + ] + }, + "rjson": { + "Package": "rjson", + "Version": "0.2.21", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "f9da75e6444e95a1baf8ca24909d63b9", + "Requirements": [] + }, "rlang": { "Package": "rlang", "Version": "1.0.2", @@ -1197,10 +1485,10 @@ }, "rmarkdown": { "Package": "rmarkdown", - "Version": "2.13", + "Version": "2.14", "Source": "Repository", "Repository": "CRAN", - "Hash": "ac78f4d2e0289d4cba73b88af567b8b1", + "Hash": "31b60a882fabfabf6785b8599ffeb8ba", "Requirements": [ "bslib", "evaluate", @@ -1238,6 +1526,16 @@ "Hash": "06c85365a03fdaf699966cc1d3cf53ea", "Requirements": [] }, + "rsvd": { + "Package": "rsvd", + "Version": "1.0.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b462187d887abc519894874486dbd6fd", + "Requirements": [ + "Matrix" + ] + }, "sass": { "Package": "sass", "Version": "0.4.1", @@ -1254,10 +1552,10 @@ }, "scales": { "Package": "scales", - "Version": "1.1.1", + "Version": "1.2.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "6f76f71042411426ec8df6c54f34e6dd", + "Hash": "6e8750cdd13477aa440d453da93d5cac", "Requirements": [ "R6", "RColorBrewer", @@ -1265,6 +1563,7 @@ "labeling", "lifecycle", "munsell", + "rlang", "viridisLite" ] }, @@ -1301,6 +1600,16 @@ "rlang" ] }, + "segmented": { + "Package": "segmented", + "Version": "1.5-0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "eefd32a9f572433282949f775c7d5bf0", + "Requirements": [ + "MASS" + ] + }, "shiny": { "Package": "shiny", "Version": "1.7.1", @@ -1348,6 +1657,16 @@ "Hash": "947e4e02a79effa5d512473e10f41797", "Requirements": [] }, + "sp": { + "Package": "sp", + "Version": "1.4-7", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c5dad1d43440f0c426a6d29b30b333fa", + "Requirements": [ + "lattice" + ] + }, "spam": { "Package": "spam", "Version": "2.8-0", @@ -1381,10 +1700,10 @@ }, "spatstat.data": { "Package": "spatstat.data", - "Version": "2.1-4", + "Version": "2.2-0", "Source": "Repository", "Repository": "CRAN", - "Hash": "cc7222397c84a957caaff24d6260bf5a", + "Hash": "5b4f6c12d1a222680579070a045786bb", "Requirements": [ "Matrix", "spatstat.utils" @@ -1417,10 +1736,10 @@ }, "spatstat.sparse": { "Package": "spatstat.sparse", - "Version": "2.1-0", + "Version": "2.1-1", "Source": "Repository", "Repository": "CRAN", - "Hash": "66728f844c20c0a3a17f1f7ca62dcae6", + "Hash": "cde91679dea4a2fb53833a86f32c94f5", "Requirements": [ "Matrix", "abind", @@ -1458,10 +1777,10 @@ }, "survival": { "Package": "survival", - "Version": "3.2-13", + "Version": "3.3-1", "Source": "Repository", "Repository": "CRAN", - "Hash": "6f0a0fadc63bc6570fe172770f15bbc4", + "Hash": "f6189c70451d3d68e0d571235576e833", "Requirements": [ "Matrix" ] @@ -1484,10 +1803,10 @@ }, "testthat": { "Package": "testthat", - "Version": "3.1.3", + "Version": "3.1.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "affcf9db2c99dd2c7e6459ef55ed3385", + "Hash": "f76c2a02d0fdc24aa7a47ea34261a6e3", "Requirements": [ "R6", "brio", @@ -1512,10 +1831,10 @@ }, "tibble": { "Package": "tibble", - "Version": "3.1.6", + "Version": "3.1.7", "Source": "Repository", "Repository": "CRAN", - "Hash": "8a8f02d1934dfd6431c671361510dd0b", + "Hash": "08415af406e3dd75049afef9552e7355", "Requirements": [ "ellipsis", "fansi", @@ -1561,6 +1880,17 @@ "vctrs" ] }, + "tiledb": { + "Package": "tiledb", + "Version": "0.12.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b7fdc76172db79497f91faa14b6a6f85", + "Requirements": [ + "Rcpp", + "nanotime" + ] + }, "tinytex": { "Package": "tinytex", "Version": "0.38", @@ -1598,10 +1928,10 @@ }, "vctrs": { "Package": "vctrs", - "Version": "0.4.0", + "Version": "0.4.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "95c2573b232eac82df562f9e300f9790", + "Hash": "8b54f22e2a58c4f275479c92ce041a57", "Requirements": [ "cli", "glue", @@ -1666,10 +1996,10 @@ }, "zoo": { "Package": "zoo", - "Version": "1.8-9", + "Version": "1.8-10", "Source": "Repository", "Repository": "CRAN", - "Hash": "035d1c7c12593038c26fb1c2fd40c4d2", + "Hash": "277b8b4c5b7b47e664aebfe024a2092e", "Requirements": [ "lattice" ] From 05bf02ca1a137e222f8dc335da7bab8024141c44 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 8 Jun 2022 16:35:57 -0400 Subject: [PATCH 143/979] Add RunPCA.v5 --- DESCRIPTION | 4 +- NAMESPACE | 3 ++ R/dimensional_reduction.R | 108 ++++++++++++++++++++++++++++++++++++++ R/integration.R | 2 + R/visualization.R | 7 ++- 5 files changed, 120 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8dac46d0f..2d8c0b43f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9005 -Date: 2022-04-14 +Version: 4.0.4.9006 +Date: 2022-06-08 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index 444a73db3..89a30413e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,6 +80,8 @@ S3method(RunLDA,Seurat) S3method(RunLDA,default) S3method(RunPCA,Assay) S3method(RunPCA,Seurat) +S3method(RunPCA,Seurat5) +S3method(RunPCA,StdAssay) S3method(RunPCA,default) S3method(RunSPCA,Assay) S3method(RunSPCA,Seurat) @@ -319,6 +321,7 @@ export(ScaleData) export(ScaleFactors) export(ScoreJackStraw) export(SelectIntegrationFeatures) +export(SelectIntegrationFeatures5) export(SetAssayData) export(SetIdent) export(SetIntegrationData) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 322ff6880..8aef8a2dd 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -965,6 +965,45 @@ RunPCA.Assay <- function( return(reduction.data) } +#' @method RunPCA StdAssay +#' @export +#' +RunPCA.StdAssay <- function( + object, + assay = NULL, + features = NULL, + layer = 'scale.data', + npcs = 50, + rev.pca = FALSE, + weight.by.var = TRUE, + verbose = TRUE, + ndims.print = 1:5, + nfeatures.print = 30, + reduction.key = "PC_", + seed.use = 42, + ... +) { + data.use <- PrepDR5( + object = object, + features = features, + layer = layer, + verbose = verbose + ) + return(RunPCA( + object = data.use, + assay = assay, + npcs = npcs, + rev.pca = rev.pca, + weight.by.var = weight.by.var, + verbose = verbose, + ndims.print = ndims.print, + nfeatures.print = nfeatures.print, + reduction.key = reduction.key, + seed.use = seed.use, + ... + )) +} + #' @param reduction.name dimensional reduction name, pca by default #' #' @rdname RunPCA @@ -1008,6 +1047,45 @@ RunPCA.Seurat <- function( return(object) } +#' @method RunPCA Seurat5 +#' @export +#' +RunPCA.Seurat5 <- function( + object, + assay = NULL, + features = NULL, + npcs = 50, + rev.pca = FALSE, + weight.by.var = TRUE, + verbose = TRUE, + ndims.print = 1:5, + nfeatures.print = 30, + reduction.name = "pca", + reduction.key = "PC_", + seed.use = 42, + ... +) { + assay <- assay %||% DefaultAssay(object = object) + # assay.data <- GetAssay(object = object, assay = assay) + reduction.data <- RunPCA( + object = object[[assay]], + assay = assay, + features = features, + npcs = npcs, + rev.pca = rev.pca, + weight.by.var = weight.by.var, + verbose = verbose, + ndims.print = ndims.print, + nfeatures.print = nfeatures.print, + reduction.key = reduction.key, + seed.use = seed.use, + ... + ) + object[[reduction.name]] <- reduction.data + # object <- LogSeuratCommand(object = object) + return(object) +} + #' @param assay Name of assay that that t-SNE is being run on #' @param seed.use Random seed for the t-SNE. If NULL, does not set the seed #' @param tsne.method Select the method to use to compute the tSNE. Available @@ -2291,6 +2369,36 @@ PrepDR <- function( return(data.use) } +PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRUE) { + layer <- layer[1L] + layer <- match.arg(arg = layer, choices = Layers(object = object)) + features <- features %||% VariableFeatures(object = object, layer = layer) + if (!length(x = features)) { + stop("No variable features, run FindVariableFeatures() or provide a vector of features", call. = FALSE) + } + data.use <- LayerData(object = object, layer = layer, features = features) + features.var <- apply(X = data.use, MARGIN = 1L, FUN = var) + features.keep <- features[features.var > 0] + if (!length(x = features.keep)) { + stop("None of the requested features have any variance", call. = FALSE) + } else if (length(x = features.keep) < length(x = features)) { + exclude <- setdiff(x = features, y = features.keep) + if (isTRUE(x = verbose)) { + warning( + "The following ", + length(x = exclude), + " features requested have zero variance; running reduction without them: ", + paste(exclude, collapse = ', '), + call. = FALSE, + immediate. = TRUE + ) + } + } + # features <- features.keep + # features <- features[!is.na(x = features)] + return(LayerData(object = object, layer = layer, features = features.keep)) +} + #' @param assay Name of Assay SPCA is being run on #' @param npcs Total Number of SPCs to compute and store (50 by default) #' @param verbose Print the top genes associated with high/low loadings for diff --git a/R/integration.R b/R/integration.R index 7d3f80fa4..28314ebef 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2724,6 +2724,8 @@ SelectIntegrationFeatures <- function( return(franks) } +#' @export +#' SelectIntegrationFeatures5 <- function( object, nfeatures = 2000, diff --git a/R/visualization.R b/R/visualization.R index 8a60bdecd..ef7fe5943 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -805,8 +805,11 @@ DimPlot <- function( stop("'dims' must be a two-length vector") } reduction <- reduction %||% DefaultDimReduc(object = object) - cells <- cells %||% colnames(x = object) - + # cells <- cells %||% colnames(x = object) + cells <- cells %||% Cells( + x = object, + assay = DefaultAssay(object = object[[reduction]]) + ) data <- Embeddings(object = object[[reduction]])[cells, dims] data <- as.data.frame(x = data) dims <- paste0(Key(object = object[[reduction]]), dims) From dd9b8fa396c8284d698ea888dd45cf6187a6691d Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 8 Jun 2022 18:04:03 -0400 Subject: [PATCH 144/979] Fixes in ScaleData.Seurat5 --- R/preprocessing5.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index df044731b..aad616dd9 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -444,7 +444,9 @@ ScaleData.Seurat5 <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - vars.to.regress <- intersect(x = vars.to.regress, y = names(x = object[[]])) + if (!is.null(x = vars.to.regress)) { + vars.to.regress <- intersect(x = vars.to.regress, y = names(x = object[[]])) + } latent.data <- if (length(x = vars.to.regress)) { object[[vars.to.regress]] } else { From 2fd2e0644484917a56a2a44d0ceab9a8e71cc751 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Jun 2022 16:52:09 -0400 Subject: [PATCH 145/979] harmony IntegrateSketchEmbeddings --- R/integration.R | 158 ++++++++++++++++++++++++++++++++++++++++++++++++ R/sketching.R | 1 + 2 files changed, 159 insertions(+) diff --git a/R/integration.R b/R/integration.R index 28314ebef..55c712dad 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1784,6 +1784,164 @@ IntegrateEmbeddings.TransferAnchorSet <- function( return(query) } + +#' Integrate embeddings from the integrated atoms +#' +#' The main steps of this procedure are outlined below. For a more detailed +#' description of the methodology, please see Hao, et al Biorxiv 2022: +#' \doi{10.1101/2022.02.24.481684} +#' +#' First learn a atom dictionary representation to reconstruct each cell. +#' Then, using this dictionary representation, +#' reconstruct the embeddings of each cell from the integrated atoms. +#' +#' @param object A Seurat object with all cells for one dataset +#' @param atom.assay Assay name for sketched-cell expression (default is 'sketch') +#' @param orig.assay Assay name for original expression (default is 'RNA') +#' @param features Features used for atomic sketch integration +#' @param atom.sketch.reduction Dimensional reduction name for batch-corrected embeddings +#' in the sketched object (default is 'integrated_dr') +#' @param dictionary.method Methods to construct sketch-cell representation +#' for all cells (default is 'sketch'). Can be one of: +#' \itemize{ +#' \item{sketch: Use random sketched data slot} +#' \item{data: Use data slot} +#' } +#' @param sketch.ratio Sketch ratio of data slot when \code{dictionary.method} is set to 'sketch' (default is 0.8) +#' @param layers Names of layers for correction. +#' @param verbose Print progress and message (default is TRUE) +#' +#' @return Returns a Seurat object with an integrated dimensional reduction +#' @importFrom MASS ginv +#' @importFrom Matrix t +#' @export + +IntegrateSketchEmbeddings <- function( + object, + atom.assay = 'sketch', + orig.assay = 'RNA', + features = NULL, + atom.sketch.reduction = 'integrated_dr', + dictionary.method = c('sketch', 'data'), + sketch.ratio = 0.8, + layers = NULL, + verbose = TRUE +) { + reduction.name = atom.sketch.reduction + reduction.key = Key(object = object[[atom.sketch.reduction]]) + dictionary.method <- match.arg(arg = dictionary.method) + layers <- setdiff(x = Layers(object = object[[atom.assay]], search = 'data'), + y = 'scale.data') %||% layers + # check layers + layers.missing <- setdiff(layers, Layers(object = object[[orig.assay]], search = 'data')) + if (length(layers.missing) > 0) { + stop('layer ', layers.missing, ' are not present in ', orig.assay, " assay") + } + + # check features + features <- features %||% VariableFeatures(object = atom.sketch.object) + features <- intersect(features, + LayerIntersectFeatures(object = object, assay = atom.assay, layers = layers) + ) + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + + emb_all.mat <- matrix(data = NA, nrow = ncol(object), ncol = ncol(object[[atom.sketch.reduction]])) + ncells <- c(0, sapply(X = layers, + FUN = function(x) ncol(LayerData(object = object[[orig.assay]], layer = x)) + ) + ) + block_set <- lapply(X = seq_along(along.with = layers), + FUN = function(x) (sum(ncells[1:x]) + 1) : sum(ncells[1:(x + 1)]) + ) + + for (i in seq_along(along.with = layers)) { + cells.sketch <- Cells(object[[atom.assay]], layer = layers[i]) + if (verbose) { + message(length(cells.sketch),' atomic cells are identified in the atom.sketch.object') + } + if (verbose) { + message("Correcting embeddings") + } + + emb <- switch( + EXPR = dictionary.method, + 'data' = { + exp.mat <- t( + x = as.matrix( + LayerData( + object = object[[atom.assay]], + layer = layers[i], + features = features + ) + ) + ) + sketch.transform <- ginv(X = exp.mat) %*% + Embeddings(object = object[[atom.sketch.reduction]])[cells.sketch ,] + emb <- as.matrix( + x = t(as( + object = LayerData( + object = object[[orig.assay]], + layer = layers[i], + features = features + ), + Class = "CsparseMatrix")) %*% + sketch.transform + ) + emb + }, + 'sketch' = { + R <- t( + x = CountSketch( + nsketch = round(sketch.ratio * length(x = features)), ncells = length(x = features) + ) + ) + exp.mat <- as.matrix( + x = t( + x = LayerData( + object = object[[atom.assay]], + layer = layers[i], + features = features + ) + ) %*% + R + ) + sketch.transform <- ginv(X = exp.mat) %*% + Embeddings(object = object[[atom.sketch.reduction]])[cells.sketch ,] + emb <- as.matrix( + x = ( + t(as( + object = LayerData( + object = object[[orig.assay]], + layer = layers[i], + features = features + ), + Class = "CsparseMatrix")) + %*% + R) %*% + sketch.transform + ) + emb + } + ) + emb_all.mat[ block_set[[i]],] <- as.matrix(emb) + } + + object[[reduction.name]] <- CreateDimReducObject( + embeddings = emb_all.mat, + loadings = Loadings(object[[atom.sketch.reduction]]), + key = reduction.key, + assay = orig.assay + ) + return(object) +} + + + + #' Calculate the local structure preservation metric #' #' Calculates a metric that describes how well the local structure of each group diff --git a/R/sketching.R b/R/sketching.R index 8987c5215..2c23ec9e9 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -289,6 +289,7 @@ CountSketch <- function(nsketch, ncells, seed = NA_integer_, ...) { )) } + #' Gaussian sketching #' #' @inheritParams CountSketch From c1f02400d0f5da251c3e4dbf9a78197eb9409049 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 13 Jun 2022 12:32:44 -0400 Subject: [PATCH 146/979] Remove calls to GetAssay (except in SCTransform()) --- R/clustering.R | 3 +-- R/dimensional_reduction.R | 10 +++------- R/mixscape.R | 3 +-- R/preprocessing.R | 16 ++++++++-------- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index de0cc96c5..6e9973ed6 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -774,9 +774,8 @@ FindNeighbors.Seurat <- function( ) } else { assay <- assay %||% DefaultAssay(object = object) - data.use <- GetAssay(object = object, assay = assay) neighbor.graphs <- FindNeighbors( - object = data.use, + object = object[[assay]], features = features, k.param = k.param, compute.SNN = compute.SNN, diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 8aef8a2dd..5cc591ea3 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -799,9 +799,8 @@ RunICA.Seurat <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) reduction.data <- RunICA( - object = assay.data, + object = object[[assay]], assay = assay, features = features, nics = nics, @@ -1027,9 +1026,8 @@ RunPCA.Seurat <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) reduction.data <- RunPCA( - object = assay.data, + object = object[[assay]], assay = assay, features = features, npcs = npcs, @@ -1066,7 +1064,6 @@ RunPCA.Seurat5 <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - # assay.data <- GetAssay(object = object, assay = assay) reduction.data <- RunPCA( object = object[[assay]], assay = assay, @@ -2505,14 +2502,13 @@ RunSPCA.Seurat <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) if (is.null(x = graph)) { stop("Graph is not provided") } else if (is.character(x = graph)) { graph <- object[[graph]] } reduction.data <- RunSPCA( - object = assay.data, + object = object[[assay]], assay = assay, features = features, npcs = npcs, diff --git a/R/mixscape.R b/R/mixscape.R index 3504563dd..8af9ab44d 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -592,9 +592,8 @@ RunLDA.Seurat <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) reduction.data <- RunLDA( - object = assay.data, + object = object[[assay]], assay = assay, labels = labels, features = features, diff --git a/R/preprocessing.R b/R/preprocessing.R index 5142959d9..8d409a0b9 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2070,10 +2070,10 @@ FindVariableFeatures.Seurat <- function( verbose = TRUE, ... ) { - assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, Assays(object = object)) assay.data <- FindVariableFeatures( - object = assay.data, + object = object[[assay]], selection.method = selection.method, loess.span = loess.span, clip.max = clip.max, @@ -2497,9 +2497,8 @@ NormalizeData.Seurat <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) assay.data <- NormalizeData( - object = assay.data, + object = object[[assay]], normalization.method = normalization.method, scale.factor = scale.factor, verbose = verbose, @@ -2858,8 +2857,8 @@ ScaleData.Seurat <- function( verbose = TRUE, ... ) { - assay <- assay %||% DefaultAssay(object = object) - assay.data <- GetAssay(object = object, assay = assay) + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) if (any(vars.to.regress %in% colnames(x = object[[]]))) { latent.data <- object[[vars.to.regress[vars.to.regress %in% colnames(x = object[[]])]]] } else { @@ -2869,7 +2868,8 @@ ScaleData.Seurat <- function( split.by <- object[[split.by]] } assay.data <- ScaleData( - object = assay.data, + # object = assay.data, + object = object[[assay]], features = features, vars.to.regress = vars.to.regress, latent.data = latent.data, From 63af2fffb981c08568e964d5a4d1ccb0745c133b Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 23 Jun 2022 18:52:28 -0400 Subject: [PATCH 147/979] Implement LeverageScore on v4 Seurat objects --- R/sketching.R | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 2c23ec9e9..d82f16b56 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -69,7 +69,7 @@ LeverageScoreSampling <- function( )) for (lyr in vars) { try( - expr = VariableFeatures(object = sketched, selection.method = "sketch", layer = lyr) <- + expr = VariableFeatures(object = sketched, method = "sketch", layer = lyr) <- VariableFeatures(object = object[[assay]], layer = lyr), silent = TRUE ) @@ -127,7 +127,8 @@ LeverageScore.default <- function( ) } if (is.character(x = method)) { - method <- get(x = method) + # method <- get(x = method) + method <- match.fun(FUN = method) } stopifnot(is.function(x = method)) # Run the sketching @@ -208,6 +209,42 @@ LeverageScore.StdAssay <- function( return(scores) } +#' @method LeverageScore Seurat +#' @export +#' +LeverageScore.Seurat <- function( + object, + assay = NULL, + features = NULL, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + layer = 'data', + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) { + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + method <- enquo(arg = method) + scores <- LeverageScore( + object = object[[assay]], + features = features, + nsketch = nsketch, + ndims = ndims, + method = method, + layer = layer, + eps = eps, + seed = seed, + verbose = verbose, + ... + ) + names(x = scores) <- paste0("seurat_", names(x = scores)) + object[[]] <- scores + return(object) +} + #' @method LeverageScore Seurat5 #' @export #' From 5ccfd14fc5618c47d8771bff57634c2f1ac684a1 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 23 Jun 2022 18:52:47 -0400 Subject: [PATCH 148/979] Get IntegrateSketchEmbeddings working Fixes for SelectIntegrationFeatures5 --- R/integration.R | 236 ++++++++++++++++++++++++++++++------------------ 1 file changed, 146 insertions(+), 90 deletions(-) diff --git a/R/integration.R b/R/integration.R index 55c712dad..5dba9256b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1796,95 +1796,153 @@ IntegrateEmbeddings.TransferAnchorSet <- function( #' reconstruct the embeddings of each cell from the integrated atoms. #' #' @param object A Seurat object with all cells for one dataset -#' @param atom.assay Assay name for sketched-cell expression (default is 'sketch') -#' @param orig.assay Assay name for original expression (default is 'RNA') +#' @param atoms Assay name for sketched-cell expression (default is 'sketch') +#' @param orig Assay name for original expression (default is 'RNA') #' @param features Features used for atomic sketch integration -#' @param atom.sketch.reduction Dimensional reduction name for batch-corrected embeddings +#' @param reduction Dimensional reduction name for batch-corrected embeddings #' in the sketched object (default is 'integrated_dr') -#' @param dictionary.method Methods to construct sketch-cell representation +#' @param method Methods to construct sketch-cell representation #' for all cells (default is 'sketch'). Can be one of: #' \itemize{ -#' \item{sketch: Use random sketched data slot} -#' \item{data: Use data slot} +#' \item \dQuote{\code{sketch}}: Use random sketched data slot +#' \item \dQuote{\code{data}}: Use data slot #' } -#' @param sketch.ratio Sketch ratio of data slot when \code{dictionary.method} is set to 'sketch' (default is 0.8) +#' @param ratio Sketch ratio of data slot when \code{dictionary.method} is set +#' to \dQuote{\code{sketch}}; defaults to 0.8 +#' @param reduction.name Name to save new reduction as; defaults to +#' \code{paste0(reduction, '.orig')} +#' @param reduction.key Key for new dimensional reduction; defaults to creating +#' one from \code{reduction.name} #' @param layers Names of layers for correction. -#' @param verbose Print progress and message (default is TRUE) +#' @param verbose Print progress and message #' #' @return Returns a Seurat object with an integrated dimensional reduction +#' #' @importFrom MASS ginv #' @importFrom Matrix t +#' #' @export - +#' IntegrateSketchEmbeddings <- function( object, - atom.assay = 'sketch', - orig.assay = 'RNA', - features = NULL, - atom.sketch.reduction = 'integrated_dr', - dictionary.method = c('sketch', 'data'), - sketch.ratio = 0.8, + atoms = 'sketch', # DefaultAssay(object) + orig = 'RNA', + features = NULL, # VF from object[[atom.assay]] + reduction = 'integrated_dr', # harmony; rerun UMAP on this + method = c('sketch', 'data'), + ratio = 0.8, + reduction.name = NULL, + reduction.key = NULL, layers = NULL, verbose = TRUE ) { - reduction.name = atom.sketch.reduction - reduction.key = Key(object = object[[atom.sketch.reduction]]) - dictionary.method <- match.arg(arg = dictionary.method) - layers <- setdiff(x = Layers(object = object[[atom.assay]], search = 'data'), - y = 'scale.data') %||% layers + # Check input and output dimensional reductions + reduction <- match.arg(arg = reduction, choices = Reductions(object = object)) + reduction.name <- reduction.name %||% paste0(reduction, '.orig') + reduction.key <- reduction.key %||% Key(object = reduction.name, quiet = TRUE) + if (reduction.name %in% Reductions(object = object)) { + warning( + "'", + reduction.name, + "' already exists, overwriting", + call. = FALSE, + immediate. = TRUE + ) + } + # Check the method being used + method <- method[1L] + method <- match.arg(arg = method) + # Check our layers + atoms <- match.arg(arg = atoms, choices = Assays(object = object)) + orig <- match.arg(arg = orig, choices = Assays(object = object)) + layer.orig <- layers + layers <- layers %||% intersect( + x = DefaultLayer(object[[atoms]]), + y = Layers(object[[orig]]) + ) + if (is.null(x = layer.orig)) { + atoms.missing <- setdiff(x = layers, DefaultLayer(object = object[[atoms]])) + if (length(x = atoms.missing) == length(x = layers)) { + stop("None of the requested layers are present in the atoms") + } else if (length(x = atoms.missing)) { + warning( + length(x = atoms.missing), + " layers missing from the atoms", + call. = FALSE, + immediate. = TRUE + ) + layers <- intersect(x = layers, y = DefaultLayer(object = object[[atoms]])) + } + } # check layers - layers.missing <- setdiff(layers, Layers(object = object[[orig.assay]], search = 'data')) - if (length(layers.missing) > 0) { - stop('layer ', layers.missing, ' are not present in ', orig.assay, " assay") + layers.missing <- setdiff(layers, Layers(object = object[[orig]])) + if (length(x = layers.missing)) { + stop('layer ', layers.missing[1L], ' are not present in ', orig, " assay") } - # check features - features <- features %||% VariableFeatures(object = atom.sketch.object) - features <- intersect(features, - LayerIntersectFeatures(object = object, assay = atom.assay, layers = layers) - ) - my.lapply <- ifelse( - test = verbose && nbrOfWorkers() == 1, - yes = pblapply, - no = future_lapply + features <- features %||% unlist(x = VariableFeatures( + object = object[[atoms]], + layer = layers + )) + # TODO: see if we can handle missing features with `union` + features.atom <- Reduce( + f = intersect, + x = lapply( + X = layers, + FUN = function(lyr) { + return(Features(x = object[[atoms]], layer = lyr)) + } + ) ) - - emb_all.mat <- matrix(data = NA, nrow = ncol(object), ncol = ncol(object[[atom.sketch.reduction]])) - ncells <- c(0, sapply(X = layers, - FUN = function(x) ncol(LayerData(object = object[[orig.assay]], layer = x)) - ) - ) - block_set <- lapply(X = seq_along(along.with = layers), - FUN = function(x) (sum(ncells[1:x]) + 1) : sum(ncells[1:(x + 1)]) - ) - - for (i in seq_along(along.with = layers)) { - cells.sketch <- Cells(object[[atom.assay]], layer = layers[i]) - if (verbose) { - message(length(cells.sketch),' atomic cells are identified in the atom.sketch.object') + features <- intersect(x = features, y = features.atom) + emb.all <- matrix( + data = NA_real_, + nrow = ncol(x = object[[orig]]), + ncol = length(x = object[[reduction]]) + ) + ncells <- c( + 0, + sapply( + X = layers, + FUN = function(lyr) { + return(length(x = Cells(x = object[[orig]], layer = lyr))) + } + ) + ) + blocks <- lapply( + X = seq_along(along.with = layers), + FUN = function(x) { + return((sum(ncells[1:x]) + 1):sum(ncells[1:(x + 1)])) } - if (verbose) { + ) + for (i in seq_along(along.with = layers)) { + cells.sketch <- Cells(x = object[[atoms]], layer = layers[i]) + if (isTRUE(x = verbose)) { + message( + length(x = cells.sketch), + ' atomic cells identified in the atoms' + ) message("Correcting embeddings") } - emb <- switch( - EXPR = dictionary.method, + EXPR = method, 'data' = { exp.mat <- t( x = as.matrix( LayerData( - object = object[[atom.assay]], + object = object[[atoms]], layer = layers[i], features = features ) ) ) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = object[[atom.sketch.reduction]])[cells.sketch ,] + Embeddings(object = object[[reduction]])[cells.sketch ,] emb <- as.matrix( - x = t(as( + # TODO: update as.sparse to have default method with `as(x, "CsparseMatrix")` + x = t(x = as( object = LayerData( - object = object[[orig.assay]], + object = object[[orig]], layer = layers[i], features = features ), @@ -1894,28 +1952,22 @@ IntegrateSketchEmbeddings <- function( emb }, 'sketch' = { - R <- t( - x = CountSketch( - nsketch = round(sketch.ratio * length(x = features)), ncells = length(x = features) - ) - ) - exp.mat <- as.matrix( - x = t( - x = LayerData( - object = object[[atom.assay]], - layer = layers[i], - features = features - ) - ) %*% - R - ) + R <- t(x = CountSketch( + nsketch = round(x = ratio * length(x = features)), + ncells = length(x = features) + )) + exp.mat <- as.matrix(x = t(x = LayerData( + object = object[[atoms]], + layer = layers[i], + features = features + )) %*% R) sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = object[[atom.sketch.reduction]])[cells.sketch ,] + Embeddings(object = object[[reduction]])[cells.sketch ,] emb <- as.matrix( x = ( t(as( object = LayerData( - object = object[[orig.assay]], + object = object[[orig]], layer = layers[i], features = features ), @@ -1927,21 +1979,19 @@ IntegrateSketchEmbeddings <- function( emb } ) - emb_all.mat[ block_set[[i]],] <- as.matrix(emb) + emb.all[ blocks[[i]],] <- as.matrix(x = emb) } - - object[[reduction.name]] <- CreateDimReducObject( - embeddings = emb_all.mat, - loadings = Loadings(object[[atom.sketch.reduction]]), + rownames(x = emb.all) <- colnames(x = object[[orig]]) + object[[reduction.name]] <- suppressWarnings(expr = CreateDimReducObject( + embeddings = emb.all, + loadings = Loadings(object = object[[reduction]]), key = reduction.key, - assay = orig.assay - ) + assay = orig + )) + CheckGC() return(object) } - - - #' Calculate the local structure preservation metric #' #' Calculates a metric that describes how well the local structure of each group @@ -2888,25 +2938,31 @@ SelectIntegrationFeatures5 <- function( object, nfeatures = 2000, assay = NULL, + method = NULL, layers = NULL, verbose = TRUE, - # fvf.nfeatures = 2000, ... ) { assay <- assay %||% DefaultAssay(object = object) layers <- Layers(object = object[[assay]], search = layers) - vf.list <- lapply( - X = layers, - FUN = function(x) { - return(VariableFeatures(object = object, assay = assay, layer = x)) - } + vf.list <- VariableFeatures( + object = object, + assay = assay, + method = method, + layer = layers ) - var.features <- unname(obj = unlist(x = vf.list)) - fmat <- slot(object = object[[assay]], name = 'features')[, layers] - fmat <- droplevels(x = fmat) - var.features <- var.features[var.features %in% rownames(x = fmat)] + var.features <- unlist(x = vf.list, use.names = FALSE) var.features <- sort(x = table(var.features), decreasing = TRUE) + # Select only variable features present in all layers + fmat <- slot(object = object[[assay]], name = 'features')[, layers] + idx <- which(x = apply( + X = fmat[names(x = var.features), , drop = FALSE], + MARGIN = 1L, + FUN = all + )) + var.features <- var.features[idx] tie.val <- var.features[min(nfeatures, length(x = var.features))] + # Select integration features features <- names(x = var.features[which(x = var.features > tie.val)]) if (length(x = features)) { features <- .FeatureRank(features = features, flist = vf.list) From 73cd8dea53a28ff5ad43fd4ef765cc914f0194ac Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 23 Jun 2022 18:53:20 -0400 Subject: [PATCH 149/979] devtools::document() updated these --- NAMESPACE | 4 +++ man/IntegrateSketchEmbeddings.Rd | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 man/IntegrateSketchEmbeddings.Rd diff --git a/NAMESPACE b/NAMESPACE index 89a30413e..37d2b6501 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) +S3method(LeverageScore,Seurat) S3method(LeverageScore,Seurat5) S3method(LeverageScore,StdAssay) S3method(LeverageScore,default) @@ -231,6 +232,7 @@ export(Index) export(Indices) export(IntegrateData) export(IntegrateEmbeddings) +export(IntegrateSketchEmbeddings) export(Intensity) export(IsGlobal) export(JS) @@ -385,6 +387,7 @@ importClassesFrom(SeuratObject,Seurat) importClassesFrom(SeuratObject,SeuratCommand) importClassesFrom(SeuratObject,SpatialImage) importFrom(KernSmooth,bkde) +importFrom(MASS,ginv) importFrom(MASS,glm.nb) importFrom(MASS,lda) importFrom(Matrix,as.matrix) @@ -397,6 +400,7 @@ importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) importFrom(Matrix,sparse.model.matrix) importFrom(Matrix,sparseMatrix) +importFrom(Matrix,t) importFrom(RANN,nn2) importFrom(RColorBrewer,brewer.pal) importFrom(RColorBrewer,brewer.pal.info) diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd new file mode 100644 index 000000000..9bdf2f318 --- /dev/null +++ b/man/IntegrateSketchEmbeddings.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{IntegrateSketchEmbeddings} +\alias{IntegrateSketchEmbeddings} +\title{Integrate embeddings from the integrated atoms} +\usage{ +IntegrateSketchEmbeddings( + object, + atom.assay = "sketch", + orig.assay = "RNA", + features = NULL, + atom.sketch.reduction = "integrated_dr", + dictionary.method = c("sketch", "data"), + sketch.ratio = 0.8, + layers = NULL, + verbose = TRUE +) +} +\arguments{ +\item{object}{A Seurat object with all cells for one dataset} + +\item{atom.assay}{Assay name for sketched-cell expression (default is 'sketch')} + +\item{orig.assay}{Assay name for original expression (default is 'RNA')} + +\item{features}{Features used for atomic sketch integration} + +\item{atom.sketch.reduction}{Dimensional reduction name for batch-corrected embeddings +in the sketched object (default is 'integrated_dr')} + +\item{dictionary.method}{Methods to construct sketch-cell representation +for all cells (default is 'sketch'). Can be one of: +\itemize{ +\item{sketch: Use random sketched data slot} +\item{data: Use data slot} +}} + +\item{sketch.ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to 'sketch' (default is 0.8)} + +\item{layers}{Names of layers for correction.} + +\item{verbose}{Print progress and message (default is TRUE)} +} +\value{ +Returns a Seurat object with an integrated dimensional reduction +} +\description{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Hao, et al Biorxiv 2022: +\doi{10.1101/2022.02.24.481684} +} +\details{ +First learn a atom dictionary representation to reconstruct each cell. +Then, using this dictionary representation, +reconstruct the embeddings of each cell from the integrated atoms. +} From 3e0eb083163530ea231a732695cd574cbf72e5dc Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 23 Jun 2022 18:53:48 -0400 Subject: [PATCH 150/979] Bump v5 version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2d8c0b43f..d51dcbde0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9006 -Date: 2022-06-08 +Version: 4.0.4.9007 +Date: 2022-06-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 5e967b02db742b998eb434a20aca5702a69434f2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Jun 2022 15:32:14 -0400 Subject: [PATCH 151/979] fix npcs --- R/integration.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index c033d7683..1bd429abb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7401,6 +7401,7 @@ FastRPCAIntegration <- function( findintegrationanchors.args = list(), verbose = TRUE ) { + npcs <- max(npcs, dims) my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, @@ -7428,7 +7429,7 @@ FastRPCAIntegration <- function( if (normalization.method != 'SCT') { x <- ScaleData(x, features = anchor.features, do.scale = scale, verbose = FALSE) } - x <- RunPCA(x, features = anchor.features, verbose = FALSE) + x <- RunPCA(x, features = anchor.features, verbose = FALSE, npcs = npcs) return(x) } ) From 1127a81924e0454454c5391c41ea915e20b7bb25 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 19 Jul 2022 17:44:57 -0400 Subject: [PATCH 152/979] alpha value for dimplot --- R/visualization.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 9af3a6e02..5b57641d1 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -747,6 +747,7 @@ ColorDimSplit <- function( #' @param label.color Sets the color of the label text #' @param label.box Whether to put a box around the label text (geom_text vs #' geom_label) +#' @param alpha Alpha value for plotting (default is 1) #' @param repel Repel labels #' @param cells.highlight A list of character or numeric vectors of cells to #' highlight. If only one group of cells desired, can simply @@ -806,6 +807,7 @@ DimPlot <- function( label.color = 'black', label.box = FALSE, repel = FALSE, + alpha = 1, cells.highlight = NULL, cols.highlight = '#DE2D26', sizes.highlight = 1, @@ -855,6 +857,7 @@ DimPlot <- function( pt.size = pt.size, shape.by = shape.by, order = order, + alpha = alpha, label = FALSE, cells.highlight = cells.highlight, cols.highlight = cols.highlight, @@ -7011,6 +7014,7 @@ SingleCorPlot <- function( #' @param shape.by If NULL, all points are circles (default). You can specify #' any cell attribute (that can be pulled with \code{\link{FetchData}}) #' allowing for both different colors and different shapes on cells. +#' @param alpha Alpha value for plotting (default is 1) #' @param alpha.by Mapping variable for the point alpha value #' @param order Specify the order of plotting for the idents. This can be #' useful for crowded plots if points of interest are being buried. Provide @@ -7052,6 +7056,7 @@ SingleDimPlot <- function( cols = NULL, pt.size = NULL, shape.by = NULL, + alpha = 1, alpha.by = NULL, order = NULL, label = FALSE, @@ -7158,6 +7163,7 @@ SingleDimPlot <- function( alpha = alpha.by ), pointsize = pt.size, + alpha = alpha, pixels = raster.dpi ) } else { @@ -7169,7 +7175,8 @@ SingleDimPlot <- function( shape = shape.by, alpha = alpha.by ), - size = pt.size + size = pt.size, + alpha = alpha ) } plot <- plot + From 2cb24777ef4bd2d935a76664ef052c297e8c986e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 19 Jul 2022 17:55:39 -0400 Subject: [PATCH 153/979] add alpha to violin plot --- R/visualization.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 5b57641d1..90ea62194 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -554,6 +554,7 @@ RidgePlot <- function( #' #' @inheritParams RidgePlot #' @param pt.size Point size for geom_violin +#' @param alpha Alpha value for geom_violin #' @param split.by A variable to split the violin plots by, #' @param split.plot plot each group of the split violin plots by multiple or #' single violin shapes. @@ -581,6 +582,7 @@ VlnPlot <- function( features, cols = NULL, pt.size = NULL, + alpha = 1, idents = NULL, sort = FALSE, assay = NULL, @@ -624,6 +626,7 @@ VlnPlot <- function( same.y.lims = same.y.lims, adjust = adjust, pt.size = pt.size, + alpha = alpha, cols = cols, group.by = group.by, split.by = split.by, @@ -5553,6 +5556,7 @@ Col2Hex <- function(...) { # @param same.y.lims Set all the y-axis limits to the same values # @param adjust Adjust parameter for geom_violin # @param pt.size Point size for geom_violin +# @param alpha Alpha value for geom_violin # @param cols Colors to use for plotting # @param group.by Group (color) cells in different ways (for example, orig.ident) # @param split.by A variable to split the plot by @@ -5586,6 +5590,7 @@ ExIPlot <- function( adjust = 1, cols = NULL, pt.size = 0, + alpha = 1, group.by = NULL, split.by = NULL, log = FALSE, @@ -5695,6 +5700,7 @@ ExIPlot <- function( adjust = adjust, cols = cols, pt.size = pt.size, + alpha = alpha, log = log, raster = raster )) @@ -7217,6 +7223,7 @@ SingleDimPlot <- function( #' @param y.max Maximum Y value to plot #' @param adjust Adjust parameter for geom_violin #' @param pt.size Size of points for violin plots +#' @param alpha Alpha vlaue for violin plots #' @param cols Colors to use for plotting #' @param seed.use Random seed to use. If NULL, don't set a seed #' @param log plot Y axis on log10 scale @@ -7246,6 +7253,7 @@ SingleExIPlot <- function( y.max = NULL, adjust = 1, pt.size = 0, + alpha = 1, cols = NULL, seed.use = 42, log = FALSE, @@ -7326,21 +7334,23 @@ SingleExIPlot <- function( ) if (is.null(x = split)) { if (isTRUE(x = raster)) { - jitter <- ggrastr::rasterize(geom_jitter(height = 0, size = pt.size, show.legend = FALSE)) + jitter <- ggrastr::rasterize(geom_jitter(height = 0, size = pt.size, alpha = alpha, show.legend = FALSE)) } else { - jitter <- geom_jitter(height = 0, size = pt.size, show.legend = FALSE) + jitter <- geom_jitter(height = 0, size = pt.size, alpha = alpha, show.legend = FALSE) } } else { if (isTRUE(x = raster)) { jitter <- ggrastr::rasterize(geom_jitter( position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size, + alpha = alpha, show.legend = FALSE )) } else { jitter <- geom_jitter( position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9), size = pt.size, + alpha = alpha, show.legend = FALSE ) } @@ -7359,7 +7369,7 @@ SingleExIPlot <- function( scale_y_discrete(expand = c(0.01, 0)), scale_x_continuous(expand = c(0, 0)) ) - jitter <- geom_jitter(width = 0, size = pt.size, show.legend = FALSE) + jitter <- geom_jitter(width = 0, size = pt.size, alpha = alpha, show.legend = FALSE) log.scale <- scale_x_log10() axis.scale <- function(...) { invisible(x = NULL) From ad3c160640dc705908c130418680ecc90e84983e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 19 Jul 2022 18:01:24 -0400 Subject: [PATCH 154/979] alpha in FeaturePlot --- R/visualization.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/visualization.R b/R/visualization.R index 90ea62194..bef695d34 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -989,6 +989,7 @@ FeaturePlot <- function( c('lightgrey', 'blue') }, pt.size = NULL, + alpha = 1, order = FALSE, min.cutoff = NA, max.cutoff = NA, @@ -1269,6 +1270,7 @@ FeaturePlot <- function( col.by = feature, order = order, pt.size = pt.size, + alpha = alpha, cols = cols.use, shape.by = shape.by, label = FALSE, From 95c6016c71903db0ba4abe371debae0ea47d0cdf Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 19 Jul 2022 18:02:46 -0400 Subject: [PATCH 155/979] update docu --- man/ColorDimSplit.Rd | 1 + man/DimPlot.Rd | 3 +++ man/FeaturePlot.Rd | 3 +++ man/ISpatialDimPlot.Rd | 4 +--- man/ISpatialFeaturePlot.Rd | 4 +--- man/LinkedPlots.Rd | 4 +--- man/SingleDimPlot.Rd | 3 +++ man/SingleExIPlot.Rd | 3 +++ man/VlnPlot.Rd | 3 +++ 9 files changed, 19 insertions(+), 9 deletions(-) diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd index 4dab0677c..1f85d1974 100644 --- a/man/ColorDimSplit.Rd +++ b/man/ColorDimSplit.Rd @@ -53,6 +53,7 @@ useful for crowded plots if points of interest are being buried. (default is FAL \item{\code{label.color}}{Sets the color of the label text} \item{\code{label.box}}{Whether to put a box around the label text (geom_text vs geom_label)} + \item{\code{alpha}}{Alpha value for plotting (default is 1)} \item{\code{repel}}{Repel labels} \item{\code{cells.highlight}}{A list of character or numeric vectors of cells to highlight. If only one group of cells desired, can simply diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd index c93a78b01..3d7fa3027 100644 --- a/man/DimPlot.Rd +++ b/man/DimPlot.Rd @@ -26,6 +26,7 @@ DimPlot( label.color = "black", label.box = FALSE, repel = FALSE, + alpha = 1, cells.highlight = NULL, cols.highlight = "#DE2D26", sizes.highlight = 1, @@ -88,6 +89,8 @@ geom_label)} \item{repel}{Repel labels} +\item{alpha}{Alpha value for plotting (default is 1)} + \item{cells.highlight}{A list of character or numeric vectors of cells to highlight. If only one group of cells desired, can simply pass a vector instead of a list. If set, colors selected cells to the color(s) diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 239c0a8c2..59e9dc965 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -13,6 +13,7 @@ FeaturePlot( cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { c("lightgrey", "blue") }, pt.size = NULL, + alpha = 1, order = FALSE, min.cutoff = NA, max.cutoff = NA, @@ -64,6 +65,8 @@ When blend is \code{TRUE}, takes anywhere from 1-3 colors: \item{pt.size}{Adjust point size for plotting} +\item{alpha}{Alpha value for plotting (default is 1)} + \item{order}{Boolean determining whether to plot cells in order of expression. Can be useful if cells expressing given feature are getting buried.} diff --git a/man/ISpatialDimPlot.Rd b/man/ISpatialDimPlot.Rd index a19d06119..226278fcd 100644 --- a/man/ISpatialDimPlot.Rd +++ b/man/ISpatialDimPlot.Rd @@ -14,9 +14,7 @@ ISpatialDimPlot(object, image = NULL, group.by = NULL, alpha = c(0.3, 1)) \item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} -\item{alpha}{Controls opacity of spots. Provide as a vector specifying the -min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single -alpha value for each plot.} +\item{alpha}{Alpha value for plotting (default is 1)} } \value{ Returns final plot as a ggplot object diff --git a/man/ISpatialFeaturePlot.Rd b/man/ISpatialFeaturePlot.Rd index 48528668a..81a6b96a1 100644 --- a/man/ISpatialFeaturePlot.Rd +++ b/man/ISpatialFeaturePlot.Rd @@ -21,9 +21,7 @@ ISpatialFeaturePlot( \item{slot}{Which slot to pull expression data from?} -\item{alpha}{Controls opacity of spots. Provide as a vector specifying the -min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single -alpha value for each plot.} +\item{alpha}{Alpha value for plotting (default is 1)} } \value{ Returns final plot as a ggplot object diff --git a/man/LinkedPlots.Rd b/man/LinkedPlots.Rd index 103241f9f..d7e358a56 100644 --- a/man/LinkedPlots.Rd +++ b/man/LinkedPlots.Rd @@ -41,9 +41,7 @@ LinkedFeaturePlot( \item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} -\item{alpha}{Controls opacity of spots. Provide as a vector specifying the -min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single -alpha value for each plot.} +\item{alpha}{Alpha value for plotting (default is 1)} \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} ggplot object. If \code{FALSE}, return a list of ggplot objects} diff --git a/man/SingleDimPlot.Rd b/man/SingleDimPlot.Rd index 6fb2f6956..413bd721a 100644 --- a/man/SingleDimPlot.Rd +++ b/man/SingleDimPlot.Rd @@ -11,6 +11,7 @@ SingleDimPlot( cols = NULL, pt.size = NULL, shape.by = NULL, + alpha = 1, alpha.by = NULL, order = NULL, label = FALSE, @@ -42,6 +43,8 @@ default, ggplot2 assigns colors} any cell attribute (that can be pulled with \code{\link{FetchData}}) allowing for both different colors and different shapes on cells.} +\item{alpha}{Alpha value for plotting (default is 1)} + \item{alpha.by}{Mapping variable for the point alpha value} \item{order}{Specify the order of plotting for the idents. This can be diff --git a/man/SingleExIPlot.Rd b/man/SingleExIPlot.Rd index f7b8cdb4f..e37b8bc7f 100644 --- a/man/SingleExIPlot.Rd +++ b/man/SingleExIPlot.Rd @@ -13,6 +13,7 @@ SingleExIPlot( y.max = NULL, adjust = 1, pt.size = 0, + alpha = 1, cols = NULL, seed.use = 42, log = FALSE, @@ -37,6 +38,8 @@ expression of the attribute being potted} \item{pt.size}{Size of points for violin plots} +\item{alpha}{Alpha vlaue for violin plots} + \item{cols}{Colors to use for plotting} \item{seed.use}{Random seed to use. If NULL, don't set a seed} diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index ec75da8f9..98fefff60 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -9,6 +9,7 @@ VlnPlot( features, cols = NULL, pt.size = NULL, + alpha = 1, idents = NULL, sort = FALSE, assay = NULL, @@ -38,6 +39,8 @@ anything that can be retreived by FetchData)} \item{pt.size}{Point size for geom_violin} +\item{alpha}{Alpha value for geom_violin} + \item{idents}{Which classes to include in the plot (default is all)} \item{sort}{Sort identity classes (on the x-axis) by the average From e694bb8f51ee84d6c884aa1ff41ded67dc0adda1 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 25 Jul 2022 12:08:07 -0400 Subject: [PATCH 156/979] Add generic and dispatch methods for SCTransform --- DESCRIPTION | 4 +- NAMESPACE | 3 + R/generics.R | 11 +++ R/preprocessing.R | 186 ++++++++++++++++++++++++++++++++++----------- man/SCTransform.Rd | 66 +++++++++++++--- 5 files changed, 211 insertions(+), 59 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d51dcbde0..05cfef657 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.0.4.9007 +Version: 4.0.4.9008 Date: 2022-06-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. @@ -98,7 +98,7 @@ Collate: 'sketching.R' 'tree.R' 'utilities.R' -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.1 Encoding: UTF-8 Suggests: ape, diff --git a/NAMESPACE b/NAMESPACE index 37d2b6501..ae8c28f34 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -98,6 +98,9 @@ S3method(RunUMAP,default) S3method(SCTResults,SCTAssay) S3method(SCTResults,SCTModel) S3method(SCTResults,Seurat) +S3method(SCTransform,Assay) +S3method(SCTransform,Seurat) +S3method(SCTransform,default) S3method(ScaleData,Assay) S3method(ScaleData,Seurat) S3method(ScaleData,Seurat5) diff --git a/R/generics.R b/R/generics.R index f0c1099ca..6bdae310e 100644 --- a/R/generics.R +++ b/R/generics.R @@ -615,6 +615,17 @@ ScoreJackStraw <- function(object, ...) { UseMethod(generic = 'ScoreJackStraw', object = object) } +#' Perform sctransform-based normalization +#' @param object An object +#' @param ... Arguments passed to other methods (not used) +#' +#' @rdname SCTransform +#' @export SCTransform +#' +SCTransform <- function(object, ...) { + UseMethod(generic = 'SCTransform', object = object) +} + #' Get SCT results from an Assay #' #' Pull the \code{\link{SCTResults}} information from an \code{\link{SCTAssay}} diff --git a/R/preprocessing.R b/R/preprocessing.R index 8d409a0b9..c0d954e1a 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1426,9 +1426,8 @@ SampleUMI <- function( #' scale.data being pearson residuals; sctransform::vst intermediate results are saved #' in misc slot of new assay. #' -#' @param object A seurat object -#' @param assay Name of assay to pull the count data from; default is 'RNA' -#' @param new.assay.name Name for the new assay containing the normalized data +#' @param object UMI counts matrix +#' @param cell.attr A metadata with cell attributes #' @param reference.SCT.model If not NULL, compute residuals for the object #' using the provided SCT model; supports only log_umi as the latent variable. #' If residual.features are not specified, compute for the top variable.features.n @@ -1469,17 +1468,14 @@ SampleUMI <- function( #' @importFrom sctransform vst get_residual_var get_residuals correct_counts #' #' @seealso \code{\link[sctransform]{correct_counts}} \code{\link[sctransform]{get_residuals}} -#' @export -#' @concept preprocessing #' -#' @examples -#' data("pbmc_small") -#' SCTransform(object = pbmc_small) +#' @rdname SCTransform +#' @concept preprocessing +#' @export #' -SCTransform <- function( +SCTransform.default <- function( object, - assay = 'RNA', - new.assay.name = 'SCT', + cell.attr, reference.SCT.model = NULL, do.correct.umi = TRUE, ncells = 5000, @@ -1489,21 +1485,15 @@ SCTransform <- function( vars.to.regress = NULL, do.scale = FALSE, do.center = TRUE, - clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), + clip.range = c(-sqrt(x = ncol(x = umi) / 30), sqrt(x = ncol(x = umi) / 30)), conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, verbose = TRUE, ... ) { - if (!is.null(x = seed.use)) { - set.seed(seed = seed.use) - } - assay <- assay %||% DefaultAssay(object = object) - assay.obj <- GetAssay(object = object, assay = assay) - umi <- GetAssayData(object = assay.obj, slot = 'counts') - cell.attr <- slot(object = object, name = 'meta.data') vst.args <- list(...) + umi <- object # check for batch_var in meta data if ('batch_var' %in% names(x = vst.args)) { if (!(vst.args[['batch_var']] %in% colnames(x = cell.attr))) { @@ -1711,28 +1701,7 @@ SCTransform <- function( } vst.out }) - # create output assay and put (corrected) umi counts in count slot - if (do.correct.umi & residual.type == 'pearson') { - if (verbose) { - message('Place corrected count matrix in counts slot') - } - # TODO: restore once check.matrix is in SeuratObject - # assay.out <- CreateAssayObject(counts = vst.out$umi_corrected, check.matrix = FALSE) - assay.out <- CreateAssayObject(counts = vst.out$umi_corrected,) - vst.out$umi_corrected <- NULL - } else { - # TODO: restore once check.matrix is in SeuratObject - # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) - assay.out <- CreateAssayObject(counts = umi) - } - # set the variable genes - VariableFeatures(object = assay.out) <- residual.features %||% top.features - # put log1p transformed counts in data - assay.out <- SetAssayData( - object = assay.out, - slot = 'data', - new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) - ) + vst.out$sct.method <- sct.method scale.data <- vst.out$y # clip the residuals scale.data[scale.data < clip.range[1]] <- clip.range[1] @@ -1752,21 +1721,148 @@ SCTransform <- function( min.cells.to.block = 3000, verbose = verbose ) + vst.out$y <- scale.data + vst.out$variable_features <- residual.features %||% top.features + + return(vst.out) +} + +#' @rdname SCTransform +#' @concept preprocessing +#' @export +#' @method SCTransform Assay +#' +SCTransform.Assay <- function( + object, + cell.attr, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) { + if (!is.null(x = seed.use)) { + set.seed(seed = seed.use) + } + umi <- GetAssayData(object = object, slot = 'counts') + vst.out <- SCTransform(object = umi, + cell.attr = cell.attr, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = verbose, + ...) + residual.type <- vst.out[['residual_type']] %||% 'pearson' + sct.method <- vst.out[["sct.method"]] + # create output assay and put (corrected) umi counts in count slot + if (do.correct.umi & residual.type == 'pearson') { + if (verbose) { + message('Place corrected count matrix in counts slot') + } + assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + vst.out$umi_corrected <- NULL + } else { + # TODO: restore once check.matrix is in SeuratObject + # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) + assay.out <- CreateAssayObject(counts = umi) + } + # set the variable genes + VariableFeatures(object = assay.out) <- vst.out$variable_features + # put log1p transformed counts in data + assay.out <- SetAssayData( + object = assay.out, + slot = 'data', + new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) + ) + scale.data <- vst.out$y assay.out <- SetAssayData( object = assay.out, slot = 'scale.data', new.data = scale.data ) - # save vst output (except y) in @misc slot vst.out$y <- NULL # save clip.range into vst model vst.out$arguments$sct.clip.range <- clip.range vst.out$arguments$sct.method <- sct.method Misc(object = assay.out, slot = 'vst.out') <- vst.out assay.out <- as(object = assay.out, Class = "SCTAssay") - assay.out <- SCTAssay(assay.out, assay.orig = assay) - slot(object = slot(object = assay.out, name = "SCTModel.list")[[1]], name = "umi.assay") <- assay - object[[new.assay.name]] <- assay.out + return(assay.out) +} + +#' @param assay Name of assay to pull the count data from; default is 'RNA' +#' @param new.assay.name Name for the new assay containing the normalized data; default is 'SCT' +#' +#' @rdname SCTransform +#' @concept preprocessing +#' @export +#' @method SCTransform Seurat +#' +SCTransform.Seurat <- function( + object, + assay = 'RNA', + new.assay.name = 'SCT', + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) { + assay <- assay %||% DefaultAssay(object = object) + umi <- GetAssay(object = object, assay = assay, slot = "counts") + assay.obj <- GetAssay(object = object, assay = assay) + cell.attr <- slot(object = object, name = 'meta.data') + + assay.data <- SCTransform(object = assay.obj, + cell.attr = cell.attr, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = verbose, + ...) + assay.data <- SCTAssay(assay.data, assay.orig = assay) + slot(object = slot(object = assay.data, name = "SCTModel.list")[[1]], name = "umi.assay") <- assay + object[[new.assay.name]] <- assay.data + if (verbose) { message(paste("Set default assay to", new.assay.name)) } diff --git a/man/SCTransform.Rd b/man/SCTransform.Rd index 54d4c2e8a..40fdf5707 100644 --- a/man/SCTransform.Rd +++ b/man/SCTransform.Rd @@ -1,10 +1,55 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/preprocessing.R +% Please edit documentation in R/generics.R, R/preprocessing.R \name{SCTransform} \alias{SCTransform} -\title{Use regularized negative binomial regression to normalize UMI count data} +\alias{SCTransform.default} +\alias{SCTransform.Assay} +\alias{SCTransform.Seurat} +\title{Perform sctransform-based normalization} \usage{ -SCTransform( +SCTransform(object, ...) + +\method{SCTransform}{default}( + object, + cell.attr, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = umi)/30), sqrt(x = ncol(x = umi)/30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) + +\method{SCTransform}{Assay}( + object, + cell.attr, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object)/30), sqrt(x = ncol(x = object)/30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) + +\method{SCTransform}{Seurat}( object, assay = "RNA", new.assay.name = "SCT", @@ -27,11 +72,11 @@ SCTransform( ) } \arguments{ -\item{object}{A seurat object} +\item{object}{UMI counts matrix} -\item{assay}{Name of assay to pull the count data from; default is 'RNA'} +\item{...}{Additional parameters passed to \code{sctransform::vst}} -\item{new.assay.name}{Name for the new assay containing the normalized data} +\item{cell.attr}{A metadata with cell attributes} \item{reference.SCT.model}{If not NULL, compute residuals for the object using the provided SCT model; supports only log_umi as the latent variable. @@ -76,7 +121,9 @@ NULL will not set a seed.} \item{verbose}{Whether to print messages and progress bars} -\item{...}{Additional parameters passed to \code{sctransform::vst}} +\item{assay}{Name of assay to pull the count data from; default is 'RNA'} + +\item{new.assay.name}{Name for the new assay containing the normalized data; default is 'SCT'} } \value{ Returns a Seurat object with a new assay (named SCT by default) with @@ -92,11 +139,6 @@ FindVariableFeatures, ScaleData workflow. Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, data being log1p(counts), scale.data being pearson residuals; sctransform::vst intermediate results are saved in misc slot of new assay. -} -\examples{ -data("pbmc_small") -SCTransform(object = pbmc_small) - } \seealso{ \code{\link[sctransform]{correct_counts}} \code{\link[sctransform]{get_residuals}} From 13cd01942d20976e9b9256efa0712e38707731b9 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 25 Jul 2022 12:13:54 -0400 Subject: [PATCH 157/979] Add back SCTransform example --- man/SCTransform.Rd | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/man/SCTransform.Rd b/man/SCTransform.Rd index 40fdf5707..f3841bc59 100644 --- a/man/SCTransform.Rd +++ b/man/SCTransform.Rd @@ -139,6 +139,11 @@ FindVariableFeatures, ScaleData workflow. Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, data being log1p(counts), scale.data being pearson residuals; sctransform::vst intermediate results are saved in misc slot of new assay. +} +\examples{ +data("pbmc_small") +SCTransform(object = pbmc_small) + } \seealso{ \code{\link[sctransform]{correct_counts}} \code{\link[sctransform]{get_residuals}} From 21a3d6e4bb8a7e30186323e2d1e3cf502c52e66b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 28 Jul 2022 17:57:57 -0400 Subject: [PATCH 158/979] add bridge reduction method --- R/integration.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/integration.R b/R/integration.R index 1bd429abb..0e07126a7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7255,8 +7255,10 @@ FindBridgeTransferAnchors <- function( dims = 1:30, scale = FALSE, reduction = c('lsiproject', 'pcaproject'), + bridge.reduction = c('direct', 'cca'), verbose = TRUE ) { + bridge.reduction <- match.arg(arg = bridge.reduction) reduction <- match.arg(arg = reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay @@ -7290,6 +7292,7 @@ FindBridgeTransferAnchors <- function( object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Transfer", + reduction = bridge.reduction, reference.bridge.stored = TRUE, verbose = verbose ) From 9728452eee9b8cb5f13ea385911f4cdaa8bb2cf3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 29 Jul 2022 16:36:47 -0400 Subject: [PATCH 159/979] update docu --- R/visualization.R | 12 ++++++------ man/ISpatialDimPlot.Rd | 9 +++++---- man/ISpatialFeaturePlot.Rd | 9 ++++++--- man/VlnPlot.Rd | 4 ++-- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index bef695d34..a6008ccf0 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -553,8 +553,8 @@ RidgePlot <- function( #' scores, etc.) #' #' @inheritParams RidgePlot -#' @param pt.size Point size for geom_violin -#' @param alpha Alpha value for geom_violin +#' @param pt.size Point size for points +#' @param alpha Alpha value for points #' @param split.by A variable to split the violin plots by, #' @param split.plot plot each group of the split violin plots by multiple or #' single violin shapes. @@ -2610,8 +2610,8 @@ LinkedFeaturePlot <- function( #' Visualize clusters spatially and interactively #' -#' @inheritParams DimPlot #' @inheritParams SpatialPlot +#' @inheritParams DimPlot #' @inheritParams LinkedPlots #' #' @return Returns final plot as a ggplot object @@ -2736,8 +2736,8 @@ ISpatialDimPlot <- function( #' Visualize features spatially and interactively #' -#' @inheritParams FeaturePlot #' @inheritParams SpatialPlot +#' @inheritParams FeaturePlot #' @inheritParams LinkedPlots #' #' @return Returns final plot as a ggplot object @@ -5557,8 +5557,8 @@ Col2Hex <- function(...) { # @param y.max Maximum y axis value # @param same.y.lims Set all the y-axis limits to the same values # @param adjust Adjust parameter for geom_violin -# @param pt.size Point size for geom_violin -# @param alpha Alpha value for geom_violin +# @param pt.size Point size for points +# @param alpha Alpha value for points # @param cols Colors to use for plotting # @param group.by Group (color) cells in different ways (for example, orig.ident) # @param split.by A variable to split the plot by diff --git a/man/ISpatialDimPlot.Rd b/man/ISpatialDimPlot.Rd index 226278fcd..daf46897f 100644 --- a/man/ISpatialDimPlot.Rd +++ b/man/ISpatialDimPlot.Rd @@ -7,14 +7,15 @@ ISpatialDimPlot(object, image = NULL, group.by = NULL, alpha = c(0.3, 1)) } \arguments{ -\item{object}{Seurat object} +\item{object}{A Seurat object} \item{image}{Name of the image to use in the plot} -\item{group.by}{Name of one or more metadata columns to group (color) cells by -(for example, orig.ident); pass 'ident' to group by identity class} +\item{group.by}{Name of meta.data column to group the data by} -\item{alpha}{Alpha value for plotting (default is 1)} +\item{alpha}{Controls opacity of spots. Provide as a vector specifying the +min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single +alpha value for each plot.} } \value{ Returns final plot as a ggplot object diff --git a/man/ISpatialFeaturePlot.Rd b/man/ISpatialFeaturePlot.Rd index 81a6b96a1..a24a42dcc 100644 --- a/man/ISpatialFeaturePlot.Rd +++ b/man/ISpatialFeaturePlot.Rd @@ -13,15 +13,18 @@ ISpatialFeaturePlot( ) } \arguments{ -\item{object}{Seurat object} +\item{object}{A Seurat object} \item{feature}{Feature to visualize} \item{image}{Name of the image to use in the plot} -\item{slot}{Which slot to pull expression data from?} +\item{slot}{If plotting a feature, which data slot to pull from (counts, +data, or scale.data)} -\item{alpha}{Alpha value for plotting (default is 1)} +\item{alpha}{Controls opacity of spots. Provide as a vector specifying the +min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single +alpha value for each plot.} } \value{ Returns final plot as a ggplot object diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 98fefff60..b676804b5 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -37,9 +37,9 @@ anything that can be retreived by FetchData)} \item{cols}{Colors to use for plotting} -\item{pt.size}{Point size for geom_violin} +\item{pt.size}{Point size for points} -\item{alpha}{Alpha value for geom_violin} +\item{alpha}{Alpha value for points} \item{idents}{Which classes to include in the plot (default is all)} From 9492f305d659013d99d43e5b8e5ad1fd88f1a413 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 29 Jul 2022 20:00:49 -0400 Subject: [PATCH 160/979] fix alpha docu --- R/visualization.R | 4 ++-- man/LinkedPlots.Rd | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index a6008ccf0..2e8439c77 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -2251,9 +2251,9 @@ PolyFeaturePlot <- function( #' Visualize spatial and clustering (dimensional reduction) data in a linked, #' interactive framework #' -#' @inheritParams DimPlot -#' @inheritParams FeaturePlot #' @inheritParams SpatialPlot +#' @inheritParams FeaturePlot +#' @inheritParams DimPlot #' @param feature Feature to visualize #' @param image Name of the image to use in the plot #' diff --git a/man/LinkedPlots.Rd b/man/LinkedPlots.Rd index d7e358a56..6914f6d0b 100644 --- a/man/LinkedPlots.Rd +++ b/man/LinkedPlots.Rd @@ -30,7 +30,7 @@ LinkedFeaturePlot( ) } \arguments{ -\item{object}{Seurat object} +\item{object}{A Seurat object} \item{dims}{Dimensions to plot, must be a two-length numeric vector specifying x- and y-dimensions} @@ -38,17 +38,19 @@ LinkedFeaturePlot( \item{image}{Name of the image to use in the plot} -\item{group.by}{Name of one or more metadata columns to group (color) cells by -(for example, orig.ident); pass 'ident' to group by identity class} +\item{group.by}{Name of meta.data column to group the data by} -\item{alpha}{Alpha value for plotting (default is 1)} +\item{alpha}{Controls opacity of spots. Provide as a vector specifying the +min and max for SpatialFeaturePlot. For SpatialDimPlot, provide a single +alpha value for each plot.} -\item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} -ggplot object. If \code{FALSE}, return a list of ggplot objects} +\item{combine}{Combine plots into a single gg object; note that if TRUE; +themeing will not work when plotting multiple features/groupings} \item{feature}{Feature to visualize} -\item{slot}{Which slot to pull expression data from?} +\item{slot}{If plotting a feature, which data slot to pull from (counts, +data, or scale.data)} } \value{ Returns final plots. If \code{combine}, plots are stiched together From 7db12514715a62fc5601b9723e8d0791dd299f88 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 3 Aug 2022 15:37:01 -0400 Subject: [PATCH 161/979] Add log-normalization, VST for delayed matrices --- R/preprocessing5.R | 316 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 311 insertions(+), 5 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index aad616dd9..2a3e3b7e1 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1,5 +1,6 @@ #' @include generics.R #' @include preprocessing.R +#' @importFrom stats loess #' @importFrom methods slot #' @importFrom SeuratObject .MARGIN .SparseSlots #' @importFrom utils txtProgressBar setTxtProgressBar @@ -220,6 +221,135 @@ LogNormalize.default <- function( return(data) } +#' @method LogNormalize DelayedMatrix +#' @export +#' +LogNormalize.DelayedMatrix <- function( + data, + scale.factor = 1e4, + margin = 2L, + verbose = TRUE, + sink = NULL, + ... +) { + check_installed( + pkg = 'DelayedArray', + reason = 'for working with delayed matrices' + ) + if (is.null(x = sink)) { + sink <- DelayedArray::AutoRealizationSink( + dim = dim(x = data), + dimnames = dimnames(x = data), + as.sparse = DelayedArray::is_sparse(x = data) + ) + } + if (!inherits(x = sink, what = 'RealizationSink')) { + abort(message = "'sink' must be a RealizationSink") + } else if (!all(dim(x = sink) == dim(x = data))) { + abort(message = "'sink' must be the same size as 'data'") + } + if (!margin %in% c(1L, 2L)) { + abort(message = "'margin' must be 1 or 2") + } + grid <- if (margin == 1L) { + DelayedArray::rowAutoGrid(x = data) + } else { + DelayedArray::colAutoGrid(x = data) + } + sparse <- DelayedArray::is_sparse(x = data) + if (isTRUE(x = verbose)) { + pb <- txtProgressBar(file = stderr(), style = 3) + } + for (i in seq_len(length.out = length(x = grid))) { + vp <- grid[[i]] + x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) + if (isTRUE(x = sparse)) { + x <- DelayedArray::sparse2dense(sas = x) + } + x <- apply( + X = x, + MARGIN = margin, + FUN = function(x) { + log1p(x = x / sum(x) * scale.factor) + } + ) + if (isTRUE(x = sparse)) { + x <- DelayedArray::dense2sparse(x = x) + } + DelayedArray::write_block(sink = sink, viewport = vp, block = x) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / length(x = grid)) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + DelayedArray::close(con = sink) + return(as(object = sink, Class = "DelayedArray")) +} + +#' @method LogNormalize H5ADMatrix +#' @export +#' +LogNormalize.H5ADMatrix <- function( + data, + scale.factor = 1e4, + margin = 2L, + verbose = TRUE, + layer = 'data', + ... +) { + results <- LogNormalize.HDF5Matrix( + data = data, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + layer = file.path('layers', layer, fsep = '/'), + ... + ) + rpath <- slot(object = slot(object = results, name = 'seed'), name = 'filepath') + return(HDF5Array::H5ADMatrix(filepath = rpath, layer = layer)) +} + +#' @method LogNormalize HDF5Matrix +#' @export +#' +LogNormalize.HDF5Matrix <- function( + data, + scale.factor = 1e4, + margin = 2L, + verbose = TRUE, + layer = 'data', + ... +) { + check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') + fpath <- slot(object = slot(object = data, name = 'seed'), name = 'filepath') + if (.DelayedH5DExists(object = data, path = layer)) { + rhdf5::h5delete(file = fpath, name = layer) + dpath <- file.path( + dirname(path = layer), + paste0('.', basename(layer), '_dimnames'), + fsep = '/' + ) + rhdf5::h5delete(file = fpath, name = dpath) + } + sink <- HDF5Array::HDF5RealizationSink( + dim = dim(x = data), + dimnames = dimnames(x = data), + as.sparse = DelayedArray::is_sparse(x = data), + filepath = fpath, + name = layer + ) + return(LogNormalize.DelayedMatrix( + data = data, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + sink = sink, + ... + )) +} + #' @importFrom SeuratObject IsSparse #' #' @method NormalizeData default @@ -251,7 +381,8 @@ NormalizeData.default <- function( data = object, scale.factor = scale.factor, margin = cmargin, - verbose = verbose + verbose = verbose, + ... ) } } @@ -259,6 +390,50 @@ NormalizeData.default <- function( return(normalized) } +.DelayedH5DExists <- function(object, path) { + check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 files') + if (!inherits(x = object, what = c('HDF5Array', 'H5ADMatrix'))) { + abort(message = "'object' must be an HDF5Array or H5ADMatrix") + } + on.exit(expr = rhdf5::h5closeAll(), add = TRUE) + fpath <- slot(object = slot(object = object, name = 'seed'), name = 'filepath') + h5loc <- rhdf5::H5Fopen( + name = fpath, + flags = 'H5F_ACC_RDWR', + fapl = NULL, + native = FALSE + ) + return(rhdf5::H5Lexists(h5loc = h5loc, name = path)) +} + +# #' @method NormalizeData DelayedArray +# #' @export +# #' +# NormalizeData.DelayedArray <- function( +# object, +# method = c('LogNormalize'), +# scale.factor = 1e4, +# cmargin = 2L, +# margin = 1L, +# layer = 'data', +# verbose = TRUE, +# ... +# ) { +# method <- arg_match(arg = method) +# normalized <- switch( +# EXPR = method, +# LogNormalize = LogNormalize( +# data = object, +# scale.factor = scale.factor, +# margin = 2L, +# verbose = TRUE, +# layer = layer, +# ... +# ) +# ) +# return(normalized) +# } + #' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features #' LayerData LayerData<- #' @@ -299,11 +474,12 @@ NormalizeData.StdAssay <- function( features = Features(x = object, layer = l), cells = Cells(x = object, layer = l) ) <- NormalizeData( - object = LayerData(object = object, layer = l, fast = TRUE), + object = LayerData(object = object, layer = l, fast = NA), method = method, scale.factor = scale.factor, margin = margin, verbose = verbose, + layer = save, ... ) } @@ -489,7 +665,138 @@ VST.default <- function( .NotYetImplemented() } -#' @importFrom stats loess +#' @method VST DelayedMatrix +#' @export +#' +VST.DelayedMatrix <- function( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + verbose = TRUE, + ... +) { + check_installed( + pkg = 'DelayedArray', + reason = 'for working with delayed matrices' + ) + if (!margin %in% c(1L, 2L)) { + abort(message = "'margin' must be 1 or 2") + } + grid <- if (margin == 1L) { + DelayedArray::rowAutoGrid(x = data) + } else { + DelayedArray::colAutoGrid(x = data) + } + nfeatures <- dim(x = data)[margin] + ncells <- dim(x = data)[-margin] + hvf.info <- SeuratObject::EmptyDF(n = nfeatures) + # Calculate feature means + hvf.info$mean <- if (margin == 1L) { + DelayedArray::rowMeans(x = data) + } else { + DelayedArray::colMeans(x = data) + } + # Calculate variance + hvf.info$variance <- NA_real_ + if (isTRUE(x = verbose)) { + inform(message = "Calculating feature variances") + pb <- txtProgressBar(style = 3L, file = stderr()) + } + for (i in seq_len(length.out = length(x = grid))) { + vp <- grid[[i]] + idx <- seq.int( + from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), + to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) + ) + x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = FALSE) + mu <- hvf.info$mean[idx] + hvf.info$variance[idx] <- vapply( + X = seq_along(along.with = mu), + FUN = function(j) { + y <- if (margin == 1L) { + x[j, ] + } else { + x[, j] + } + y <- y - mu[j] + return(sum(y ^ 2) / (ncells - 1L)) + }, + FUN.VALUE = numeric(length = 1L) + ) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / length(x = grid)) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + hvf.info$variance.expected <- 0 + not.const <- hvf.info$variance > 0 + fit <- loess( + formula = log10(x = variance) ~ log10(x = mean), + data = hvf.info[not.const, , drop = FALSE], + span = span + ) + hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + # Calculate standardized variance + hvf.info$variance.standardized <- NA_real_ + if (isTRUE(x = verbose)) { + inform( + message = "Calculating feature variances of standardized and clipped values" + ) + pb <- txtProgressBar(style = 3L, file = stderr()) + } + clip <- clip %||% sqrt(x = ncells) + for (i in seq_len(length.out = length(x = grid))) { + vp <- grid[[i]] + idx <- seq.int( + from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), + to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) + ) + x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = FALSE) + mu <- hvf.info$mean[idx] + sd <- sqrt(x = hvf.info$variance.expected[idx]) + hvf.info$variance.standardized[idx] <- vapply( + X = seq_along(along.with = mu), + FUN = function(j) { + if (sd[j] == 0) { + return(0) + } + y <- if (margin == 1L) { + x[j, ] + } else { + x[, j] + } + y <- y - mu[j] + y <- y / sd[j] + y[y > clip] <- clip + return(sum(y ^ 2) / (ncells - 1L)) + }, + FUN.VALUE = numeric(length = 1L) + ) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / length(x = grid)) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + # Set variable status + hvf.info$variable <- FALSE + hvf.info$rank <- NA_integer_ + vs <- hvf.info$variance.standardized + vs[vs == 0] <- NA + vf <- head( + x = order(vs, decreasing = TRUE), + n = nselect + ) + hvf.info$variable[vf] <- TRUE + hvf.info$rank[vf] <- seq_along(along.with = vf) + return(hvf.info) +} + #' @importFrom Matrix rowMeans #' #' @rdname VST @@ -507,7 +814,7 @@ VST.dgCMatrix <- function( ) { nfeatures <- nrow(x = data) hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) - # Calculate feature menas + # Calculate feature means hvf.info$mean <- Matrix::rowMeans(x = data) # Calculate feature variance hvf.info$variance <- SparseRowVar2( @@ -784,7 +1091,6 @@ VST.matrix <- function( #' to \code{clip}; default is \code{NULL} which sets this value to the square #' root of the number of cells #' -#' @importFrom stats loess #' @importFrom Matrix rowMeans #' #' @keywords internal From cebb6fef2b6f914fc034740369cddf46671e0a65 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 3 Aug 2022 15:37:26 -0400 Subject: [PATCH 162/979] Improvements to LeverageScoreSampling --- R/sketching.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index d82f16b56..8586bcad5 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -12,7 +12,7 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @importFrom SeuratObject Key Key<- Layers +#' @importFrom SeuratObject CastAssay Key Key<- Layers #' #' @export #' @@ -23,13 +23,14 @@ LeverageScoreSampling <- function( save = 'sketch', default = TRUE, seed = NA_integer_, + cast = NULL, ... ) { assay <- assay[1L] %||% DefaultAssay(object = object) assay <- match.arg(arg = assay, choices = Assays(object = object)) # TODO: fix this in [[<-,Seurat5 if (save == assay) { - stop("Cannot overwrite existing assays", call. = FALSE) + abort(message = "Cannot overwrite existing assays") } if (save %in% Assays(object = object)) { if (save == DefaultAssay(object = object)) { @@ -74,6 +75,9 @@ LeverageScoreSampling <- function( silent = TRUE ) } + if (!is.null(x = cast)) { + sketched <- CastAssay(object = sketched, to = cast, ...) + } Key(object = sketched) <- Key(object = save, quiet = TRUE) object[[save]] <- sketched if (isTRUE(x = default)) { @@ -127,7 +131,6 @@ LeverageScore.default <- function( ) } if (is.character(x = method)) { - # method <- get(x = method) method <- match.fun(FUN = method) } stopifnot(is.function(x = method)) From 286eb296d7cdc825e0a4b791470ad0471ac94620 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 3 Aug 2022 15:37:33 -0400 Subject: [PATCH 163/979] Update imports --- R/zzz.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index 3dc72747e..73443ce83 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,8 @@ +#' @importFrom methods slot +#' @importFrom rlang abort arg_match caller_env check_installed inform warn +#' +NULL + #' @section Package options: #' #' Seurat uses the following [options()] to configure behaviour: From 8b2ee4e86eafa04118c999a58023e32d2dfc3398 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 3 Aug 2022 15:37:55 -0400 Subject: [PATCH 164/979] UPdate docs --- NAMESPACE | 11 ++++++++++ man/IntegrateSketchEmbeddings.Rd | 35 ++++++++++++++++++++------------ man/STARmap-class.Rd | 5 ++++- man/SlideSeq-class.Rd | 5 ++++- man/merge.SCTAssay.Rd | 2 -- 5 files changed, 41 insertions(+), 17 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 37d2b6501..c4b1e8104 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,9 @@ S3method(LeverageScore,Seurat) S3method(LeverageScore,Seurat5) S3method(LeverageScore,StdAssay) S3method(LeverageScore,default) +S3method(LogNormalize,DelayedMatrix) +S3method(LogNormalize,H5ADMatrix) +S3method(LogNormalize,HDF5Matrix) S3method(LogNormalize,V3Matrix) S3method(LogNormalize,data.frame) S3method(LogNormalize,default) @@ -107,6 +110,7 @@ S3method(ScaleFactors,VisiumV1) S3method(ScoreJackStraw,DimReduc) S3method(ScoreJackStraw,JackStrawData) S3method(ScoreJackStraw,Seurat) +S3method(VST,DelayedMatrix) S3method(VST,default) S3method(VST,dgCMatrix) S3method(VST,matrix) @@ -430,6 +434,7 @@ importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.SparseSlots) importFrom(SeuratObject,AddMetaData) importFrom(SeuratObject,Assays) +importFrom(SeuratObject,CastAssay) importFrom(SeuratObject,Cells) importFrom(SeuratObject,CellsByIdentities) importFrom(SeuratObject,Command) @@ -660,13 +665,19 @@ importFrom(reticulate,import) importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") +importFrom(rlang,abort) +importFrom(rlang,arg_match) importFrom(rlang,as_label) importFrom(rlang,as_name) +importFrom(rlang,caller_env) +importFrom(rlang,check_installed) importFrom(rlang,enquo) +importFrom(rlang,inform) importFrom(rlang,is_quosure) importFrom(rlang,is_scalar_character) importFrom(rlang,quo_get_env) importFrom(rlang,quo_get_expr) +importFrom(rlang,warn) importFrom(scales,brewer_pal) importFrom(scales,hue_pal) importFrom(scales,rescale) diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index 9bdf2f318..f668183b3 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -6,12 +6,14 @@ \usage{ IntegrateSketchEmbeddings( object, - atom.assay = "sketch", - orig.assay = "RNA", + atoms = "sketch", + orig = "RNA", features = NULL, - atom.sketch.reduction = "integrated_dr", - dictionary.method = c("sketch", "data"), - sketch.ratio = 0.8, + reduction = "integrated_dr", + method = c("sketch", "data"), + ratio = 0.8, + reduction.name = NULL, + reduction.key = NULL, layers = NULL, verbose = TRUE ) @@ -19,27 +21,34 @@ IntegrateSketchEmbeddings( \arguments{ \item{object}{A Seurat object with all cells for one dataset} -\item{atom.assay}{Assay name for sketched-cell expression (default is 'sketch')} +\item{atoms}{Assay name for sketched-cell expression (default is 'sketch')} -\item{orig.assay}{Assay name for original expression (default is 'RNA')} +\item{orig}{Assay name for original expression (default is 'RNA')} \item{features}{Features used for atomic sketch integration} -\item{atom.sketch.reduction}{Dimensional reduction name for batch-corrected embeddings +\item{reduction}{Dimensional reduction name for batch-corrected embeddings in the sketched object (default is 'integrated_dr')} -\item{dictionary.method}{Methods to construct sketch-cell representation +\item{method}{Methods to construct sketch-cell representation for all cells (default is 'sketch'). Can be one of: \itemize{ -\item{sketch: Use random sketched data slot} -\item{data: Use data slot} + \item \dQuote{\code{sketch}}: Use random sketched data slot + \item \dQuote{\code{data}}: Use data slot }} -\item{sketch.ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to 'sketch' (default is 0.8)} +\item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set +to \dQuote{\code{sketch}}; defaults to 0.8} + +\item{reduction.name}{Name to save new reduction as; defaults to +\code{paste0(reduction, '.orig')}} + +\item{reduction.key}{Key for new dimensional reduction; defaults to creating +one from \code{reduction.name}} \item{layers}{Names of layers for correction.} -\item{verbose}{Print progress and message (default is TRUE)} +\item{verbose}{Print progress and message} } \value{ Returns a Seurat object with an integrated dimensional reduction diff --git a/man/STARmap-class.Rd b/man/STARmap-class.Rd index 7984f21c3..30ab87d92 100644 --- a/man/STARmap-class.Rd +++ b/man/STARmap-class.Rd @@ -16,7 +16,10 @@ The STARmap class priority for visualization when the assay is set as the active/default assay in a \code{Seurat} object} -\item{\code{key}}{Key for the image} +\item{\code{key}}{A one-length character vector with the object's key; keys must +be one or more alphanumeric characters followed by an underscore +\dQuote{\code{_}} (regex pattern +\dQuote{\code{^[a-zA-Z][a-zA-Z0-9]*_$}})} } } diff --git a/man/SlideSeq-class.Rd b/man/SlideSeq-class.Rd index a81a02e3c..60cdb125f 100644 --- a/man/SlideSeq-class.Rd +++ b/man/SlideSeq-class.Rd @@ -22,7 +22,10 @@ The SlideSeq class represents spatial information from the Slide-seq platform priority for visualization when the assay is set as the active/default assay in a \code{Seurat} object} -\item{\code{key}}{Key for the image} +\item{\code{key}}{A one-length character vector with the object's key; keys must +be one or more alphanumeric characters followed by an underscore +\dQuote{\code{_}} (regex pattern +\dQuote{\code{^[a-zA-Z][a-zA-Z0-9]*_$}})} } } diff --git a/man/merge.SCTAssay.Rd b/man/merge.SCTAssay.Rd index f976cbc1d..93afca7b1 100644 --- a/man/merge.SCTAssay.Rd +++ b/man/merge.SCTAssay.Rd @@ -14,8 +14,6 @@ ) } \arguments{ -\item{x}{A \code{\link[SeuratObject]{Seurat}} object} - \item{y}{A single \code{Seurat} object or a list of \code{Seurat} objects} \item{add.cell.ids}{A character vector of \code{length(x = c(x, y))}; From 66554c9263de82fd297f1ca39d7b64adda7ef0e7 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 3 Aug 2022 15:38:20 -0400 Subject: [PATCH 165/979] Bump develop version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d51dcbde0..88bb09f33 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9007 -Date: 2022-06-23 +Version: 4.0.4.9008 +Date: 2022-08-03 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 67aaa1fd3130055445feed3926d49a2c33d9a3dd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 9 Aug 2022 15:30:20 -0400 Subject: [PATCH 166/979] add vjust DoHeatmap --- R/visualization.R | 4 +++- man/DoHeatmap.Rd | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 2e8439c77..8132652c3 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -189,6 +189,7 @@ DimHeatmap <- function( #' @param label Label the cell identies above the color bar #' @param size Size of text above color bar #' @param hjust Horizontal justification of text above color bar +#' @param vjust Vertical justification of text above color bar #' @param angle Angle of text above color bar #' @param raster If true, plot with geom_raster, else use geom_tile. geom_raster may look blurry on #' some viewing applications such as Preview due to how the raster is interpolated. Set this to FALSE @@ -229,6 +230,7 @@ DoHeatmap <- function( label = TRUE, size = 5.5, hjust = 0, + vjust = 0, angle = 45, raster = TRUE, draw.lines = TRUE, @@ -387,7 +389,7 @@ DoHeatmap <- function( stat = "identity", data = label.x.pos, aes_string(label = 'group', x = 'label.x.pos'), - y = y.max + y.max * 0.03 * 0.5, + y = y.max + y.max * 0.03 * 0.5 + vjust, angle = angle, hjust = hjust, size = size diff --git a/man/DoHeatmap.Rd b/man/DoHeatmap.Rd index efa301bee..5747b1994 100644 --- a/man/DoHeatmap.Rd +++ b/man/DoHeatmap.Rd @@ -18,6 +18,7 @@ DoHeatmap( label = TRUE, size = 5.5, hjust = 0, + vjust = 0, angle = 45, raster = TRUE, draw.lines = TRUE, @@ -54,6 +55,8 @@ if \code{slot} is 'scale.data', 6 otherwise} \item{hjust}{Horizontal justification of text above color bar} +\item{vjust}{Vertical justification of text above color bar} + \item{angle}{Angle of text above color bar} \item{raster}{If true, plot with geom_raster, else use geom_tile. geom_raster may look blurry on From d919a0beb223e53ac1e0578a98cc3da891ecb586 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 10 Aug 2022 10:31:10 -0400 Subject: [PATCH 167/979] add weight matrix in transferLabel NN --- R/integration.R | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/R/integration.R b/R/integration.R index 0e07126a7..b381ed5fa 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3976,7 +3976,6 @@ FindWeights <- function( sd = sd.weight, display_progress = verbose ) - object <- SetIntegrationData( object = object, integration.name = integration.name, @@ -6109,26 +6108,43 @@ FindBridgeAnchor <- function(object.list, # @param nn.object the query neighbors object # @param reference.object the reference seurat object # @param group.by A vector of variables to group cells by +# @param weight.matrix A reference x query cell weight matrix # @return Returns a list for predicted labels, prediction score and matrix #' @importFrom Matrix sparseMatrix #' @importFrom fastDummies dummy_cols -#' @importFrom Matrix rowMeans +#' @importFrom Matrix rowMeans t #' TransferLablesNN <- function( - nn.object, + nn.object = NULL, reference.object, - group.by = NULL + group.by = NULL, + weight.matrix = NULL ){ - select_nn <- Indices(nn.object) - k.nn <- ncol(select_nn) - j <- as.numeric(x = t(x = select_nn )) - i <- ((1:length(x = j)) - 1) %/% k.nn + 1 - nn.matrix <- sparseMatrix( - i = i, - j = j, - x = 1, - dims = c(nrow(select_nn), ncol(x = reference.object)) - ) + if (!is.null(x = weight.matrix) & !is.null(x = nn.object)) { + warning('both nn.object and weight matrix are set. Only weight matrix is used for label transfer') + } + + if (is.null(x = weight.matrix)) { + select_nn <- Indices(nn.object) + k.nn <- ncol(select_nn) + j <- as.numeric(x = t(x = select_nn )) + i <- ((1:length(x = j)) - 1) %/% k.nn + 1 + nn.matrix <- sparseMatrix( + i = i, + j = j, + x = 1, + dims = c(nrow(select_nn), ncol(x = reference.object)) + ) + } else if (nrow(weights) == ncol(reference.object)) { + nn.matrix <- t(weights) + k.nn <- 1 + } else if (ncol(weights) == ncol(reference.object)) { + nn.matrix <- weights + k.nn <- 1 + } else { + stop('wrong weights matrix input') + } + reference.labels.matrix <- as.sparse( x = dummy_cols( reference.object[[group.by]] @@ -6139,6 +6155,7 @@ TransferLablesNN <- function( replacement = "", x = colnames(reference.labels.matrix) ) + query.label.mat <- nn.matrix %*% reference.labels.matrix query.label.mat <- query.label.mat/k.nn rownames(x = query.label.mat) <- Cells(nn.object) From ab75bae8fdfa1ee3fdb54e5390e9c7cf0a821131 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 10 Aug 2022 10:33:15 -0400 Subject: [PATCH 168/979] fix weight error --- R/integration.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index b381ed5fa..7e42c68d2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6123,7 +6123,6 @@ TransferLablesNN <- function( if (!is.null(x = weight.matrix) & !is.null(x = nn.object)) { warning('both nn.object and weight matrix are set. Only weight matrix is used for label transfer') } - if (is.null(x = weight.matrix)) { select_nn <- Indices(nn.object) k.nn <- ncol(select_nn) @@ -6135,11 +6134,11 @@ TransferLablesNN <- function( x = 1, dims = c(nrow(select_nn), ncol(x = reference.object)) ) - } else if (nrow(weights) == ncol(reference.object)) { - nn.matrix <- t(weights) + } else if (nrow(weight.matrix) == ncol(reference.object)) { + nn.matrix <- t(weight.matrix) k.nn <- 1 - } else if (ncol(weights) == ncol(reference.object)) { - nn.matrix <- weights + } else if (ncol(weight.matrix) == ncol(reference.object)) { + nn.matrix <- weight.matrix k.nn <- 1 } else { stop('wrong weights matrix input') From 6edb2b0ce657ff5fa1e41e70d9a7b71ade0b4c42 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 10 Aug 2022 11:14:22 -0400 Subject: [PATCH 169/979] Start DelayedMatrix method for LeverageScore --- NAMESPACE | 1 + R/sketching.R | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index c4b1e8104..1bec2ebd0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) +S3method(LeverageScore,DelayedMatrix) S3method(LeverageScore,Seurat) S3method(LeverageScore,Seurat5) S3method(LeverageScore,StdAssay) diff --git a/R/sketching.R b/R/sketching.R index 8586bcad5..b6c0576cc 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -164,6 +164,62 @@ LeverageScore.default <- function( return(rowSums(x = Z ^ 2)) } +#' @method LeverageScore DelayedMatrix +#' @export +#' +LeverageScore.DelayedMatrix <- function( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) { + check_installed( + pkg = 'DelayedArray', + reason = 'for working with delayed matrices' + ) + if (!is_quosure(x = method)) { + method <- enquo(arg = method) + } + grid <- DelayedArray::colAutoGrid(x = object) + scores <- vector(mode = 'numeric', length = ncol(x = object)) + if (isTRUE(x = verbose)) { + pb <- txtProgressBar(style = 3L, file = stderr()) + } + for (i in length(x = grid)) { + vp <- grid[[i]] + idx <- seq.int( + from = IRanges::start(x = slot(object = vp, name = 'ranges')[2L]), + to = IRanges::end(x = slot(object = vp, name = 'ranges')[2L]) + ) + x <- as.sparse(x = DelayedArray::read_block( + x = object, + viewport = vp, + as.sparse = FALSE + )) + scores[idx] <- LeverageScore( + object = x, + nsketch = nsketch, + ndims = ndims, + method = method, + eps = 0.5, + seed = seed, + verbose = FALSE + # ... + ) + if (isTRUE(x = verbose)) { + setTxtProgressBar(pb = pb, value = i / length(x = grid)) + } + } + if (isTRUE(x = verbose)) { + close(con = pb) + } + return(scores) +} + #' @method LeverageScore StdAssay #' @export #' From cb67a8ac2f741af9073c8894dcae94952969ff2a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 19 Aug 2022 16:54:47 -0400 Subject: [PATCH 170/979] Add LogNormalize.SparseArraySeed --- NAMESPACE | 1 + R/preprocessing5.R | 46 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 1bec2ebd0..3dfc9ccd4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,6 +54,7 @@ S3method(LeverageScore,default) S3method(LogNormalize,DelayedMatrix) S3method(LogNormalize,H5ADMatrix) S3method(LogNormalize,HDF5Matrix) +S3method(LogNormalize,SparseArraySeed) S3method(LogNormalize,V3Matrix) S3method(LogNormalize,data.frame) S3method(LogNormalize,default) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 2a3e3b7e1..64a075734 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -263,19 +263,13 @@ LogNormalize.DelayedMatrix <- function( for (i in seq_len(length.out = length(x = grid))) { vp <- grid[[i]] x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) - if (isTRUE(x = sparse)) { - x <- DelayedArray::sparse2dense(sas = x) - } - x <- apply( - X = x, - MARGIN = margin, - FUN = function(x) { - log1p(x = x / sum(x) * scale.factor) - } + x <- LogNormalize( + data = x, + scale.factor = scale.factor, + margin = margin, + verbose = FALSE, + ... ) - if (isTRUE(x = sparse)) { - x <- DelayedArray::dense2sparse(x = x) - } DelayedArray::write_block(sink = sink, viewport = vp, block = x) if (isTRUE(x = verbose)) { setTxtProgressBar(pb = pb, value = i / length(x = grid)) @@ -350,6 +344,34 @@ LogNormalize.HDF5Matrix <- function( )) } +#' @method LogNormalize SparseArraySeed +#' @export +#' +LogNormalize.SparseArraySeed <- function( + data, + scale.factor = 1e4, + margin = 2L, + return.seed = TRUE, + verbose= TRUE, + ... +) { + check_installed( + pkg = 'DelayedArray', + reason = 'for working with SparseArraySeeds' + ) + data <- LogNormalize( + data = as(object = data, Class = 'CsparseMatrix'), + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + ... + ) + if (!isFALSE(x = return.seed)) { + data <- as(object = data, Class = 'SparseArraySeed') + } + return(data) +} + #' @importFrom SeuratObject IsSparse #' #' @method NormalizeData default From c68bc74372d31a3b2b9cdb6fdf2b8d4aed9eb2ec Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 22 Aug 2022 10:36:53 -0400 Subject: [PATCH 171/979] Improvements to delayed processing --- R/preprocessing5.R | 177 +++++++++++++++++++++++++++------------------ R/sketching.R | 62 +++++++++++++++- 2 files changed, 166 insertions(+), 73 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 64a075734..57056212d 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -713,17 +713,23 @@ VST.DelayedMatrix <- function( } nfeatures <- dim(x = data)[margin] ncells <- dim(x = data)[-margin] - hvf.info <- SeuratObject::EmptyDF(n = nfeatures) + # hvf.info <- SeuratObject::EmptyDF(n = nfeatures) + hvf.info <- vector(mode = 'list', length = length(x = grid)) + sparse <- DelayedArray::is_sparse(x = data) # Calculate feature means - hvf.info$mean <- if (margin == 1L) { - DelayedArray::rowMeans(x = data) - } else { - DelayedArray::colMeans(x = data) - } + # if (isTRUE(x = verbose)) { + # inform(message = "Calculating feature means") + # } + # hvf.info$mean <- if (margin == 1L) { + # DelayedArray::rowMeans(x = data) + # } else { + # DelayedArray::colMeans(x = data) + # } # Calculate variance hvf.info$variance <- NA_real_ if (isTRUE(x = verbose)) { - inform(message = "Calculating feature variances") + # inform(message = "Calculating feature variances") + inform(message = "Identifying variable features") pb <- txtProgressBar(style = 3L, file = stderr()) } for (i in seq_len(length.out = length(x = grid))) { @@ -732,72 +738,97 @@ VST.DelayedMatrix <- function( from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) ) - x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = FALSE) - mu <- hvf.info$mean[idx] - hvf.info$variance[idx] <- vapply( - X = seq_along(along.with = mu), - FUN = function(j) { - y <- if (margin == 1L) { - x[j, ] - } else { - x[, j] - } - y <- y - mu[j] - return(sum(y ^ 2) / (ncells - 1L)) - }, - FUN.VALUE = numeric(length = 1L) - ) - if (isTRUE(x = verbose)) { - setTxtProgressBar(pb = pb, value = i / length(x = grid)) + x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) + if (isTRUE(x = sparse)) { + x <- as(object = x, Class = "CsparseMatrix") } - } - if (isTRUE(x = verbose)) { - close(con = pb) - } - hvf.info$variance.expected <- 0 - not.const <- hvf.info$variance > 0 - fit <- loess( - formula = log10(x = variance) ~ log10(x = mean), - data = hvf.info[not.const, , drop = FALSE], - span = span - ) - hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted - # Calculate standardized variance - hvf.info$variance.standardized <- NA_real_ - if (isTRUE(x = verbose)) { - inform( - message = "Calculating feature variances of standardized and clipped values" - ) - pb <- txtProgressBar(style = 3L, file = stderr()) - } - clip <- clip %||% sqrt(x = ncells) - for (i in seq_len(length.out = length(x = grid))) { - vp <- grid[[i]] - idx <- seq.int( - from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), - to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) - ) - x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = FALSE) - mu <- hvf.info$mean[idx] - sd <- sqrt(x = hvf.info$variance.expected[idx]) - hvf.info$variance.standardized[idx] <- vapply( - X = seq_along(along.with = mu), - FUN = function(j) { - if (sd[j] == 0) { - return(0) - } - y <- if (margin == 1L) { - x[j, ] - } else { - x[, j] - } - y <- y - mu[j] - y <- y / sd[j] - y[y > clip] <- clip - return(sum(y ^ 2) / (ncells - 1L)) - }, - FUN.VALUE = numeric(length = 1L) + hvf.info[[i]] <- VST( + data = x, + margin = margin, + nselect = floor(x = nselect / length(x = grid)), + span = span, + clip = clip, + verbose = FALSE, + ... ) + # if (margin == 2L) { + # x <- t(x = x) + # } + # mu <- hvf.info$mean[idx] + # hvf.info$variance[idx] <- rowSums(x = ((x - mu) ^ 2) / (ncells - 1L)) + # # hvf.info$variance[idx] <- vapply( + # # X = seq_along(along.with = mu), + # # FUN = function(j) { + # # y <- if (margin == 1L) { + # # x[j, ] + # # } else { + # # x[, j] + # # } + # # y <- y - mu[j] + # # return(sum(y ^ 2) / (ncells - 1L)) + # # }, + # # FUN.VALUE = numeric(length = 1L) + # # ) + # if (isTRUE(x = verbose)) { + # setTxtProgressBar(pb = pb, value = i / length(x = grid)) + # } + # } + # if (isTRUE(x = verbose)) { + # close(con = pb) + # } + # hvf.info$variance.expected <- 0 + # not.const <- hvf.info$variance > 0 + # fit <- loess( + # formula = log10(x = variance) ~ log10(x = mean), + # data = hvf.info[not.const, , drop = FALSE], + # span = span + # ) + # hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + # # Calculate standardized variance + # hvf.info$variance.standardized <- NA_real_ + # if (isTRUE(x = verbose)) { + # inform( + # message = "Calculating feature variances of standardized and clipped values" + # ) + # pb <- txtProgressBar(style = 3L, file = stderr()) + # } + # clip <- clip %||% sqrt(x = ncells) + # for (i in seq_len(length.out = length(x = grid))) { + # vp <- grid[[i]] + # idx <- seq.int( + # from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), + # to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) + # ) + # x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) + # if (isTRUE(x = sparse)) { + # x <- as(object = x, Class = "CsparseMatrix") + # } + # if (margin == 2L) { + # x <- t(x = x) + # } + # mu <- hvf.info$mean[idx] + # sd <- sqrt(x = hvf.info$variance.expected[idx]) + # hvf.info$variance.standardized[idx] <- 0 + # sdn <- which(x = sd != 0) + # hvf.info$variance.standardized[idx[sdn]] <- rowSums(x = (((x[sdn, ] - mu[sdn]) / sd[sdn]) ^ 2) / (ncells - 1L)) + # # hvf.info$variance.standardized[idx] <- vapply( + # # X = seq_along(along.with = mu), + # # FUN = function(j) { + # # if (sd[j] == 0) { + # # return(0) + # # } + # # y <- if (margin == 1L) { + # # x[j, ] + # # } else { + # # x[, j] + # # } + # # y <- y - mu[j] + # # y <- y / sd[j] + # # y[y > clip] <- clip + # # return(sum(y ^ 2) / (ncells - 1L)) + # # }, + # # FUN.VALUE = numeric(length = 1L) + # # ) if (isTRUE(x = verbose)) { setTxtProgressBar(pb = pb, value = i / length(x = grid)) } @@ -805,6 +836,8 @@ VST.DelayedMatrix <- function( if (isTRUE(x = verbose)) { close(con = pb) } + browser() + hvf.info <- do.call(what = 'rbind', args = hvf.info) # Set variable status hvf.info$variable <- FALSE hvf.info$rank <- NA_integer_ @@ -857,7 +890,7 @@ VST.dgCMatrix <- function( mu = hvf.info$mean, sd = sqrt(x = hvf.info$variance.expected), vmax = clip %||% sqrt(x = ncol(x = data)), - display_progress = TRUE + display_progress = verbose ) # Set variable features hvf.info$variable <- FALSE diff --git a/R/sketching.R b/R/sketching.R index b6c0576cc..102410ce9 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -12,6 +12,66 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +DelayedLeverageScore <- function( + object, + assay = NULL, + nsketch = 5000L, + ncells = 5000L, + layer = 'data', + save = 'sketch', + method = CountSketch, + eps = 0.5, + default = TRUE, + seed = NA_integer_, + # cast = NULL, + verbose = TRUE, + ... +) { + .NotYetImplemented() + check_installed( + pkg = 'DelayedArray', + reason = 'for working with delayed matrices' + ) + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + # TODO: fix this in [[<-,Seurat5 + if (save == assay) { + abort(message = "Cannot overwrite existing assays") + } + if (save %in% Assays(object = object)) { + if (save == DefaultAssay(object = object)) { + DefaultAssay(object = object) <- assay + } + object[[save]] <- NULL + } + layer <- unique(x = layer) %||% DefaultLayer(object = object) + layer <- Layers(object = object, assay = assay, search = layer) + scores <- SeuratObject:::EmptyDF(n = ncol(x = object)) + row.names(x = scores) <- colnames(x = object) + scores[, layer] <- NA_real_ + for (i in seq_along(along.with = layer)) { + l <- layer[i] + if (isTRUE(x = verbose)) { + message("Running LeverageScore for layer ", l) + } + # scores[Cells(x = object, layer = l), l] <- LeverageScore( + # object = LayerData( + # object = object, + # layer = l, + # features = features %||% VariableFeatures(object = object, layer = l), + # fast = TRUE + # ), + # nsketch = nsketch, + # ndims = ndims %||% ncol(x = object), + # method = method, + # eps = eps, + # seed = seed, + # verbose = verbose, + # ... + # ) + } +} + #' @importFrom SeuratObject CastAssay Key Key<- Layers #' #' @export @@ -19,7 +79,7 @@ NULL LeverageScoreSampling <- function( object, assay = NULL, - ncells = 5000, + ncells = 5000L, save = 'sketch', default = TRUE, seed = NA_integer_, From 10cc751f3f322dc297ad6092d11755c57d75d034 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 22 Aug 2022 10:37:23 -0400 Subject: [PATCH 172/979] Remove renv --- .Rprofile | 1 - renv.lock | 2008 --------------------------------------------- renv/.gitignore | 6 - renv/activate.R | 933 --------------------- renv/settings.dcf | 10 - 5 files changed, 2958 deletions(-) delete mode 100644 .Rprofile delete mode 100644 renv.lock delete mode 100644 renv/.gitignore delete mode 100644 renv/activate.R delete mode 100644 renv/settings.dcf diff --git a/.Rprofile b/.Rprofile deleted file mode 100644 index 81b960f5c..000000000 --- a/.Rprofile +++ /dev/null @@ -1 +0,0 @@ -source("renv/activate.R") diff --git a/renv.lock b/renv.lock deleted file mode 100644 index b1a66d87b..000000000 --- a/renv.lock +++ /dev/null @@ -1,2008 +0,0 @@ -{ - "R": { - "Version": "4.2.0", - "Repositories": [ - { - "Name": "CRAN", - "URL": "https://cloud.r-project.org" - } - ] - }, - "Bioconductor": { - "Version": "3.15" - }, - "Packages": { - "BH": { - "Package": "BH", - "Version": "1.78.0-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4e348572ffcaa2fb1e610e7a941f6f3a", - "Requirements": [] - }, - "BiocGenerics": { - "Package": "BiocGenerics", - "Version": "0.42.0", - "Source": "Bioconductor", - "git_url": "https://git.bioconductor.org/packages/BiocGenerics", - "git_branch": "RELEASE_3_15", - "git_last_commit": "3582d47", - "git_last_commit_date": "2022-04-26", - "Hash": "37348ee784e82c0a6b650146275b459d", - "Requirements": [] - }, - "BiocManager": { - "Package": "BiocManager", - "Version": "1.30.17", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0ab5f6e14502755ca875f938781909af", - "Requirements": [] - }, - "DelayedArray": { - "Package": "DelayedArray", - "Version": "0.22.0", - "Source": "Bioconductor", - "git_url": "https://git.bioconductor.org/packages/DelayedArray", - "git_branch": "RELEASE_3_15", - "git_last_commit": "4a5afd1", - "git_last_commit_date": "2022-04-26", - "Hash": "c83a6e282a9669d4e04eb2d2ce8660a8", - "Requirements": [ - "BiocGenerics", - "IRanges", - "Matrix", - "MatrixGenerics", - "S4Vectors" - ] - }, - "FNN": { - "Package": "FNN", - "Version": "1.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b56998fff55e4a4b4860ad6e8c67e0f9", - "Requirements": [] - }, - "IRanges": { - "Package": "IRanges", - "Version": "2.30.0", - "Source": "Bioconductor", - "git_url": "https://git.bioconductor.org/packages/IRanges", - "git_branch": "RELEASE_3_15", - "git_last_commit": "9b5f3ca", - "git_last_commit_date": "2022-04-26", - "Hash": "27d19cab80fa238e21abfbd2892120f4", - "Requirements": [ - "BiocGenerics", - "S4Vectors" - ] - }, - "KernSmooth": { - "Package": "KernSmooth", - "Version": "2.23-20", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8dcfa99b14c296bc9f1fd64d52fd3ce7", - "Requirements": [] - }, - "MASS": { - "Package": "MASS", - "Version": "7.3-57", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "71476c1d88d1ebdf31580e5a257d5d31", - "Requirements": [] - }, - "Matrix": { - "Package": "Matrix", - "Version": "1.4-1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "699c47c606293bdfbc9fd78a93c9c8fe", - "Requirements": [ - "lattice" - ] - }, - "MatrixGenerics": { - "Package": "MatrixGenerics", - "Version": "1.8.0", - "Source": "Bioconductor", - "git_url": "https://git.bioconductor.org/packages/MatrixGenerics", - "git_branch": "RELEASE_3_15", - "git_last_commit": "e4cc34d", - "git_last_commit_date": "2022-04-26", - "Hash": "68a6ed57fd9a1d0d6beea73507b3ff59", - "Requirements": [ - "matrixStats" - ] - }, - "R6": { - "Package": "R6", - "Version": "2.5.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "470851b6d5d0ac559e9d01bb352b4021", - "Requirements": [] - }, - "RANN": { - "Package": "RANN", - "Version": "2.6.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d128ea05a972d3e67c6f39de52c72bd7", - "Requirements": [] - }, - "RColorBrewer": { - "Package": "RColorBrewer", - "Version": "1.1-3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "45f0398006e83a5b10b72a90663d8d8c", - "Requirements": [] - }, - "ROCR": { - "Package": "ROCR", - "Version": "1.0-11", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cc151930e20e16427bc3d0daec62b4a9", - "Requirements": [ - "gplots" - ] - }, - "RSpectra": { - "Package": "RSpectra", - "Version": "0.16-1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "6b5ab997fd5ff6d46a5f1d9f8b76961c", - "Requirements": [ - "Matrix", - "Rcpp", - "RcppEigen" - ] - }, - "Rcpp": { - "Package": "Rcpp", - "Version": "1.0.8.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "32e79b908fda56ee57fe518a8d37b864", - "Requirements": [] - }, - "RcppAnnoy": { - "Package": "RcppAnnoy", - "Version": "0.0.19", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "5681153e3eb103725e35ac5f7ebca910", - "Requirements": [ - "Rcpp" - ] - }, - "RcppArmadillo": { - "Package": "RcppArmadillo", - "Version": "0.11.0.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "704ac7bb6a49df5a9a2b014793fcc6cb", - "Requirements": [ - "Rcpp" - ] - }, - "RcppCCTZ": { - "Package": "RcppCCTZ", - "Version": "0.2.10", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f41ba6c0b1b62d8f3610e70029d6fb9e", - "Requirements": [ - "Rcpp" - ] - }, - "RcppDate": { - "Package": "RcppDate", - "Version": "0.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "08cc427d6fe7a63e604cfa11aad31006", - "Requirements": [] - }, - "RcppEigen": { - "Package": "RcppEigen", - "Version": "0.3.3.9.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4c86baed78388ceb06f88e3e9a1d87f5", - "Requirements": [ - "Matrix", - "Rcpp" - ] - }, - "RcppGSL": { - "Package": "RcppGSL", - "Version": "0.3.11", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "af9a4cf1669ff630aa4c5d2aa32c9732", - "Requirements": [ - "Rcpp" - ] - }, - "RcppProgress": { - "Package": "RcppProgress", - "Version": "0.4.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "1c0aa18b97e6aaa17f93b8b866c0ace5", - "Requirements": [] - }, - "RcppTOML": { - "Package": "RcppTOML", - "Version": "0.1.7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f8a578aa91321ecec1292f1e2ffadeda", - "Requirements": [ - "Rcpp" - ] - }, - "RcppZiggurat": { - "Package": "RcppZiggurat", - "Version": "0.1.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "75b4a36aeeed440ad03b996081190703", - "Requirements": [ - "Rcpp", - "RcppGSL" - ] - }, - "Rfast": { - "Package": "Rfast", - "Version": "2.0.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "330d7a6d1f7062cc49e68c7d4797bb75", - "Requirements": [ - "Rcpp", - "RcppArmadillo", - "RcppZiggurat" - ] - }, - "Rfast2": { - "Package": "Rfast2", - "Version": "0.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "28900a0181cd8a68bc65b82ba67770bd", - "Requirements": [ - "RANN", - "Rcpp", - "RcppArmadillo", - "Rfast" - ] - }, - "Rtsne": { - "Package": "Rtsne", - "Version": "0.16", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "e921b89ef921905fc89b95886675706d", - "Requirements": [ - "Rcpp" - ] - }, - "S4Vectors": { - "Package": "S4Vectors", - "Version": "0.34.0", - "Source": "Bioconductor", - "git_url": "https://git.bioconductor.org/packages/S4Vectors", - "git_branch": "RELEASE_3_15", - "git_last_commit": "f590de3", - "git_last_commit_date": "2022-04-26", - "Hash": "90677cc888563927cecacd4067ccad45", - "Requirements": [ - "BiocGenerics" - ] - }, - "SeuratObject": { - "Package": "SeuratObject", - "Version": "4.9.9.9027", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteUsername": "mojaveazure", - "RemoteRepo": "seurat-object", - "RemoteRef": "feat/standard", - "RemoteSha": "6d88922df63ec4a90d79a9f979b1a7c34ff6abc9", - "Hash": "59b9fdbdffbbfdebcfec27bb42725c76", - "Requirements": [ - "Matrix", - "Rcpp", - "RcppEigen", - "future", - "future.apply", - "progressr", - "rgeos", - "rlang", - "sp", - "spam" - ] - }, - "TileDBArray": { - "Package": "TileDBArray", - "Version": "1.6.0", - "Source": "Bioconductor", - "git_url": "https://git.bioconductor.org/packages/TileDBArray", - "git_branch": "RELEASE_3_15", - "git_last_commit": "9cda8da", - "git_last_commit_date": "2022-04-26", - "Hash": "2bdefe19c80ca24aeccfee96f30948de", - "Requirements": [ - "DelayedArray", - "Rcpp", - "S4Vectors", - "tiledb" - ] - }, - "VGAM": { - "Package": "VGAM", - "Version": "1.1-6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "3a3f2e54570b48d0670c26363b6728c4", - "Requirements": [] - }, - "abind": { - "Package": "abind", - "Version": "1.4-5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4f57884290cc75ab22f4af9e9d4ca862", - "Requirements": [] - }, - "ape": { - "Package": "ape", - "Version": "5.6-2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "894108412a7ec23d5de85cdcce871c8b", - "Requirements": [ - "Rcpp", - "lattice", - "nlme" - ] - }, - "askpass": { - "Package": "askpass", - "Version": "1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "e8a22846fff485f0be3770c2da758713", - "Requirements": [ - "sys" - ] - }, - "base64enc": { - "Package": "base64enc", - "Version": "0.1-3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "543776ae6848fde2f48ff3816d0628bc", - "Requirements": [] - }, - "bit": { - "Package": "bit", - "Version": "4.0.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f36715f14d94678eea9933af927bc15d", - "Requirements": [] - }, - "bit64": { - "Package": "bit64", - "Version": "4.0.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9fe98599ca456d6552421db0d6772d8f", - "Requirements": [ - "bit" - ] - }, - "bitops": { - "Package": "bitops", - "Version": "1.0-7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b7d8d8ee39869c18d8846a184dd8a1af", - "Requirements": [] - }, - "brio": { - "Package": "brio", - "Version": "1.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "976cf154dfb043c012d87cddd8bca363", - "Requirements": [] - }, - "bslib": { - "Package": "bslib", - "Version": "0.3.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "56ae7e1987b340186a8a5a157c2ec358", - "Requirements": [ - "htmltools", - "jquerylib", - "jsonlite", - "rlang", - "sass" - ] - }, - "caTools": { - "Package": "caTools", - "Version": "1.18.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "34d90fa5845004236b9eacafc51d07b2", - "Requirements": [ - "bitops" - ] - }, - "cachem": { - "Package": "cachem", - "Version": "1.0.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "648c5b3d71e6a37e3043617489a0a0e9", - "Requirements": [ - "fastmap", - "rlang" - ] - }, - "callr": { - "Package": "callr", - "Version": "3.7.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "461aa75a11ce2400245190ef5d3995df", - "Requirements": [ - "R6", - "processx" - ] - }, - "cli": { - "Package": "cli", - "Version": "3.3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "23abf173c2b783dcc43379ab9bba00ee", - "Requirements": [ - "glue" - ] - }, - "cluster": { - "Package": "cluster", - "Version": "2.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c5f8447373ec2a0f593c694024e5b7ee", - "Requirements": [] - }, - "codetools": { - "Package": "codetools", - "Version": "0.2-18", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "019388fc48e48b3da0d3a76ff94608a8", - "Requirements": [] - }, - "colorspace": { - "Package": "colorspace", - "Version": "2.0-3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bb4341986bc8b914f0f0acf2e4a3f2f7", - "Requirements": [] - }, - "commonmark": { - "Package": "commonmark", - "Version": "1.8.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "2ba81b120c1655ab696c935ef33ea716", - "Requirements": [] - }, - "cowplot": { - "Package": "cowplot", - "Version": "1.1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b418e8423699d11c7f2087c2bfd07da2", - "Requirements": [ - "ggplot2", - "gtable", - "rlang", - "scales" - ] - }, - "cpp11": { - "Package": "cpp11", - "Version": "0.4.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "fa53ce256cd280f468c080a58ea5ba8c", - "Requirements": [] - }, - "crayon": { - "Package": "crayon", - "Version": "1.5.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8dc45fd8a1ee067a92b85ef274e66d6a", - "Requirements": [] - }, - "crosstalk": { - "Package": "crosstalk", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "6aa54f69598c32177e920eb3402e8293", - "Requirements": [ - "R6", - "htmltools", - "jsonlite", - "lazyeval" - ] - }, - "curl": { - "Package": "curl", - "Version": "4.3.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "022c42d49c28e95d69ca60446dbabf88", - "Requirements": [] - }, - "data.table": { - "Package": "data.table", - "Version": "1.14.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "36b67b5adf57b292923f5659f5f0c853", - "Requirements": [] - }, - "deldir": { - "Package": "deldir", - "Version": "1.0-6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "65a3d4e2a1619bb85ae0fb64628da972", - "Requirements": [] - }, - "desc": { - "Package": "desc", - "Version": "1.4.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "eebd27ee58fcc58714eedb7aa07d8ad1", - "Requirements": [ - "R6", - "cli", - "rprojroot" - ] - }, - "diffobj": { - "Package": "diffobj", - "Version": "0.3.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bcaa8b95f8d7d01a5dedfd959ce88ab8", - "Requirements": [ - "crayon" - ] - }, - "digest": { - "Package": "digest", - "Version": "0.6.29", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cf6b206a045a684728c3267ef7596190", - "Requirements": [] - }, - "dotCall64": { - "Package": "dotCall64", - "Version": "1.0-1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d0ef6cd1546530da4d72179b52856e84", - "Requirements": [] - }, - "dplyr": { - "Package": "dplyr", - "Version": "1.0.9", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f0bda1627a7f5d3f9a0b5add931596ac", - "Requirements": [ - "R6", - "generics", - "glue", - "lifecycle", - "magrittr", - "pillar", - "rlang", - "tibble", - "tidyselect", - "vctrs" - ] - }, - "dqrng": { - "Package": "dqrng", - "Version": "0.3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "3ce2af5ead3b01c518fd453c7fe5a51a", - "Requirements": [ - "BH", - "Rcpp", - "sitmo" - ] - }, - "ellipsis": { - "Package": "ellipsis", - "Version": "0.3.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077", - "Requirements": [ - "rlang" - ] - }, - "enrichR": { - "Package": "enrichR", - "Version": "3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8cdcd06e4fa171911dc4d83feaa26467", - "Requirements": [ - "ggplot2", - "httr", - "rjson" - ] - }, - "evaluate": { - "Package": "evaluate", - "Version": "0.15", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "699a7a93d08c962d9f8950b2d7a227f1", - "Requirements": [] - }, - "fansi": { - "Package": "fansi", - "Version": "1.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "83a8afdbe71839506baa9f90eebad7ec", - "Requirements": [] - }, - "farver": { - "Package": "farver", - "Version": "2.1.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c98eb5133d9cb9e1622b8691487f11bb", - "Requirements": [] - }, - "fastmap": { - "Package": "fastmap", - "Version": "1.1.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "77bd60a6157420d4ffa93b27cf6a58b8", - "Requirements": [] - }, - "fitdistrplus": { - "Package": "fitdistrplus", - "Version": "1.1-8", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9de7bf56c16871adf44dc88a24c7836c", - "Requirements": [ - "MASS", - "survival" - ] - }, - "fontawesome": { - "Package": "fontawesome", - "Version": "0.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "55624ed409e46c5f358b2c060be87f67", - "Requirements": [ - "htmltools", - "rlang" - ] - }, - "fs": { - "Package": "fs", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7c89603d81793f0d5486d91ab1fc6f1d", - "Requirements": [] - }, - "future": { - "Package": "future", - "Version": "1.25.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "877024e372cf61e41f5d13eafd8d4bac", - "Requirements": [ - "digest", - "globals", - "listenv", - "parallelly" - ] - }, - "future.apply": { - "Package": "future.apply", - "Version": "1.9.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8ec2bd333ccd4df6bf70e68dded6c364", - "Requirements": [ - "future", - "globals" - ] - }, - "generics": { - "Package": "generics", - "Version": "0.1.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "177475892cf4a55865868527654a7741", - "Requirements": [] - }, - "ggplot2": { - "Package": "ggplot2", - "Version": "3.3.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0fb26d0674c82705c6b701d1a61e02ea", - "Requirements": [ - "MASS", - "digest", - "glue", - "gtable", - "isoband", - "mgcv", - "rlang", - "scales", - "tibble", - "withr" - ] - }, - "ggrepel": { - "Package": "ggrepel", - "Version": "0.9.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "08ab869f37e6a7741a64ab9069bcb67d", - "Requirements": [ - "Rcpp", - "ggplot2", - "rlang", - "scales" - ] - }, - "ggridges": { - "Package": "ggridges", - "Version": "0.5.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9d028e8f37c84dba356ce3c367a1978e", - "Requirements": [ - "ggplot2", - "plyr", - "scales", - "withr" - ] - }, - "globals": { - "Package": "globals", - "Version": "0.14.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "eca8023ed5ca6372479ebb9b3207f5ae", - "Requirements": [ - "codetools" - ] - }, - "glue": { - "Package": "glue", - "Version": "1.6.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e", - "Requirements": [] - }, - "goftest": { - "Package": "goftest", - "Version": "1.2-3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "dbe0201f91eeb15918dd3fbf01ee689a", - "Requirements": [] - }, - "gplots": { - "Package": "gplots", - "Version": "3.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "75437dd4c43599f6e9418ea249495fda", - "Requirements": [ - "KernSmooth", - "caTools", - "gtools" - ] - }, - "gridExtra": { - "Package": "gridExtra", - "Version": "2.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7d7f283939f563670a697165b2cf5560", - "Requirements": [ - "gtable" - ] - }, - "gtable": { - "Package": "gtable", - "Version": "0.3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "ac5c6baf7822ce8732b343f14c072c4d", - "Requirements": [] - }, - "gtools": { - "Package": "gtools", - "Version": "3.9.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "2ace6c4a06297d0b364e0444384a2b82", - "Requirements": [] - }, - "hdf5r": { - "Package": "hdf5r", - "Version": "1.3.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0870af1f511f6d6d1230e71856801f86", - "Requirements": [ - "R6", - "bit64" - ] - }, - "here": { - "Package": "here", - "Version": "1.0.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "24b224366f9c2e7534d2344d10d59211", - "Requirements": [ - "rprojroot" - ] - }, - "highr": { - "Package": "highr", - "Version": "0.9", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8eb36c8125038e648e5d111c0d7b2ed4", - "Requirements": [ - "xfun" - ] - }, - "htmltools": { - "Package": "htmltools", - "Version": "0.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "526c484233f42522278ab06fb185cb26", - "Requirements": [ - "base64enc", - "digest", - "fastmap", - "rlang" - ] - }, - "htmlwidgets": { - "Package": "htmlwidgets", - "Version": "1.5.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "76147821cd3fcd8c4b04e1ef0498e7fb", - "Requirements": [ - "htmltools", - "jsonlite", - "yaml" - ] - }, - "httpuv": { - "Package": "httpuv", - "Version": "1.6.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "97fe71f0a4a1c9890e6c2128afa04bc0", - "Requirements": [ - "R6", - "Rcpp", - "later", - "promises" - ] - }, - "httr": { - "Package": "httr", - "Version": "1.4.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "88d1b310583777edf01ccd1216fb0b2b", - "Requirements": [ - "R6", - "curl", - "jsonlite", - "mime", - "openssl" - ] - }, - "ica": { - "Package": "ica", - "Version": "1.0-2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "95ba9b882bb834ecbdad37338a11f3f8", - "Requirements": [] - }, - "igraph": { - "Package": "igraph", - "Version": "1.3.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "820595efd173f9bac4ea76c1be007299", - "Requirements": [ - "Matrix", - "magrittr", - "pkgconfig" - ] - }, - "irlba": { - "Package": "irlba", - "Version": "2.3.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "066c11bb9bc75b343f3de1ecaf3b7ba2", - "Requirements": [ - "Matrix" - ] - }, - "isoband": { - "Package": "isoband", - "Version": "0.2.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7ab57a6de7f48a8dc84910d1eca42883", - "Requirements": [] - }, - "jquerylib": { - "Package": "jquerylib", - "Version": "0.1.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "5aab57a3bd297eee1c1d862735972182", - "Requirements": [ - "htmltools" - ] - }, - "jsonlite": { - "Package": "jsonlite", - "Version": "1.8.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d07e729b27b372429d42d24d503613a0", - "Requirements": [] - }, - "kernlab": { - "Package": "kernlab", - "Version": "0.9-30", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "911c5e7638c514aac7d0b46840ce6a91", - "Requirements": [] - }, - "knitr": { - "Package": "knitr", - "Version": "1.39", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "029ab7c4badd3cf8af69016b2ba27493", - "Requirements": [ - "evaluate", - "highr", - "stringr", - "xfun", - "yaml" - ] - }, - "labeling": { - "Package": "labeling", - "Version": "0.4.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "3d5108641f47470611a32d0bdf357a72", - "Requirements": [] - }, - "later": { - "Package": "later", - "Version": "1.3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7e7b457d7766bc47f2a5f21cc2984f8e", - "Requirements": [ - "Rcpp", - "rlang" - ] - }, - "lattice": { - "Package": "lattice", - "Version": "0.20-45", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b64cdbb2b340437c4ee047a1f4c4377b", - "Requirements": [] - }, - "lazyeval": { - "Package": "lazyeval", - "Version": "0.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d908914ae53b04d4c0c0fd72ecc35370", - "Requirements": [] - }, - "leiden": { - "Package": "leiden", - "Version": "0.3.10", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f0b75ad9505081a5a91b1c2947d70cdb", - "Requirements": [ - "Matrix", - "igraph", - "reticulate" - ] - }, - "lifecycle": { - "Package": "lifecycle", - "Version": "1.0.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a6b6d352e3ed897373ab19d8395c98d0", - "Requirements": [ - "glue", - "rlang" - ] - }, - "listenv": { - "Package": "listenv", - "Version": "0.8.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0bde42ee282efb18c7c4e63822f5b4f7", - "Requirements": [] - }, - "lmtest": { - "Package": "lmtest", - "Version": "0.9-40", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c6fafa6cccb1e1dfe7f7d122efd6e6a7", - "Requirements": [ - "zoo" - ] - }, - "magrittr": { - "Package": "magrittr", - "Version": "2.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7ce2733a9826b3aeb1775d56fd305472", - "Requirements": [] - }, - "matrixStats": { - "Package": "matrixStats", - "Version": "0.62.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "36ad89a805c436c5316c22490079da67", - "Requirements": [] - }, - "mgcv": { - "Package": "mgcv", - "Version": "1.8-40", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c6b2fdb18cf68ab613bd564363e1ba0d", - "Requirements": [ - "Matrix", - "nlme" - ] - }, - "mime": { - "Package": "mime", - "Version": "0.12", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "18e9c28c1d3ca1560ce30658b22ce104", - "Requirements": [] - }, - "miniUI": { - "Package": "miniUI", - "Version": "0.1.1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "fec5f52652d60615fdb3957b3d74324a", - "Requirements": [ - "htmltools", - "shiny" - ] - }, - "mixtools": { - "Package": "mixtools", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4999c07351f4c5e2f323c1d6e88652b9", - "Requirements": [ - "MASS", - "kernlab", - "segmented", - "survival" - ] - }, - "munsell": { - "Package": "munsell", - "Version": "0.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "6dfe8bf774944bd5595785e3229d8771", - "Requirements": [ - "colorspace" - ] - }, - "nanotime": { - "Package": "nanotime", - "Version": "0.3.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "609658c0d87106c130e1101c3c37c4dd", - "Requirements": [ - "Rcpp", - "RcppCCTZ", - "RcppDate", - "bit64", - "zoo" - ] - }, - "nlme": { - "Package": "nlme", - "Version": "3.1-157", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "dbca60742be0c9eddc5205e5c7ca1f44", - "Requirements": [ - "lattice" - ] - }, - "openssl": { - "Package": "openssl", - "Version": "2.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cf4329aac12c2c44089974559c18e446", - "Requirements": [ - "askpass" - ] - }, - "parallelly": { - "Package": "parallelly", - "Version": "1.31.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "115faaa1a50897c3e2339d1cb7d3d493", - "Requirements": [] - }, - "patchwork": { - "Package": "patchwork", - "Version": "1.1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c446b30cb33ec125ff02588b60660ccb", - "Requirements": [ - "ggplot2", - "gtable" - ] - }, - "pbapply": { - "Package": "pbapply", - "Version": "1.5-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "11359a5bb73622ab3f4136bf57108b64", - "Requirements": [] - }, - "pillar": { - "Package": "pillar", - "Version": "1.7.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "51dfc97e1b7069e9f7e6f83f3589c22e", - "Requirements": [ - "cli", - "crayon", - "ellipsis", - "fansi", - "glue", - "lifecycle", - "rlang", - "utf8", - "vctrs" - ] - }, - "pkgconfig": { - "Package": "pkgconfig", - "Version": "2.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "01f28d4278f15c76cddbea05899c5d6f", - "Requirements": [] - }, - "pkgload": { - "Package": "pkgload", - "Version": "1.2.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7533cd805940821bf23eaf3c8d4c1735", - "Requirements": [ - "cli", - "crayon", - "desc", - "rlang", - "rprojroot", - "rstudioapi", - "withr" - ] - }, - "plotly": { - "Package": "plotly", - "Version": "4.10.0.9001", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "166e09e58492fc26d2293c67a99a0e5b", - "Requirements": [ - "RColorBrewer", - "base64enc", - "crosstalk", - "data.table", - "digest", - "dplyr", - "ggplot2", - "htmltools", - "htmlwidgets", - "httr", - "jsonlite", - "lazyeval", - "magrittr", - "promises", - "purrr", - "rlang", - "scales", - "tibble", - "tidyr", - "vctrs", - "viridisLite" - ] - }, - "plyr": { - "Package": "plyr", - "Version": "1.8.7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9c17c6ee41639ebdc1d7266546d3b627", - "Requirements": [ - "Rcpp" - ] - }, - "png": { - "Package": "png", - "Version": "0.1-7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "03b7076c234cb3331288919983326c55", - "Requirements": [] - }, - "polyclip": { - "Package": "polyclip", - "Version": "1.10-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cb167f328b3ada4ec5cf67a7df4c900a", - "Requirements": [] - }, - "praise": { - "Package": "praise", - "Version": "1.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a555924add98c99d2f411e37e7d25e9f", - "Requirements": [] - }, - "processx": { - "Package": "processx", - "Version": "3.5.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8bbae1a548d0d3fdf6647bdd9d35bf6d", - "Requirements": [ - "R6", - "ps" - ] - }, - "progressr": { - "Package": "progressr", - "Version": "0.10.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7df448f5ae46ab0a1af0fb619349d3fd", - "Requirements": [ - "digest" - ] - }, - "promises": { - "Package": "promises", - "Version": "1.2.0.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4ab2c43adb4d4699cf3690acd378d75d", - "Requirements": [ - "R6", - "Rcpp", - "later", - "magrittr", - "rlang" - ] - }, - "ps": { - "Package": "ps", - "Version": "1.7.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "eef74b13f32cae6bb0d495e53317c44c", - "Requirements": [] - }, - "purrr": { - "Package": "purrr", - "Version": "0.3.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "97def703420c8ab10d8f0e6c72101e02", - "Requirements": [ - "magrittr", - "rlang" - ] - }, - "rappdirs": { - "Package": "rappdirs", - "Version": "0.3.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "5e3c5dc0b071b21fa128676560dbe94d", - "Requirements": [] - }, - "rematch2": { - "Package": "rematch2", - "Version": "2.1.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "76c9e04c712a05848ae7a23d2f170a40", - "Requirements": [ - "tibble" - ] - }, - "renv": { - "Package": "renv", - "Version": "0.15.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c1078316e1d4f70275fc1ea60c0bc431", - "Requirements": [] - }, - "reshape2": { - "Package": "reshape2", - "Version": "1.4.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bb5996d0bd962d214a11140d77589917", - "Requirements": [ - "Rcpp", - "plyr", - "stringr" - ] - }, - "reticulate": { - "Package": "reticulate", - "Version": "1.24", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "ffdf27627a3c1537478073c43b6e7980", - "Requirements": [ - "Matrix", - "Rcpp", - "RcppTOML", - "here", - "jsonlite", - "png", - "rappdirs", - "withr" - ] - }, - "rgeos": { - "Package": "rgeos", - "Version": "0.5-9", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7cfa4f73a070042ecf141d5c1d1bfcae", - "Requirements": [ - "sp" - ] - }, - "rjson": { - "Package": "rjson", - "Version": "0.2.21", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f9da75e6444e95a1baf8ca24909d63b9", - "Requirements": [] - }, - "rlang": { - "Package": "rlang", - "Version": "1.0.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "04884d9a75d778aca22c7154b8333ec9", - "Requirements": [] - }, - "rmarkdown": { - "Package": "rmarkdown", - "Version": "2.14", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "31b60a882fabfabf6785b8599ffeb8ba", - "Requirements": [ - "bslib", - "evaluate", - "htmltools", - "jquerylib", - "jsonlite", - "knitr", - "stringr", - "tinytex", - "xfun", - "yaml" - ] - }, - "rpart": { - "Package": "rpart", - "Version": "4.1.16", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "ea3ca1d9473daabb3cd0f1b4f974c1ed", - "Requirements": [] - }, - "rprojroot": { - "Package": "rprojroot", - "Version": "2.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "1de7ab598047a87bba48434ba35d497d", - "Requirements": [] - }, - "rstudioapi": { - "Package": "rstudioapi", - "Version": "0.13", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "06c85365a03fdaf699966cc1d3cf53ea", - "Requirements": [] - }, - "rsvd": { - "Package": "rsvd", - "Version": "1.0.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b462187d887abc519894874486dbd6fd", - "Requirements": [ - "Matrix" - ] - }, - "sass": { - "Package": "sass", - "Version": "0.4.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f37c0028d720bab3c513fd65d28c7234", - "Requirements": [ - "R6", - "fs", - "htmltools", - "rappdirs", - "rlang" - ] - }, - "scales": { - "Package": "scales", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "6e8750cdd13477aa440d453da93d5cac", - "Requirements": [ - "R6", - "RColorBrewer", - "farver", - "labeling", - "lifecycle", - "munsell", - "rlang", - "viridisLite" - ] - }, - "scattermore": { - "Package": "scattermore", - "Version": "0.8", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "77ef398f338597b86a3d6853c585ce38", - "Requirements": [ - "ggplot2", - "scales" - ] - }, - "sctransform": { - "Package": "sctransform", - "Version": "0.3.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "85ff27d18c45a92993f6d7fd09c41f79", - "Requirements": [ - "MASS", - "Matrix", - "Rcpp", - "RcppArmadillo", - "dplyr", - "future", - "future.apply", - "ggplot2", - "gridExtra", - "magrittr", - "matrixStats", - "reshape2", - "rlang" - ] - }, - "segmented": { - "Package": "segmented", - "Version": "1.5-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "eefd32a9f572433282949f775c7d5bf0", - "Requirements": [ - "MASS" - ] - }, - "shiny": { - "Package": "shiny", - "Version": "1.7.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "00344c227c7bd0ab5d78052c5d736c44", - "Requirements": [ - "R6", - "bslib", - "cachem", - "commonmark", - "crayon", - "ellipsis", - "fastmap", - "fontawesome", - "glue", - "htmltools", - "httpuv", - "jsonlite", - "later", - "lifecycle", - "mime", - "promises", - "rlang", - "sourcetools", - "withr", - "xtable" - ] - }, - "sitmo": { - "Package": "sitmo", - "Version": "2.0.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c956d93f6768a9789edbc13072b70c78", - "Requirements": [ - "Rcpp" - ] - }, - "sourcetools": { - "Package": "sourcetools", - "Version": "0.1.7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "947e4e02a79effa5d512473e10f41797", - "Requirements": [] - }, - "sp": { - "Package": "sp", - "Version": "1.4-7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c5dad1d43440f0c426a6d29b30b333fa", - "Requirements": [ - "lattice" - ] - }, - "spam": { - "Package": "spam", - "Version": "2.8-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "516ffcf193fa4f07683611a4474db22b", - "Requirements": [ - "dotCall64" - ] - }, - "spatstat.core": { - "Package": "spatstat.core", - "Version": "2.4-2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "1f996e01b9890a001823bd97818e75e9", - "Requirements": [ - "Matrix", - "abind", - "goftest", - "mgcv", - "nlme", - "rpart", - "spatstat.data", - "spatstat.geom", - "spatstat.random", - "spatstat.sparse", - "spatstat.utils", - "tensor" - ] - }, - "spatstat.data": { - "Package": "spatstat.data", - "Version": "2.2-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "5b4f6c12d1a222680579070a045786bb", - "Requirements": [ - "Matrix", - "spatstat.utils" - ] - }, - "spatstat.geom": { - "Package": "spatstat.geom", - "Version": "2.4-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "33f4612dc27fa9228cac4713bb2edfd7", - "Requirements": [ - "deldir", - "polyclip", - "spatstat.data", - "spatstat.utils" - ] - }, - "spatstat.random": { - "Package": "spatstat.random", - "Version": "2.2-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c08d6e00855f11e04ac863dce8d6779e", - "Requirements": [ - "spatstat.data", - "spatstat.geom", - "spatstat.utils" - ] - }, - "spatstat.sparse": { - "Package": "spatstat.sparse", - "Version": "2.1-1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "cde91679dea4a2fb53833a86f32c94f5", - "Requirements": [ - "Matrix", - "abind", - "spatstat.utils", - "tensor" - ] - }, - "spatstat.utils": { - "Package": "spatstat.utils", - "Version": "2.3-0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "814e4fc977efc125d8bc51ca7b49f866", - "Requirements": [] - }, - "stringi": { - "Package": "stringi", - "Version": "1.7.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bba431031d30789535745a9627ac9271", - "Requirements": [] - }, - "stringr": { - "Package": "stringr", - "Version": "1.4.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0759e6b6c0957edb1311028a49a35e76", - "Requirements": [ - "glue", - "magrittr", - "stringi" - ] - }, - "survival": { - "Package": "survival", - "Version": "3.3-1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f6189c70451d3d68e0d571235576e833", - "Requirements": [ - "Matrix" - ] - }, - "sys": { - "Package": "sys", - "Version": "3.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b227d13e29222b4574486cfcbde077fa", - "Requirements": [] - }, - "tensor": { - "Package": "tensor", - "Version": "1.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "25cfab6cf405c15bccf7e69ec39df090", - "Requirements": [] - }, - "testthat": { - "Package": "testthat", - "Version": "3.1.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f76c2a02d0fdc24aa7a47ea34261a6e3", - "Requirements": [ - "R6", - "brio", - "callr", - "cli", - "crayon", - "desc", - "digest", - "ellipsis", - "evaluate", - "jsonlite", - "lifecycle", - "magrittr", - "pkgload", - "praise", - "processx", - "ps", - "rlang", - "waldo", - "withr" - ] - }, - "tibble": { - "Package": "tibble", - "Version": "3.1.7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "08415af406e3dd75049afef9552e7355", - "Requirements": [ - "ellipsis", - "fansi", - "lifecycle", - "magrittr", - "pillar", - "pkgconfig", - "rlang", - "vctrs" - ] - }, - "tidyr": { - "Package": "tidyr", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d8b95b7fee945d7da6888cf7eb71a49c", - "Requirements": [ - "cpp11", - "dplyr", - "ellipsis", - "glue", - "lifecycle", - "magrittr", - "purrr", - "rlang", - "tibble", - "tidyselect", - "vctrs" - ] - }, - "tidyselect": { - "Package": "tidyselect", - "Version": "1.1.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "17f6da8cfd7002760a859915ce7eef8f", - "Requirements": [ - "ellipsis", - "glue", - "purrr", - "rlang", - "vctrs" - ] - }, - "tiledb": { - "Package": "tiledb", - "Version": "0.12.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b7fdc76172db79497f91faa14b6a6f85", - "Requirements": [ - "Rcpp", - "nanotime" - ] - }, - "tinytex": { - "Package": "tinytex", - "Version": "0.38", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "759d047596ac173433985deddf313450", - "Requirements": [ - "xfun" - ] - }, - "utf8": { - "Package": "utf8", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c9c462b759a5cc844ae25b5942654d13", - "Requirements": [] - }, - "uwot": { - "Package": "uwot", - "Version": "0.1.11", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f83dba1458cca3b1523e27944edb9da5", - "Requirements": [ - "FNN", - "Matrix", - "RSpectra", - "Rcpp", - "RcppAnnoy", - "RcppProgress", - "dqrng", - "irlba" - ] - }, - "vctrs": { - "Package": "vctrs", - "Version": "0.4.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "8b54f22e2a58c4f275479c92ce041a57", - "Requirements": [ - "cli", - "glue", - "rlang" - ] - }, - "viridisLite": { - "Package": "viridisLite", - "Version": "0.4.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "55e157e2aa88161bdb0754218470d204", - "Requirements": [] - }, - "waldo": { - "Package": "waldo", - "Version": "0.4.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "035fba89d0c86e2113120f93301b98ad", - "Requirements": [ - "cli", - "diffobj", - "fansi", - "glue", - "rematch2", - "rlang", - "tibble" - ] - }, - "withr": { - "Package": "withr", - "Version": "2.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c0e49a9760983e81e55cdd9be92e7182", - "Requirements": [] - }, - "xfun": { - "Package": "xfun", - "Version": "0.30", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "e83f48136b041845e50a6658feffb197", - "Requirements": [] - }, - "xtable": { - "Package": "xtable", - "Version": "1.8-4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b8acdf8af494d9ec19ccb2481a9b11c2", - "Requirements": [] - }, - "yaml": { - "Package": "yaml", - "Version": "2.3.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "458bb38374d73bf83b1bb85e353da200", - "Requirements": [] - }, - "zoo": { - "Package": "zoo", - "Version": "1.8-10", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "277b8b4c5b7b47e664aebfe024a2092e", - "Requirements": [ - "lattice" - ] - } - } -} diff --git a/renv/.gitignore b/renv/.gitignore deleted file mode 100644 index 275e4ca34..000000000 --- a/renv/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -library/ -local/ -cellar/ -lock/ -python/ -staging/ diff --git a/renv/activate.R b/renv/activate.R deleted file mode 100644 index e96125195..000000000 --- a/renv/activate.R +++ /dev/null @@ -1,933 +0,0 @@ - -local({ - - # the requested version of renv - version <- "0.15.4" - - # the project directory - project <- getwd() - - # figure out whether the autoloader is enabled - enabled <- local({ - - # first, check config option - override <- getOption("renv.config.autoloader.enabled") - if (!is.null(override)) - return(override) - - # next, check environment variables - # TODO: prefer using the configuration one in the future - envvars <- c( - "RENV_CONFIG_AUTOLOADER_ENABLED", - "RENV_AUTOLOADER_ENABLED", - "RENV_ACTIVATE_PROJECT" - ) - - for (envvar in envvars) { - envval <- Sys.getenv(envvar, unset = NA) - if (!is.na(envval)) - return(tolower(envval) %in% c("true", "t", "1")) - } - - # enable by default - TRUE - - }) - - if (!enabled) - return(FALSE) - - # avoid recursion - if (identical(getOption("renv.autoloader.running"), TRUE)) { - warning("ignoring recursive attempt to run renv autoloader") - return(invisible(TRUE)) - } - - # signal that we're loading renv during R startup - options(renv.autoloader.running = TRUE) - on.exit(options(renv.autoloader.running = NULL), add = TRUE) - - # signal that we've consented to use renv - options(renv.consent = TRUE) - - # load the 'utils' package eagerly -- this ensures that renv shims, which - # mask 'utils' packages, will come first on the search path - library(utils, lib.loc = .Library) - - # unload renv if it's already been laoded - if ("renv" %in% loadedNamespaces()) - unloadNamespace("renv") - - # load bootstrap tools - `%||%` <- function(x, y) { - if (is.environment(x) || length(x)) x else y - } - - bootstrap <- function(version, library) { - - # attempt to download renv - tarball <- tryCatch(renv_bootstrap_download(version), error = identity) - if (inherits(tarball, "error")) - stop("failed to download renv ", version) - - # now attempt to install - status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity) - if (inherits(status, "error")) - stop("failed to install renv ", version) - - } - - renv_bootstrap_tests_running <- function() { - getOption("renv.tests.running", default = FALSE) - } - - renv_bootstrap_repos <- function() { - - # check for repos override - repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) - if (!is.na(repos)) - return(repos) - - # check for lockfile repositories - repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) - if (!inherits(repos, "error") && length(repos)) - return(repos) - - # if we're testing, re-use the test repositories - if (renv_bootstrap_tests_running()) - return(getOption("renv.tests.repos")) - - # retrieve current repos - repos <- getOption("repos") - - # ensure @CRAN@ entries are resolved - repos[repos == "@CRAN@"] <- getOption( - "renv.repos.cran", - "https://cloud.r-project.org" - ) - - # add in renv.bootstrap.repos if set - default <- c(FALLBACK = "https://cloud.r-project.org") - extra <- getOption("renv.bootstrap.repos", default = default) - repos <- c(repos, extra) - - # remove duplicates that might've snuck in - dupes <- duplicated(repos) | duplicated(names(repos)) - repos[!dupes] - - } - - renv_bootstrap_repos_lockfile <- function() { - - lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") - if (!file.exists(lockpath)) - return(NULL) - - lockfile <- tryCatch(renv_json_read(lockpath), error = identity) - if (inherits(lockfile, "error")) { - warning(lockfile) - return(NULL) - } - - repos <- lockfile$R$Repositories - if (length(repos) == 0) - return(NULL) - - keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) - vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) - names(vals) <- keys - - return(vals) - - } - - renv_bootstrap_download <- function(version) { - - # if the renv version number has 4 components, assume it must - # be retrieved via github - nv <- numeric_version(version) - components <- unclass(nv)[[1]] - - # if this appears to be a development version of 'renv', we'll - # try to restore from github - dev <- length(components) == 4L - - # begin collecting different methods for finding renv - methods <- c( - renv_bootstrap_download_tarball, - if (dev) - renv_bootstrap_download_github - else c( - renv_bootstrap_download_cran_latest, - renv_bootstrap_download_cran_archive - ) - ) - - for (method in methods) { - path <- tryCatch(method(version), error = identity) - if (is.character(path) && file.exists(path)) - return(path) - } - - stop("failed to download renv ", version) - - } - - renv_bootstrap_download_impl <- function(url, destfile) { - - mode <- "wb" - - # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 - fixup <- - Sys.info()[["sysname"]] == "Windows" && - substring(url, 1L, 5L) == "file:" - - if (fixup) - mode <- "w+b" - - utils::download.file( - url = url, - destfile = destfile, - mode = mode, - quiet = TRUE - ) - - } - - renv_bootstrap_download_cran_latest <- function(version) { - - spec <- renv_bootstrap_download_cran_latest_find(version) - - message("* Downloading renv ", version, " ... ", appendLF = FALSE) - - type <- spec$type - repos <- spec$repos - - info <- tryCatch( - utils::download.packages( - pkgs = "renv", - destdir = tempdir(), - repos = repos, - type = type, - quiet = TRUE - ), - condition = identity - ) - - if (inherits(info, "condition")) { - message("FAILED") - return(FALSE) - } - - # report success and return - message("OK (downloaded ", type, ")") - info[1, 2] - - } - - renv_bootstrap_download_cran_latest_find <- function(version) { - - # check whether binaries are supported on this system - binary <- - getOption("renv.bootstrap.binary", default = TRUE) && - !identical(.Platform$pkgType, "source") && - !identical(getOption("pkgType"), "source") && - Sys.info()[["sysname"]] %in% c("Darwin", "Windows") - - types <- c(if (binary) "binary", "source") - - # iterate over types + repositories - for (type in types) { - for (repos in renv_bootstrap_repos()) { - - # retrieve package database - db <- tryCatch( - as.data.frame( - utils::available.packages(type = type, repos = repos), - stringsAsFactors = FALSE - ), - error = identity - ) - - if (inherits(db, "error")) - next - - # check for compatible entry - entry <- db[db$Package %in% "renv" & db$Version %in% version, ] - if (nrow(entry) == 0) - next - - # found it; return spec to caller - spec <- list(entry = entry, type = type, repos = repos) - return(spec) - - } - } - - # if we got here, we failed to find renv - fmt <- "renv %s is not available from your declared package repositories" - stop(sprintf(fmt, version)) - - } - - renv_bootstrap_download_cran_archive <- function(version) { - - name <- sprintf("renv_%s.tar.gz", version) - repos <- renv_bootstrap_repos() - urls <- file.path(repos, "src/contrib/Archive/renv", name) - destfile <- file.path(tempdir(), name) - - message("* Downloading renv ", version, " ... ", appendLF = FALSE) - - for (url in urls) { - - status <- tryCatch( - renv_bootstrap_download_impl(url, destfile), - condition = identity - ) - - if (identical(status, 0L)) { - message("OK") - return(destfile) - } - - } - - message("FAILED") - return(FALSE) - - } - - renv_bootstrap_download_tarball <- function(version) { - - # if the user has provided the path to a tarball via - # an environment variable, then use it - tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) - if (is.na(tarball)) - return() - - # allow directories - info <- file.info(tarball, extra_cols = FALSE) - if (identical(info$isdir, TRUE)) { - name <- sprintf("renv_%s.tar.gz", version) - tarball <- file.path(tarball, name) - } - - # bail if it doesn't exist - if (!file.exists(tarball)) - return() - - fmt <- "* Bootstrapping with tarball at path '%s'." - msg <- sprintf(fmt, tarball) - message(msg) - - tarball - - } - - renv_bootstrap_download_github <- function(version) { - - enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") - if (!identical(enabled, "TRUE")) - return(FALSE) - - # prepare download options - pat <- Sys.getenv("GITHUB_PAT") - if (nzchar(Sys.which("curl")) && nzchar(pat)) { - fmt <- "--location --fail --header \"Authorization: token %s\"" - extra <- sprintf(fmt, pat) - saved <- options("download.file.method", "download.file.extra") - options(download.file.method = "curl", download.file.extra = extra) - on.exit(do.call(base::options, saved), add = TRUE) - } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { - fmt <- "--header=\"Authorization: token %s\"" - extra <- sprintf(fmt, pat) - saved <- options("download.file.method", "download.file.extra") - options(download.file.method = "wget", download.file.extra = extra) - on.exit(do.call(base::options, saved), add = TRUE) - } - - message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE) - - url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) - name <- sprintf("renv_%s.tar.gz", version) - destfile <- file.path(tempdir(), name) - - status <- tryCatch( - renv_bootstrap_download_impl(url, destfile), - condition = identity - ) - - if (!identical(status, 0L)) { - message("FAILED") - return(FALSE) - } - - message("OK") - return(destfile) - - } - - renv_bootstrap_install <- function(version, tarball, library) { - - # attempt to install it into project library - message("* Installing renv ", version, " ... ", appendLF = FALSE) - dir.create(library, showWarnings = FALSE, recursive = TRUE) - - # invoke using system2 so we can capture and report output - bin <- R.home("bin") - exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" - r <- file.path(bin, exe) - - args <- c( - "--vanilla", "CMD", "INSTALL", "--no-multiarch", - "-l", shQuote(path.expand(library)), - shQuote(path.expand(tarball)) - ) - - output <- system2(r, args, stdout = TRUE, stderr = TRUE) - message("Done!") - - # check for successful install - status <- attr(output, "status") - if (is.numeric(status) && !identical(status, 0L)) { - header <- "Error installing renv:" - lines <- paste(rep.int("=", nchar(header)), collapse = "") - text <- c(header, lines, output) - writeLines(text, con = stderr()) - } - - status - - } - - renv_bootstrap_platform_prefix <- function() { - - # construct version prefix - version <- paste(R.version$major, R.version$minor, sep = ".") - prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") - - # include SVN revision for development versions of R - # (to avoid sharing platform-specific artefacts with released versions of R) - devel <- - identical(R.version[["status"]], "Under development (unstable)") || - identical(R.version[["nickname"]], "Unsuffered Consequences") - - if (devel) - prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") - - # build list of path components - components <- c(prefix, R.version$platform) - - # include prefix if provided by user - prefix <- renv_bootstrap_platform_prefix_impl() - if (!is.na(prefix) && nzchar(prefix)) - components <- c(prefix, components) - - # build prefix - paste(components, collapse = "/") - - } - - renv_bootstrap_platform_prefix_impl <- function() { - - # if an explicit prefix has been supplied, use it - prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) - if (!is.na(prefix)) - return(prefix) - - # if the user has requested an automatic prefix, generate it - auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) - if (auto %in% c("TRUE", "True", "true", "1")) - return(renv_bootstrap_platform_prefix_auto()) - - # empty string on failure - "" - - } - - renv_bootstrap_platform_prefix_auto <- function() { - - prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) - if (inherits(prefix, "error") || prefix %in% "unknown") { - - msg <- paste( - "failed to infer current operating system", - "please file a bug report at https://github.com/rstudio/renv/issues", - sep = "; " - ) - - warning(msg) - - } - - prefix - - } - - renv_bootstrap_platform_os <- function() { - - sysinfo <- Sys.info() - sysname <- sysinfo[["sysname"]] - - # handle Windows + macOS up front - if (sysname == "Windows") - return("windows") - else if (sysname == "Darwin") - return("macos") - - # check for os-release files - for (file in c("/etc/os-release", "/usr/lib/os-release")) - if (file.exists(file)) - return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) - - # check for redhat-release files - if (file.exists("/etc/redhat-release")) - return(renv_bootstrap_platform_os_via_redhat_release()) - - "unknown" - - } - - renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { - - # read /etc/os-release - release <- utils::read.table( - file = file, - sep = "=", - quote = c("\"", "'"), - col.names = c("Key", "Value"), - comment.char = "#", - stringsAsFactors = FALSE - ) - - vars <- as.list(release$Value) - names(vars) <- release$Key - - # get os name - os <- tolower(sysinfo[["sysname"]]) - - # read id - id <- "unknown" - for (field in c("ID", "ID_LIKE")) { - if (field %in% names(vars) && nzchar(vars[[field]])) { - id <- vars[[field]] - break - } - } - - # read version - version <- "unknown" - for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { - if (field %in% names(vars) && nzchar(vars[[field]])) { - version <- vars[[field]] - break - } - } - - # join together - paste(c(os, id, version), collapse = "-") - - } - - renv_bootstrap_platform_os_via_redhat_release <- function() { - - # read /etc/redhat-release - contents <- readLines("/etc/redhat-release", warn = FALSE) - - # infer id - id <- if (grepl("centos", contents, ignore.case = TRUE)) - "centos" - else if (grepl("redhat", contents, ignore.case = TRUE)) - "redhat" - else - "unknown" - - # try to find a version component (very hacky) - version <- "unknown" - - parts <- strsplit(contents, "[[:space:]]")[[1L]] - for (part in parts) { - - nv <- tryCatch(numeric_version(part), error = identity) - if (inherits(nv, "error")) - next - - version <- nv[1, 1] - break - - } - - paste(c("linux", id, version), collapse = "-") - - } - - renv_bootstrap_library_root_name <- function(project) { - - # use project name as-is if requested - asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") - if (asis) - return(basename(project)) - - # otherwise, disambiguate based on project's path - id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) - paste(basename(project), id, sep = "-") - - } - - renv_bootstrap_library_root <- function(project) { - - prefix <- renv_bootstrap_profile_prefix() - - path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) - if (!is.na(path)) - return(paste(c(path, prefix), collapse = "/")) - - path <- renv_bootstrap_library_root_impl(project) - if (!is.null(path)) { - name <- renv_bootstrap_library_root_name(project) - return(paste(c(path, prefix, name), collapse = "/")) - } - - renv_bootstrap_paths_renv("library", project = project) - - } - - renv_bootstrap_library_root_impl <- function(project) { - - root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) - if (!is.na(root)) - return(root) - - type <- renv_bootstrap_project_type(project) - if (identical(type, "package")) { - userdir <- renv_bootstrap_user_dir() - return(file.path(userdir, "library")) - } - - } - - renv_bootstrap_validate_version <- function(version) { - - loadedversion <- utils::packageDescription("renv", fields = "Version") - if (version == loadedversion) - return(TRUE) - - # assume four-component versions are from GitHub; three-component - # versions are from CRAN - components <- strsplit(loadedversion, "[.-]")[[1]] - remote <- if (length(components) == 4L) - paste("rstudio/renv", loadedversion, sep = "@") - else - paste("renv", loadedversion, sep = "@") - - fmt <- paste( - "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", - "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", - "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", - sep = "\n" - ) - - msg <- sprintf(fmt, loadedversion, version, remote) - warning(msg, call. = FALSE) - - FALSE - - } - - renv_bootstrap_hash_text <- function(text) { - - hashfile <- tempfile("renv-hash-") - on.exit(unlink(hashfile), add = TRUE) - - writeLines(text, con = hashfile) - tools::md5sum(hashfile) - - } - - renv_bootstrap_load <- function(project, libpath, version) { - - # try to load renv from the project library - if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) - return(FALSE) - - # warn if the version of renv loaded does not match - renv_bootstrap_validate_version(version) - - # load the project - renv::load(project) - - TRUE - - } - - renv_bootstrap_profile_load <- function(project) { - - # if RENV_PROFILE is already set, just use that - profile <- Sys.getenv("RENV_PROFILE", unset = NA) - if (!is.na(profile) && nzchar(profile)) - return(profile) - - # check for a profile file (nothing to do if it doesn't exist) - path <- renv_bootstrap_paths_renv("profile", profile = FALSE) - if (!file.exists(path)) - return(NULL) - - # read the profile, and set it if it exists - contents <- readLines(path, warn = FALSE) - if (length(contents) == 0L) - return(NULL) - - # set RENV_PROFILE - profile <- contents[[1L]] - if (!profile %in% c("", "default")) - Sys.setenv(RENV_PROFILE = profile) - - profile - - } - - renv_bootstrap_profile_prefix <- function() { - profile <- renv_bootstrap_profile_get() - if (!is.null(profile)) - return(file.path("profiles", profile, "renv")) - } - - renv_bootstrap_profile_get <- function() { - profile <- Sys.getenv("RENV_PROFILE", unset = "") - renv_bootstrap_profile_normalize(profile) - } - - renv_bootstrap_profile_set <- function(profile) { - profile <- renv_bootstrap_profile_normalize(profile) - if (is.null(profile)) - Sys.unsetenv("RENV_PROFILE") - else - Sys.setenv(RENV_PROFILE = profile) - } - - renv_bootstrap_profile_normalize <- function(profile) { - - if (is.null(profile) || profile %in% c("", "default")) - return(NULL) - - profile - - } - - renv_bootstrap_path_absolute <- function(path) { - - substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( - substr(path, 1L, 1L) %in% c(letters, LETTERS) && - substr(path, 2L, 3L) %in% c(":/", ":\\") - ) - - } - - renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { - renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") - root <- if (renv_bootstrap_path_absolute(renv)) NULL else project - prefix <- if (profile) renv_bootstrap_profile_prefix() - components <- c(root, renv, prefix, ...) - paste(components, collapse = "/") - } - - renv_bootstrap_project_type <- function(path) { - - descpath <- file.path(path, "DESCRIPTION") - if (!file.exists(descpath)) - return("unknown") - - desc <- tryCatch( - read.dcf(descpath, all = TRUE), - error = identity - ) - - if (inherits(desc, "error")) - return("unknown") - - type <- desc$Type - if (!is.null(type)) - return(tolower(type)) - - package <- desc$Package - if (!is.null(package)) - return("package") - - "unknown" - - } - - renv_bootstrap_user_dir <- function() { - dir <- renv_bootstrap_user_dir_impl() - path.expand(chartr("\\", "/", dir)) - } - - renv_bootstrap_user_dir_impl <- function() { - - # use local override if set - override <- getOption("renv.userdir.override") - if (!is.null(override)) - return(override) - - # use R_user_dir if available - tools <- asNamespace("tools") - if (is.function(tools$R_user_dir)) - return(tools$R_user_dir("renv", "cache")) - - # try using our own backfill for older versions of R - envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") - for (envvar in envvars) { - root <- Sys.getenv(envvar, unset = NA) - if (!is.na(root)) - return(file.path(root, "R/renv")) - } - - # use platform-specific default fallbacks - if (Sys.info()[["sysname"]] == "Windows") - file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") - else if (Sys.info()[["sysname"]] == "Darwin") - "~/Library/Caches/org.R-project.R/R/renv" - else - "~/.cache/R/renv" - - } - - - renv_json_read <- function(file = NULL, text = NULL) { - - text <- paste(text %||% read(file), collapse = "\n") - - # find strings in the JSON - pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' - locs <- gregexpr(pattern, text)[[1]] - - # if any are found, replace them with placeholders - replaced <- text - strings <- character() - replacements <- character() - - if (!identical(c(locs), -1L)) { - - # get the string values - starts <- locs - ends <- locs + attr(locs, "match.length") - 1L - strings <- substring(text, starts, ends) - - # only keep those requiring escaping - strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) - - # compute replacements - replacements <- sprintf('"\032%i\032"', seq_along(strings)) - - # replace the strings - mapply(function(string, replacement) { - replaced <<- sub(string, replacement, replaced, fixed = TRUE) - }, strings, replacements) - - } - - # transform the JSON into something the R parser understands - transformed <- replaced - transformed <- gsub("[[{]", "list(", transformed) - transformed <- gsub("[]}]", ")", transformed) - transformed <- gsub(":", "=", transformed, fixed = TRUE) - text <- paste(transformed, collapse = "\n") - - # parse it - json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] - - # construct map between source strings, replaced strings - map <- as.character(parse(text = strings)) - names(map) <- as.character(parse(text = replacements)) - - # convert to list - map <- as.list(map) - - # remap strings in object - remapped <- renv_json_remap(json, map) - - # evaluate - eval(remapped, envir = baseenv()) - - } - - renv_json_remap <- function(json, map) { - - # fix names - if (!is.null(names(json))) { - lhs <- match(names(json), names(map), nomatch = 0L) - rhs <- match(names(map), names(json), nomatch = 0L) - names(json)[rhs] <- map[lhs] - } - - # fix values - if (is.character(json)) - return(map[[json]] %||% json) - - # handle true, false, null - if (is.name(json)) { - text <- as.character(json) - if (text == "true") - return(TRUE) - else if (text == "false") - return(FALSE) - else if (text == "null") - return(NULL) - } - - # recurse - if (is.recursive(json)) { - for (i in seq_along(json)) { - json[i] <- list(renv_json_remap(json[[i]], map)) - } - } - - json - - } - - # load the renv profile, if any - renv_bootstrap_profile_load(project) - - # construct path to library root - root <- renv_bootstrap_library_root(project) - - # construct library prefix for platform - prefix <- renv_bootstrap_platform_prefix() - - # construct full libpath - libpath <- file.path(root, prefix) - - # attempt to load - if (renv_bootstrap_load(project, libpath, version)) - return(TRUE) - - # load failed; inform user we're about to bootstrap - prefix <- paste("# Bootstrapping renv", version) - postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "") - header <- paste(prefix, postfix) - message(header) - - # perform bootstrap - bootstrap(version, libpath) - - # exit early if we're just testing bootstrap - if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) - return(TRUE) - - # try again to load - if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { - message("* Successfully installed and loaded renv ", version, ".") - return(renv::load()) - } - - # failed to download or load renv; warn the user - msg <- c( - "Failed to find an renv installation: the project will not be loaded.", - "Use `renv::activate()` to re-initialize the project." - ) - - warning(paste(msg, collapse = "\n"), call. = FALSE) - -}) diff --git a/renv/settings.dcf b/renv/settings.dcf deleted file mode 100644 index 169d82f1b..000000000 --- a/renv/settings.dcf +++ /dev/null @@ -1,10 +0,0 @@ -bioconductor.version: -external.libraries: -ignored.packages: -package.dependency.fields: Imports, Depends, LinkingTo -r.version: -snapshot.type: implicit -use.cache: TRUE -vcs.ignore.cellar: TRUE -vcs.ignore.library: TRUE -vcs.ignore.local: TRUE From 1cfa734b9243adf566da3e9ab544703cac4d8456 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 22 Aug 2022 13:29:36 -0400 Subject: [PATCH 173/979] Remove browser() call --- R/preprocessing5.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 57056212d..4f69f49de 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -836,7 +836,6 @@ VST.DelayedMatrix <- function( if (isTRUE(x = verbose)) { close(con = pb) } - browser() hvf.info <- do.call(what = 'rbind', args = hvf.info) # Set variable status hvf.info$variable <- FALSE From 192a87f3b91e412340721d64b5a4cc7f37310b15 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 22 Aug 2022 15:34:30 -0400 Subject: [PATCH 174/979] Fix VST for delayed matrices --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 4f69f49de..9af502b06 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -726,7 +726,7 @@ VST.DelayedMatrix <- function( # DelayedArray::colMeans(x = data) # } # Calculate variance - hvf.info$variance <- NA_real_ + # hvf.info$variance <- NA_real_ if (isTRUE(x = verbose)) { # inform(message = "Calculating feature variances") inform(message = "Identifying variable features") From 9ed93ee4eff4625fdfb42e6e0016634ee08b357b Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 24 Aug 2022 12:33:02 -0400 Subject: [PATCH 175/979] Fixes for FeaturePlot --- R/visualization.R | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index ef7fe5943..ee58d4205 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1105,7 +1105,7 @@ FeaturePlot <- function( } else if (!all(dims %in% colnames(x = data))) { stop("The dimensions requested were not found", call. = FALSE) } - features <- colnames(x = data)[4:ncol(x = data)] + features <- setdiff(x = names(x = data), y = c(dims, 'ident')) # Determine cutoffs min.cutoff <- mapply( FUN = function(cutoff, feature) { @@ -1137,37 +1137,32 @@ FeaturePlot <- function( if (length(x = check.lengths) != 1) { stop("There must be the same number of minimum and maximum cuttoffs as there are features") } + names(x = min.cutoff) <- names(x = max.cutoff) <- features brewer.gran <- ifelse( test = length(x = cols) == 1, yes = brewer.pal.info[cols, ]$maxcolors, no = length(x = cols) ) # Apply cutoffs - data[, 4:ncol(x = data)] <- sapply( - X = 4:ncol(x = data), - FUN = function(index) { - data.feature <- as.vector(x = data[, index]) - min.use <- SetQuantile(cutoff = min.cutoff[index - 3], data.feature) - max.use <- SetQuantile(cutoff = max.cutoff[index - 3], data.feature) - data.feature[data.feature < min.use] <- min.use - data.feature[data.feature > max.use] <- max.use - if (brewer.gran == 2) { - return(data.feature) - } - data.cut <- if (all(data.feature == 0)) { - 0 - } - else { + for (i in seq_along(along.with = features)) { + f <- features[i] + data.feature <- data[[f]] + min.use <- SetQuantile(cutoff = min.cutoff[f], data = data.feature) + max.use <- SetQuantile(cutoff = max.cutoff[f], data = data.feature) + data.feature[data.feature < min.use] <- min.use + data.feature[data.feature > max.use] <- max.use + if (brewer.gran != 2) { + data.feature <- if (all(data.feature == 0)) { + rep_len(x = 0, length.out = length(x = data.feature)) + } else { as.numeric(x = as.factor(x = cut( x = as.numeric(x = data.feature), - breaks = brewer.gran + breaks = 2 ))) } - return(data.cut) } - ) - colnames(x = data)[4:ncol(x = data)] <- features - rownames(x = data) <- cells + data[[f]] <- data.feature + } # Figure out splits (FeatureHeatmap) data$split <- if (is.null(x = split.by)) { RandomName() From 8a5543a7b82ffa9785fd86a4aa53f852230cea2f Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 2 Sep 2022 12:15:15 -0400 Subject: [PATCH 176/979] Update remotes --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 88bb09f33..8a9da8afb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,7 @@ Authors@R: c( ) URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues -Remotes: mojaveazure/seurat-object@feat/standard +Remotes: mojaveazure/seurat-object@feat/docs5 Depends: R (>= 4.0.0), methods From 246af2b25a53902fb489206847d27bdd7b4c93e9 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 7 Sep 2022 15:51:40 -0400 Subject: [PATCH 177/979] Add SCT v5 support --- R/preprocessing.R | 11 +- R/preprocessing5.R | 483 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 489 insertions(+), 5 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index c0d954e1a..9ba5b8d1f 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1419,7 +1419,7 @@ SampleUMI <- function( #' Use regularized negative binomial regression to normalize UMI count data #' #' This function calls sctransform::vst. The sctransform package is available at -#' https://github.com/ChristophH/sctransform. +#' https://github.com/satijalab/sctransform. #' Use this function as an alternative to the NormalizeData, #' FindVariableFeatures, ScaleData workflow. Results are saved in a new assay #' (named SCT by default) with counts being (corrected) counts, data being log1p(counts), @@ -1819,7 +1819,7 @@ SCTransform.Assay <- function( #' SCTransform.Seurat <- function( object, - assay = 'RNA', + assay = NULL, new.assay.name = 'SCT', reference.SCT.model = NULL, do.correct.umi = TRUE, @@ -1838,11 +1838,12 @@ SCTransform.Seurat <- function( ... ) { assay <- assay %||% DefaultAssay(object = object) - umi <- GetAssay(object = object, assay = assay, slot = "counts") - assay.obj <- GetAssay(object = object, assay = assay) + if (verbose){ + message("Running SCTransform on assay: ", assay) + } cell.attr <- slot(object = object, name = 'meta.data') - assay.data <- SCTransform(object = assay.obj, + assay.data <- SCTransform(object = object[[assay]], cell.attr = cell.attr, reference.SCT.model = reference.SCT.model, do.correct.umi = do.correct.umi, diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 9af502b06..ca3e8cd5a 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1215,3 +1215,486 @@ VST.matrix <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # S4 Methods #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + + +################################################################################ +################################# SCTransform ################################## +################################################################################ + + +#' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features +#' LayerData LayerData<- +#' +#' @method SCTransform StdAssay +#' @export +#' +SCTransform.StdAssay <- function( + object, + layer = 'counts', + cell.attr = NULL, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) { + olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) + layers <- Layers(object = object, search = layer) + dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) + sct.assay.list <- list() + for (i in seq_along(along.with = layers)) { + l <- layers[i] + if (isTRUE(x = verbose)) { + message("Running SCTransform on layer: ", l) + } + counts <- LayerData( + object = object, + layer = l, + features = Features(x = object, layer = l), + cells = Cells(x = object, layer = l) + ) + feature.grid <- DelayedArray::rowAutoGrid(x = counts) + ##TODO: handle this later for boundary conditions + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = 2000) + + ##TODO: handle this later for boundary conditions + vp <- cells.grid[[1L]] + + # Read a block from a delayed matrix + sparse <- DelayedArray::is_sparse(x = counts) # TRUE + block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = sparse) + + counts <- as(object = block, Class = 'dgCMatrix') + cell.attr.object <- cell.attr[colnames(x = counts),] + + if (!identical(rownames(cell.attr.object), colnames(counts))) { + print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) + print(length(setdiff(colnames(counts),rownames(cell.attr.object)))) + stop("cell attribute row names must match column names of count matrix") + } + vst.out <- SCTransform(object = counts, + cell.attr = cell.attr.object, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = verbose, + ...) + + residual.type <- vst.out[['residual_type']] %||% 'pearson' + sct.method <- vst.out[['sct.method']] + # create output assay and put (corrected) umi counts in count slot + if (do.correct.umi & residual.type == 'pearson') { + if (verbose) { + message('Place corrected count matrix in counts slot') + } + assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + vst.out$umi_corrected <- NULL + } else { + # TODO: restore once check.matrix is in SeuratObject + # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) + assay.out <- CreateAssayObject(counts = counts) + } + # set the variable genes + VariableFeatures(object = assay.out) <- vst.out$variable_features + # put log1p transformed counts in data + assay.out <- SetAssayData( + object = assay.out, + slot = 'data', + new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) + ) + scale.data <- vst.out$y + assay.out <- SetAssayData( + object = assay.out, + slot = 'scale.data', + new.data = scale.data + ) + vst.out$y <- NULL + # save clip.range into vst model + vst.out$arguments$sct.clip.range <- clip.range + vst.out$arguments$sct.method <- sct.method + Misc(object = assay.out, slot = 'vst.out') <- vst.out + assay.out <- as(object = assay.out, Class = "SCTAssay") + + sct.assay.list[[dataset.names[i]]] <- assay.out + } + + # Return array by merging everythin + if (length(x = sct.assay.list)>1){ + merged.assay <- merge.SCTAssay(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) + # set the names of SCTmodels to be layer names + models <- slot(object = merged.assay, name="SCTModel.list") + names(models) <- names(x = sct.assay.list) + slot(object = merged.assay, name="SCTModel.list") <- models + } else { + return (sct.assay.list[[1]]) + } + gc(verbose = FALSE) + return(merged.assay) +} + +#' @importFrom SeuratObject DefaultAssay +#' +#' @method SCTransform Seurat5 +#' @export +#' +SCTransform.Seurat5 <- function( + object, + assay = NULL, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + save.data = 'data', + save.scaledata = 'scale.data', + verbose = TRUE, + ... +) { + assay <- assay[1L] %||% DefaultAssay(object = object) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + cell.attr.list <- slot(object = object, name = 'meta.data') + + object[[assay]] <- SCTransform(object = object[[assay]], + cell.attr.list = cell.attr.list, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = verbose, + ...) + return(object) +} + + +#' Calculate pearson residuals of features not in the scale.data +#' +#' This function calls sctransform::get_residuals. +#' +#' @param object A seurat object +#' @param features Name of features to add into the scale.data +#' @param assay Name of the assay of the seurat object generated by SCTransform +#' @param layer Name (prefix) of the layer to pull counts from +#' @param umi.assay Name of the assay of the seurat object containing UMI matrix +#' and the default is RNA +#' @param clip.range Numeric of length two specifying the min and max values the +#' Pearson residual will be clipped to +#' @param replace.value Recalculate residuals for all features, even if they are +#' already present. Useful if you want to change the clip.range. +#' @param na.rm For features where there is no feature model stored, return NA +#' for residual value in scale.data when na.rm = FALSE. When na.rm is TRUE, only +#' return residuals for features with a model stored for all cells. +#' @param verbose Whether to print messages and progress bars +#' +#' @return Returns a Seurat object containing Pearson residuals of added +#' features in its scale.data +#' +#' @importFrom sctransform get_residuals +#' @importFrom matrixStats rowAnyNAs +#' +#' @export +#' @concept preprocessing +#' +#' @seealso \code{\link[sctransform]{get_residuals}} +#' +#' @examples +#' data("pbmc_small") +#' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) +#' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) +#' +GetResidual.V5 <- function( + object, + features, + assay = NULL, + umi.assay = "RNA", + layer = "counts", + clip.range = NULL, + replace.value = FALSE, + na.rm = TRUE, + verbose = TRUE +) { + assay <- assay %||% DefaultAssay(object = object) + if (IsSCT(assay = object[[assay]])) { + object[[assay]] <- as(object[[assay]], 'SCTAssay') + } + if (!inherits(x = object[[assay]], what = "SCTAssay")) { + stop(assay, " assay was not generated by SCTransform") + } + sct.models <- levels(x = object[[assay]]) + if (length(x = sct.models) == 0) { + warning("SCT model not present in assay", call. = FALSE, immediate. = TRUE) + return(object) + } + possible.features <- unique(x = unlist(x = lapply(X = sct.models, FUN = function(x) { + rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = x)) + } + ))) + bad.features <- setdiff(x = features, y = possible.features) + if (length(x = bad.features) > 0) { + warning("The following requested features are not present in any models: ", + paste(bad.features, collapse = ", "), call. = FALSE) + features <- intersect(x = features, y = possible.features) + } + features.orig <- features + if (na.rm) { + # only compute residuals when feature model info is present in all + features <- names(x = which(x = table(unlist(x = lapply( + X = sct.models, + FUN = function(x) { + rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = x)) + } + ))) == length(x = sct.models))) + if (length(x = features) == 0) { + return(object) + } + } + features <- intersect(x = features.orig, y = features) + if (length(x = sct.models) > 1 & verbose) { + message("This SCTAssay contains multiple SCT models. Computing residuals for cells using") + } + cat(sct.models) + + new.residuals <- lapply( + X = sct.models, + FUN = function(x) { + GetResidualSCTModel.V5( + object = object, + umi.assay = umi.assay, + assay = assay, + layer = layer, + SCTModel = x, + new_features = features, + replace.value = replace.value, + clip.range = clip.range, + verbose = verbose + ) + } + ) + existing.data <- GetAssayData(object = object, slot = 'scale.data', assay = assay) + all.features <- union(x = rownames(x = existing.data), y = features) + new.scale <- matrix( + data = NA, + nrow = length(x = all.features), + ncol = ncol(x = object), + dimnames = list(all.features, Cells(x = object)) + ) + if (nrow(x = existing.data) > 0){ + new.scale[1:nrow(x = existing.data), ] <- existing.data + } + if (length(x = new.residuals) == 1 & is.list(x = new.residuals)) { + new.residuals <- new.residuals[[1]] + } else { + new.residuals <- Reduce(cbind, new.residuals) + } + new.scale[rownames(x = new.residuals), colnames(x = new.residuals)] <- new.residuals + if (na.rm) { + new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] + } + object <- SetAssayData( + object = object, + assay = assay, + slot = "scale.data", + new.data = new.scale + ) + if (any(!features.orig %in% rownames(x = new.scale))) { + bad.features <- features.orig[which(!features.orig %in% rownames(x = new.scale))] + warning("Residuals not computed for the following requested features: ", + paste(bad.features, collapse = ", "), call. = FALSE) + } + return(object) +} + + +# Calculate pearson residuals of features not in the scale.data +# This function is the secondary function under GetResidual +# +# @param object A seurat object +# @param features Name of features to add into the scale.data +# @param assay Name of the assay of the seurat object generated by SCTransform +# @param vst_out The SCT parameter list +# @param clip.range Numeric of length two specifying the min and max values the Pearson residual +# will be clipped to +# Useful if you want to change the clip.range. +# @param verbose Whether to print messages and progress bars +# +# @return Returns a matrix containing not-centered pearson residuals of added features +# +#' @importFrom sctransform get_residuals +# +GetResidualSCTModel.V5 <- function( + object, + assay, + umi.assay, + layer, + SCTModel, + new_features, + clip.range, + replace.value, + verbose +) { + clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct + model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) + #umi.assay <- SCTResults(object = object[[assay]], slot = "umi.assay", model = SCTModel) + model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" + scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) + if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { + existing_features <- names(x = which(x = ! apply( + X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], + MARGIN = 1, + FUN = anyNA) + )) + } else { + existing_features <- character() + } + if (replace.value) { + features_to_compute <- new_features + } else { + features_to_compute <- setdiff(x = new_features, y = existing_features) + } + if (sct.method == "reference.model") { + if (verbose) { + message("sct.model ", SCTModel, " is from reference, so no residuals will be recalculated") + } + features_to_compute <- character() + } + if (!umi.assay %in% Assays(object = object)) { + warning("The umi assay (", umi.assay, ") is not present in the object. ", + "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) + return(NULL) + } + diff_features <- setdiff(x = features_to_compute, y = model.features) + intersect_features <- intersect(x = features_to_compute, y = model.features) + if (length(x = diff_features) == 0) { + #umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts" )[features_to_compute, model.cells, drop = FALSE] + print(object[[umi.assay]]) + print(layer) + #layer <- LayerData(object = object[[umi.assay]], layer = layer) + + #olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object[[umi.assay]]) + layers <- Layers(object = object[[umi.assay]], search = layer) + dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) + vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + + for (i in seq_along(along.with = layers)) { + l <- layers[i] + if (isTRUE(x = verbose)) { + message("Running SCTransform on layer: ", l) + } + counts <- LayerData( + object = object, + layer = l, + features = Features(x = object, layer = l), + cells = Cells(x = object, layer = l) + ) + feature.grid <- DelayedArray::rowAutoGrid(x = counts) + ##TODO: handle this later for boundary conditions + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = 2000) + + + new_residuals <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) + umi <- as(object = block, Class = "dgCMatrix") + if (verbose) { + message("sct.model: ", SCTModel) + } + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + res_clip_range = c(clip.min, clip.max), + verbosity = as.numeric(x = verbose) * 2 + ) + new_residual <- as.matrix(x = new_residual) + # centered data + new_residuals[[i]] <- new_residual + } + + new_residual <- do.call(what = cbind, args = new_residuals) + new_residual <- new_residual - rowMeans(x = new_residual) + + } + + } else { + warning( + "In the SCTModel ", SCTModel, ", the following ", length(x = diff_features), + " features do not exist in the counts slot: ", paste(diff_features, collapse = ", ") + ) + if (length(x = intersect_features) == 0) { + return(matrix( + data = NA, + nrow = length(x = features_to_compute), + ncol = length(x = model.cells), + dimnames = list(features_to_compute, model.cells) + )) + } + umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts")[intersect_features, model.cells, drop = FALSE] + } + clip.max <- max(clip.range) + clip.min <- min(clip.range) + if (nrow(x = umi) > 0) { + vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + if (verbose) { + message("sct.model: ", SCTModel) + } + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + res_clip_range = c(clip.min, clip.max), + verbosity = as.numeric(x = verbose) * 2 + ) + new_residual <- as.matrix(x = new_residual) + # centered data + new_residual <- new_residual - rowMeans(x = new_residual) + } else { + new_residual <- matrix(data = NA, nrow = 0, ncol = length(x = model.cells), dimnames = list(c(), model.cells)) + } + old.features <- setdiff(x = new_features, y = features_to_compute) + if (length(x = old.features) > 0) { + old_residuals <- GetAssayData(object = object[[assay]], slot = "scale.data")[old.features, model.cells, drop = FALSE] + new_residual <- rbind(new_residual, old_residuals)[new_features, ] + } + return(new_residual) +} + From c735803680bb53071e01636c60f712c354c476c6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 7 Sep 2022 15:52:37 -0400 Subject: [PATCH 178/979] project delay array --- R/integration.R | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/R/integration.R b/R/integration.R index 5dba9256b..ab1e47eea 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5749,3 +5749,55 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } } } + + +## project delayed array to reference PCA + + +ProjectCellEmbeddings_DelayedAssay <- function( + query.data, + block.size = 1e9, + reference, + assay = NULL, + reduction, + dims = NULL, + feature.mean = NULL, + feature.sd = NULL +) { + RowMeanSparse <- sparseMatrixStats::rowMeans2 + RowVarSparse <- sparseMatrixStats::rowVars + dims <- dims %||% 1:ncol(reference[[reduction]]) + assay <- assay %||% DefaultAssay(reference) + features <- intersect(rownames(query.data), + rownames(reference[[reduction]]@feature.loadings)) + query.data <- query.data[features,] + feature.mean <- feature.mean[features] %||% + RowMeanSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,]) + + feature.sd <- feature.sd[features] %||% + sqrt(RowVarSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,])) + feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) + + + setAutoBlockSize(size = block.size) # 1 GB + cells.grid <- DelayedArray::colAutoGrid(x = query.data) + + emb.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + data.block <- DelayedArray::read_block(x = query.data, + viewport = vp, + as.sparse = TRUE) + data.block <- t(apply(data.block, MARGIN = 2, function(x) { + x <- (x - feature.mean)/feature.sd + return(x) + })) + emb.block <- data.block %*% reference[[reduction]]@feature.loadings[features,] + emb.list[[i]] <- t(emb.block) + } + # list to matrix, column has to be cells + emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) + rownames(emb.mat) <- colnames(query.data) + colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] + return(emb.mat) +} \ No newline at end of file From 5bc6072dd209b27143ab6f1007c2a10ee98c4a71 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 7 Sep 2022 23:20:03 -0400 Subject: [PATCH 179/979] Add fetchresiduals --- R/preprocessing5.R | 255 ++++++++++++++++++++++++++++----------------- 1 file changed, 159 insertions(+), 96 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index ca3e8cd5a..242f0894c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1280,8 +1280,8 @@ SCTransform.StdAssay <- function( cell.attr.object <- cell.attr[colnames(x = counts),] if (!identical(rownames(cell.attr.object), colnames(counts))) { - print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) - print(length(setdiff(colnames(counts),rownames(cell.attr.object)))) + # print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) + # print(length(setdiff(colnames(counts),rownames(cell.attr.object)))) stop("cell attribute row names must match column names of count matrix") } vst.out <- SCTransform(object = counts, @@ -1440,13 +1440,14 @@ SCTransform.Seurat5 <- function( #' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) #' -GetResidual.V5 <- function( +FetchResiduals <- function( object, features, assay = NULL, umi.assay = "RNA", layer = "counts", clip.range = NULL, + reference.SCT.model = NULL, replace.value = FALSE, na.rm = TRUE, verbose = TRUE @@ -1490,34 +1491,70 @@ GetResidual.V5 <- function( if (length(x = sct.models) > 1 & verbose) { message("This SCTAssay contains multiple SCT models. Computing residuals for cells using") } - cat(sct.models) - new.residuals <- lapply( - X = sct.models, - FUN = function(x) { - GetResidualSCTModel.V5( - object = object, - umi.assay = umi.assay, - assay = assay, - layer = layer, - SCTModel = x, - new_features = features, - replace.value = replace.value, - clip.range = clip.range, - verbose = verbose - ) + # Get all (count) layers + layers <- Layers(object = object[[umi.assay]], search = layer) + + # iterate over layer running sct model for each of the object names + new.residuals <- list() + total_cells <- 0 + all_cells <- c() + if (!is.null(x = reference.SCT.model) ) { + + if (inherits(x = reference.SCT.model, what = "SCTModel")) { + reference.SCT.model <- SCTModel_to_vst(SCTModel = reference.SCT.model) } - ) + + if (is.list(x = reference.SCT.model) & inherits(x = reference.SCT.model[[1]], what = "SCTModel")) { + stop("reference.SCT.model must be one SCTModel rather than a list of SCTModel") + } + #print(reference.SCT.model) + #if ('latent_var' %in% names(x = vst.args)) { + # stop('custom latent variables are not supported when reference.SCT.model is given') + #} + if (reference.SCT.model$model_str != 'y ~ log_umi') { + stop('reference.SCT.model must be derived using default SCT regression formula, `y ~ log_umi`') + } + + } + for (i in seq_along(along.with = layers)){ + l <- layers[i] + sct_model <- sct.models[[i]] + # these cells belong to this layer + layer_cells <- Cells(x = object[[umi.assay]], layer = l) + all_cells <- c(all_cells, layer_cells) + total_cells <- total_cells + length(layer_cells) + #cat(layer_cells) + #stop("") + # calculate residual using this model and these cells + new.residuals[[i]] <- GetResidualSCTModel.V5( + object = object, + umi.assay = umi.assay, + assay = assay, + layer = l, + layer.cells = layer_cells, + SCTModel = sct_model, + reference.SCT.model = reference.SCT.model, + new_features = features, + replace.value = replace.value, + clip.range = clip.range, + verbose = verbose + ) + + } + existing.data <- GetAssayData(object = object, slot = 'scale.data', assay = assay) all.features <- union(x = rownames(x = existing.data), y = features) new.scale <- matrix( data = NA, nrow = length(x = all.features), - ncol = ncol(x = object), - dimnames = list(all.features, Cells(x = object)) + ncol = total_cells, + dimnames = list(all.features, all_cells) ) + + common_cells <- intersect(colnames(new.scale), colnames(existing.data)) if (nrow(x = existing.data) > 0){ - new.scale[1:nrow(x = existing.data), ] <- existing.data + new.scale[rownames(x = existing.data), common_cells] <- existing.data[, common_cells] } if (length(x = new.residuals) == 1 & is.list(x = new.residuals)) { new.residuals <- new.residuals[[1]] @@ -1528,18 +1565,19 @@ GetResidual.V5 <- function( if (na.rm) { new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] } - object <- SetAssayData( - object = object, - assay = assay, - slot = "scale.data", - new.data = new.scale - ) + #object <- SetAssayData( + # object = object, + # assay = assay, + # slot = "scale.data", + # new.data = new.scale + #) + # object[[assay]]@scale.data <- new.scale if (any(!features.orig %in% rownames(x = new.scale))) { bad.features <- features.orig[which(!features.orig %in% rownames(x = new.scale))] warning("Residuals not computed for the following requested features: ", paste(bad.features, collapse = ", "), call. = FALSE) } - return(object) + return(new.scale) } @@ -1558,27 +1596,43 @@ GetResidual.V5 <- function( # @return Returns a matrix containing not-centered pearson residuals of added features # #' @importFrom sctransform get_residuals +#' @importFrom Matrix colSums # GetResidualSCTModel.V5 <- function( object, assay, umi.assay, layer, + layer.cells, SCTModel, + reference.SCT.model, new_features, clip.range, replace.value, verbose ) { clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct + model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) - #umi.assay <- SCTResults(object = object[[assay]], slot = "umi.assay", model = SCTModel) model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" + + if (!is.null(reference.SCT.model)){ + # use reference SCT model + sct.method <- "reference" + } scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) - if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { + + scale.data.cells.common <- intersect(colnames(x = scale.data.cells), layer.cells) + scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) + + if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { + existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) + full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) + full.scale.data[rownames(x = existing.scale.data), colnames(x = existing.scale.data)] <- existing.scale.data existing_features <- names(x = which(x = ! apply( - X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], + X = full.scale.data, MARGIN = 1, FUN = anyNA) )) @@ -1601,66 +1655,94 @@ GetResidualSCTModel.V5 <- function( "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) return(NULL) } + # these features do not have feature attriutes diff_features <- setdiff(x = features_to_compute, y = model.features) intersect_features <- intersect(x = features_to_compute, y = model.features) - if (length(x = diff_features) == 0) { - #umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts" )[features_to_compute, model.cells, drop = FALSE] - print(object[[umi.assay]]) - print(layer) - #layer <- LayerData(object = object[[umi.assay]], layer = layer) - - #olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object[[umi.assay]]) - layers <- Layers(object = object[[umi.assay]], search = layer) - dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) + if (sct.method == "reference"){ + + vst_out <- reference.SCT.model + # override clip.range + clip.range <- vst_out$arguments$sct.clip.range + umi.field <- paste0("nCount_", assay) + # get rid of the cell attributes + vst_out$cell_attr <- NULL + all.features <- intersect( + x = rownames(x = vst_out$gene_attr), + y = Features(object[[umi.assay]], layer = "counts") + ) + vst_out$gene_attr <- vst_out$gene_attr[all.features ,] + vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features,] + + } else { vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + } + clip.max <- max(clip.range) + clip.min <- min(clip.range) - for (i in seq_along(along.with = layers)) { - l <- layers[i] - if (isTRUE(x = verbose)) { - message("Running SCTransform on layer: ", l) - } - counts <- LayerData( - object = object, - layer = l, - features = Features(x = object, layer = l), - cells = Cells(x = object, layer = l) - ) - feature.grid <- DelayedArray::rowAutoGrid(x = counts) - ##TODO: handle this later for boundary conditions - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = 2000) + if (length(x = diff_features) == 0) { + #new_residuals <- GetAssayData(object = object, assay = assay, slot="scale.data")[features_to_compute, , drop=FALSE] + counts <- LayerData( + object = object[[umi.assay]], + layer = layer, + cells = layer.cells + ) + cells.grid <- DelayedArray::colAutoGrid(x = counts)#, ncol = 5000) + new_residuals <- list() + cat(dim(counts)) + + for (i in seq_len(length.out = length(x = cells.grid))) { + + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) + ## TODO: Maybe read only interesting genes + umi.all <- as(object = block, Class = "dgCMatrix") + umi <- umi.all[features_to_compute,, drop=FALSE] - new_residuals <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) - umi <- as(object = block, Class = "dgCMatrix") - if (verbose) { + ## Add cell_attr for missing cells + cell_attr <- data.frame(umi = colSums(umi.all), log_umi = log10(x = colSums(umi.all))) + cell_attr$cells_step1 <- FALSE + #cell_attr <- as.matrix(x = cell_attr) + rownames(cell_attr) <- colnames(umi) + if (sct.method == "reference") { + vst_out$cell_attr <- cell_attr + } else { + cell_attr_existing <- vst_out$cell_attr + cells_missing <- setdiff(rownames(cell_attr), rownames(cell_attr_existing)) + vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing,]) + vst_out$cell_attr <- vst_out$cell_attr[colnames(umi),] + } + + if (verbose) { + if (sct.method == "reference") { + message("sct.model: ", reference.SCT.model) + } else { message("sct.model: ", SCTModel) } - new_residual <- get_residuals( - vst_out = vst_out, - umi = umi, - residual_type = "pearson", - res_clip_range = c(clip.min, clip.max), - verbosity = as.numeric(x = verbose) * 2 - ) - new_residual <- as.matrix(x = new_residual) - # centered data - new_residuals[[i]] <- new_residual } - new_residual <- do.call(what = cbind, args = new_residuals) - new_residual <- new_residual - rowMeans(x = new_residual) - + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + res_clip_range = c(clip.min, clip.max), + verbosity = as.numeric(x = verbose) * 2 + ) + new_residual <- as.matrix(x = new_residual) + # centered data + new_residuals[[i]] <- new_residual } - + new_residual <- do.call(what = cbind, args = new_residuals) + new_residual <- new_residual - rowMeans(x = new_residual) + #return (new_residuals) } else { + # Some features do not exist warning( "In the SCTModel ", SCTModel, ", the following ", length(x = diff_features), " features do not exist in the counts slot: ", paste(diff_features, collapse = ", ") ) if (length(x = intersect_features) == 0) { + # No features exist return(matrix( data = NA, nrow = length(x = features_to_compute), @@ -1668,28 +1750,9 @@ GetResidualSCTModel.V5 <- function( dimnames = list(features_to_compute, model.cells) )) } - umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts")[intersect_features, model.cells, drop = FALSE] - } - clip.max <- max(clip.range) - clip.min <- min(clip.range) - if (nrow(x = umi) > 0) { - vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) - if (verbose) { - message("sct.model: ", SCTModel) - } - new_residual <- get_residuals( - vst_out = vst_out, - umi = umi, - residual_type = "pearson", - res_clip_range = c(clip.min, clip.max), - verbosity = as.numeric(x = verbose) * 2 - ) - new_residual <- as.matrix(x = new_residual) - # centered data - new_residual <- new_residual - rowMeans(x = new_residual) - } else { - new_residual <- matrix(data = NA, nrow = 0, ncol = length(x = model.cells), dimnames = list(c(), model.cells)) + } + old.features <- setdiff(x = new_features, y = features_to_compute) if (length(x = old.features) > 0) { old_residuals <- GetAssayData(object = object[[assay]], slot = "scale.data")[old.features, model.cells, drop = FALSE] From 116c6734f37b489da425a978b56d84f1c4a301a8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 8 Sep 2022 00:32:49 -0400 Subject: [PATCH 180/979] add key --- R/integration.R | 3 +++ R/objects.R | 2 ++ 2 files changed, 5 insertions(+) diff --git a/R/integration.R b/R/integration.R index ab1e47eea..7e5496e71 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4494,6 +4494,7 @@ PairwiseIntegrateReference <- function( # TODO: restore once check.matrix is in SeuratObject # merged.obj[[new.assay.name]] <- CreateAssayObject(data = integrated.matrix, check.matrix = FALSE) merged.obj[[new.assay.name]] <- CreateAssayObject(data = integrated.matrix) + merged.obj[[new.assay.name]]@key <- paste0(new.assay.name,'_') DefaultAssay(object = merged.obj) <- new.assay.name object.list[[as.character(x = ii)]] <- merged.obj object.list[[merge.pair[[1]]]] <- NULL @@ -4508,6 +4509,7 @@ PairwiseIntegrateReference <- function( integrated.data <- integrated.data[, colnames(x = unintegrated)] new.assay <- new( Class = 'Assay', + key = paste0(new.assay.name, "_"), counts = new(Class = "dgCMatrix"), data = integrated.data, scale.data = matrix(), @@ -5115,6 +5117,7 @@ TransformDataMatrix <- function( new.expression <- new.expression[, colnames(object)] new.assay <- new( Class = 'Assay', + key = paste0(new.assay.name,"_"), counts = new(Class = "dgCMatrix"), data = new.expression, scale.data = matrix(), diff --git a/R/objects.R b/R/objects.R index e20b99b85..40ec11022 100644 --- a/R/objects.R +++ b/R/objects.R @@ -467,6 +467,7 @@ DietSeurat <- function( graphs = NULL ) { object <- UpdateSlots(object = object) + return(object) assays <- assays %||% FilterObjects(object = object, classes.keep = "Assay") assays <- assays[assays %in% FilterObjects(object = object, classes.keep = 'Assay')] if (length(x = assays) == 0) { @@ -492,6 +493,7 @@ DietSeurat <- function( object[[assay]] <- NULL } } else { + browser() object[[assay]] <- subset(x = object[[assay]], features = features.assay) } } From cc0e85fc5ba3079e52c47e4bf1c7275102db4009 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 09:36:30 -0400 Subject: [PATCH 181/979] Add fetch residuals --- NAMESPACE | 3 + R/preprocessing5.R | 189 +++++++++++++++++-------------------- man/FeaturePlot.Rd | 8 +- man/IntegrateData.Rd | 6 +- man/IntegrateEmbeddings.Rd | 6 +- man/PolyFeaturePlot.Rd | 5 +- man/SCTransform.Rd | 9 +- man/Seurat-package.Rd | 2 +- 8 files changed, 110 insertions(+), 118 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 473c1f8fd..60078c7cb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -105,6 +105,8 @@ S3method(SCTResults,SCTModel) S3method(SCTResults,Seurat) S3method(SCTransform,Assay) S3method(SCTransform,Seurat) +S3method(SCTransform,Seurat5) +S3method(SCTransform,StdAssay) S3method(SCTransform,default) S3method(ScaleData,Assay) S3method(ScaleData,Seurat) @@ -203,6 +205,7 @@ export(FeatureLocator) export(FeaturePlot) export(FeatureScatter) export(FetchData) +export(FetchResiduals) export(FilterSlideSeq) export(FindAllMarkers) export(FindClusters) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 242f0894c..bd120e879 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1440,21 +1440,19 @@ SCTransform.Seurat5 <- function( #' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) #' -FetchResiduals <- function( - object, - features, - assay = NULL, - umi.assay = "RNA", - layer = "counts", - clip.range = NULL, - reference.SCT.model = NULL, - replace.value = FALSE, - na.rm = TRUE, - verbose = TRUE -) { +FetchResiduals <- function(object, + features, + assay = NULL, + umi.assay = "RNA", + layer = "counts", + clip.range = NULL, + reference.SCT.model = NULL, + replace.value = FALSE, + na.rm = TRUE, + verbose = TRUE) { assay <- assay %||% DefaultAssay(object = object) if (IsSCT(assay = object[[assay]])) { - object[[assay]] <- as(object[[assay]], 'SCTAssay') + object[[assay]] <- as(object[[assay]], "SCTAssay") } if (!inherits(x = object[[assay]], what = "SCTAssay")) { stop(assay, " assay was not generated by SCTransform") @@ -1466,12 +1464,13 @@ FetchResiduals <- function( } possible.features <- unique(x = unlist(x = lapply(X = sct.models, FUN = function(x) { rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = x)) - } - ))) + }))) bad.features <- setdiff(x = features, y = possible.features) if (length(x = bad.features) > 0) { warning("The following requested features are not present in any models: ", - paste(bad.features, collapse = ", "), call. = FALSE) + paste(bad.features, collapse = ", "), + call. = FALSE + ) features <- intersect(x = features, y = possible.features) } features.orig <- features @@ -1499,35 +1498,28 @@ FetchResiduals <- function( new.residuals <- list() total_cells <- 0 all_cells <- c() - if (!is.null(x = reference.SCT.model) ) { - + if (!is.null(x = reference.SCT.model)) { if (inherits(x = reference.SCT.model, what = "SCTModel")) { reference.SCT.model <- SCTModel_to_vst(SCTModel = reference.SCT.model) } - if (is.list(x = reference.SCT.model) & inherits(x = reference.SCT.model[[1]], what = "SCTModel")) { stop("reference.SCT.model must be one SCTModel rather than a list of SCTModel") } - #print(reference.SCT.model) - #if ('latent_var' %in% names(x = vst.args)) { - # stop('custom latent variables are not supported when reference.SCT.model is given') - #} - if (reference.SCT.model$model_str != 'y ~ log_umi') { - stop('reference.SCT.model must be derived using default SCT regression formula, `y ~ log_umi`') + if (reference.SCT.model$model_str != "y ~ log_umi") { + stop("reference.SCT.model must be derived using default SCT regression formula, `y ~ log_umi`") } - } - for (i in seq_along(along.with = layers)){ + for (i in seq_along(along.with = layers)) { l <- layers[i] sct_model <- sct.models[[i]] # these cells belong to this layer layer_cells <- Cells(x = object[[umi.assay]], layer = l) all_cells <- c(all_cells, layer_cells) total_cells <- total_cells + length(layer_cells) - #cat(layer_cells) - #stop("") + # cat(layer_cells) + # stop("") # calculate residual using this model and these cells - new.residuals[[i]] <- GetResidualSCTModel.V5( + new.residuals[[i]] <- FetchResidualSCTModel( object = object, umi.assay = umi.assay, assay = assay, @@ -1540,10 +1532,9 @@ FetchResiduals <- function( clip.range = clip.range, verbose = verbose ) - } - existing.data <- GetAssayData(object = object, slot = 'scale.data', assay = assay) + existing.data <- GetAssayData(object = object, slot = "scale.data", assay = assay) all.features <- union(x = rownames(x = existing.data), y = features) new.scale <- matrix( data = NA, @@ -1551,9 +1542,8 @@ FetchResiduals <- function( ncol = total_cells, dimnames = list(all.features, all_cells) ) - common_cells <- intersect(colnames(new.scale), colnames(existing.data)) - if (nrow(x = existing.data) > 0){ + if (nrow(x = existing.data) > 0) { new.scale[rownames(x = existing.data), common_cells] <- existing.data[, common_cells] } if (length(x = new.residuals) == 1 & is.list(x = new.residuals)) { @@ -1565,60 +1555,61 @@ FetchResiduals <- function( if (na.rm) { new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] } - #object <- SetAssayData( - # object = object, - # assay = assay, - # slot = "scale.data", - # new.data = new.scale - #) - # object[[assay]]@scale.data <- new.scale - if (any(!features.orig %in% rownames(x = new.scale))) { - bad.features <- features.orig[which(!features.orig %in% rownames(x = new.scale))] - warning("Residuals not computed for the following requested features: ", - paste(bad.features, collapse = ", "), call. = FALSE) - } + + # if (any(!features.orig %in% rownames(x = new.scale))) { + # bad.features <- features.orig[which(!features.orig %in% rownames(x = new.scale))] + # warning("Residuals not computed for the following requested features: ", + # paste(bad.features, collapse = ", "), call. = FALSE) + # } return(new.scale) } # Calculate pearson residuals of features not in the scale.data -# This function is the secondary function under GetResidual +# This function is the secondary function under FetchResiduals # # @param object A seurat object -# @param features Name of features to add into the scale.data -# @param assay Name of the assay of the seurat object generated by SCTransform -# @param vst_out The SCT parameter list -# @param clip.range Numeric of length two specifying the min and max values the Pearson residual -# will be clipped to -# Useful if you want to change the clip.range. +# @param assay Name of the assay of the seurat object generated by SCTransform. Default +# is "SCT" +# @param umi.assay Name of the assay of the seurat object to fetch UMIs from. Default +# is "RNA" +# @param layer Name of the layer under `umi.assay` to fetch UMIs from. Default is +# "counts" +# @param layer.cells Vector of cells to calculate the residual for. Default is NULL +# which uses all cells in the layer +# @param SCTModel Which SCTmodel to use from the object for calculating the residual. +# Will be ignored if reference.SCT.model is set +# @param reference.SCT.model If a reference SCT model should be used for calculating +# the residuals. When set to not NULL, ignores the `SCTModel` paramater. +# @param new_features A vector of features to calculate the residuals for +# @param clip.range Numeric of length two specifying the min and max values the Pearson residual will be clipped to. Useful if you want to change the clip.range. +# @param replace.value Whether to replace the value of residuals if it already exists # @param verbose Whether to print messages and progress bars # -# @return Returns a matrix containing not-centered pearson residuals of added features +# @return Returns a matrix containing centered pearson residuals of added features # #' @importFrom sctransform get_residuals #' @importFrom Matrix colSums # -GetResidualSCTModel.V5 <- function( - object, - assay, - umi.assay, - layer, - layer.cells, - SCTModel, - reference.SCT.model, - new_features, - clip.range, - replace.value, - verbose -) { +FetchResidualSCTModel <- function(object, + assay = "SCT", + umi.assay = "RNA", + layer = "counts", + layer.cells = NULL, + SCTModel = NULL, + reference.SCT.model = NULL, + new_features = NULL, + clip.range = NULL, + replace.value = FALSE, + verbose = FALSE) { clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) - sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" + sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" - if (!is.null(reference.SCT.model)){ + if (!is.null(reference.SCT.model)) { # use reference SCT model sct.method <- "reference" } @@ -1627,15 +1618,15 @@ GetResidualSCTModel.V5 <- function( scale.data.cells.common <- intersect(colnames(x = scale.data.cells), layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) - if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { + if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) full.scale.data[rownames(x = existing.scale.data), colnames(x = existing.scale.data)] <- existing.scale.data - existing_features <- names(x = which(x = ! apply( + existing_features <- names(x = which(x = !apply( X = full.scale.data, MARGIN = 1, - FUN = anyNA) - )) + FUN = anyNA + ))) } else { existing_features <- character() } @@ -1652,75 +1643,74 @@ GetResidualSCTModel.V5 <- function( } if (!umi.assay %in% Assays(object = object)) { warning("The umi assay (", umi.assay, ") is not present in the object. ", - "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) + "Cannot compute additional residuals.", + call. = FALSE, immediate. = TRUE + ) return(NULL) } # these features do not have feature attriutes diff_features <- setdiff(x = features_to_compute, y = model.features) intersect_features <- intersect(x = features_to_compute, y = model.features) - if (sct.method == "reference"){ - + if (sct.method == "reference") { vst_out <- reference.SCT.model # override clip.range clip.range <- vst_out$arguments$sct.clip.range umi.field <- paste0("nCount_", assay) # get rid of the cell attributes vst_out$cell_attr <- NULL - all.features <- intersect( - x = rownames(x = vst_out$gene_attr), - y = Features(object[[umi.assay]], layer = "counts") + all.features <- intersect( + x = rownames(x = vst_out$gene_attr), + y = features_to_compute ) - vst_out$gene_attr <- vst_out$gene_attr[all.features ,] - vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features,] - + vst_out$gene_attr <- vst_out$gene_attr[all.features, , drop = FALSE] + vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features, , drop = FALSE] } else { vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) } clip.max <- max(clip.range) clip.min <- min(clip.range) + layer.cells <- layer.cells %||% Cells(x = object[[umi.assay]], layer = layer) if (length(x = diff_features) == 0) { - #new_residuals <- GetAssayData(object = object, assay = assay, slot="scale.data")[features_to_compute, , drop=FALSE] - counts <- LayerData( object = object[[umi.assay]], layer = layer, cells = layer.cells ) - cells.grid <- DelayedArray::colAutoGrid(x = counts)#, ncol = 5000) + cells.grid <- DelayedArray::colAutoGrid(x = counts) # , ncol = 5000) new_residuals <- list() - cat(dim(counts)) + # cat(dim(counts)) for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) ## TODO: Maybe read only interesting genes umi.all <- as(object = block, Class = "dgCMatrix") - umi <- umi.all[features_to_compute,, drop=FALSE] + umi <- umi.all[features_to_compute, , drop = FALSE] ## Add cell_attr for missing cells - cell_attr <- data.frame(umi = colSums(umi.all), log_umi = log10(x = colSums(umi.all))) + cell_attr <- data.frame( + umi = colSums(umi.all), + log_umi = log10(x = colSums(umi.all)) + ) cell_attr$cells_step1 <- FALSE - #cell_attr <- as.matrix(x = cell_attr) + # cell_attr <- as.matrix(x = cell_attr) rownames(cell_attr) <- colnames(umi) if (sct.method == "reference") { - vst_out$cell_attr <- cell_attr + vst_out$cell_attr <- cell_attr[colnames(umi), ] } else { cell_attr_existing <- vst_out$cell_attr cells_missing <- setdiff(rownames(cell_attr), rownames(cell_attr_existing)) - vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing,]) - vst_out$cell_attr <- vst_out$cell_attr[colnames(umi),] + vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing, ]) + vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), ] } - if (verbose) { if (sct.method == "reference") { - message("sct.model: ", reference.SCT.model) + message("using reference sct model") } else { message("sct.model: ", SCTModel) } } - new_residual <- get_residuals( vst_out = vst_out, umi = umi, @@ -1729,12 +1719,12 @@ GetResidualSCTModel.V5 <- function( verbosity = as.numeric(x = verbose) * 2 ) new_residual <- as.matrix(x = new_residual) - # centered data new_residuals[[i]] <- new_residual } new_residual <- do.call(what = cbind, args = new_residuals) + # centered data new_residual <- new_residual - rowMeans(x = new_residual) - #return (new_residuals) + # return (new_residuals) } else { # Some features do not exist warning( @@ -1750,9 +1740,7 @@ GetResidualSCTModel.V5 <- function( dimnames = list(features_to_compute, model.cells) )) } - } - old.features <- setdiff(x = new_features, y = features_to_compute) if (length(x = old.features) > 0) { old_residuals <- GetAssayData(object = object[[assay]], slot = "scale.data")[old.features, model.cells, drop = FALSE] @@ -1761,3 +1749,4 @@ GetResidualSCTModel.V5 <- function( return(new_residual) } + diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index f869e4c98..c68a85a93 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,8 +10,12 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { - c("lightgrey", "blue") }, + cols = if (blend) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "blue") + }, pt.size = NULL, order = FALSE, min.cutoff = NA, diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index c02543005..e08bd682e 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,10 +64,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
}}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
}} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index c3f96ffa5..dc0469132 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,10 +75,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
}}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
}} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 1eacd0ecd..a2b2fc588 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,10 +33,7 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/SCTransform.Rd b/man/SCTransform.Rd index f3841bc59..849f51180 100644 --- a/man/SCTransform.Rd +++ b/man/SCTransform.Rd @@ -51,7 +51,7 @@ SCTransform(object, ...) \method{SCTransform}{Seurat}( object, - assay = "RNA", + assay = NULL, new.assay.name = "SCT", reference.SCT.model = NULL, do.correct.umi = TRUE, @@ -133,17 +133,12 @@ slot of the new assay. } \description{ This function calls sctransform::vst. The sctransform package is available at -https://github.com/ChristophH/sctransform. +https://github.com/satijalab/sctransform. Use this function as an alternative to the NormalizeData, FindVariableFeatures, ScaleData workflow. Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, data being log1p(counts), scale.data being pearson residuals; sctransform::vst intermediate results are saved in misc slot of new assay. -} -\examples{ -data("pbmc_small") -SCTransform(object = pbmc_small) - } \seealso{ \code{\link[sctransform]{correct_counts}} \code{\link[sctransform]{get_residuals}} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 9b3fc3749..351af75c9 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. } \section{Package options}{ From 1e2e36345045b7ad817981253e11c24abee97e24 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 10:34:12 -0400 Subject: [PATCH 182/979] fix dispatch --- R/preprocessing5.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index bd120e879..5066648e9 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1341,8 +1341,8 @@ SCTransform.StdAssay <- function( } # Return array by merging everythin - if (length(x = sct.assay.list)>1){ - merged.assay <- merge.SCTAssay(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) + if (length(x = sct.assay.list) > 1){ + merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) # set the names of SCTmodels to be layer names models <- slot(object = merged.assay, name="SCTModel.list") names(models) <- names(x = sct.assay.list) From c1e3ba691e356361e7543c401684d90663f188cb Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 10:35:44 -0400 Subject: [PATCH 183/979] cleanup --- R/preprocessing5.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 5066648e9..f5243f3c0 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1516,8 +1516,6 @@ FetchResiduals <- function(object, layer_cells <- Cells(x = object[[umi.assay]], layer = l) all_cells <- c(all_cells, layer_cells) total_cells <- total_cells + length(layer_cells) - # cat(layer_cells) - # stop("") # calculate residual using this model and these cells new.residuals[[i]] <- FetchResidualSCTModel( object = object, @@ -1556,11 +1554,6 @@ FetchResiduals <- function(object, new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] } - # if (any(!features.orig %in% rownames(x = new.scale))) { - # bad.features <- features.orig[which(!features.orig %in% rownames(x = new.scale))] - # warning("Residuals not computed for the following requested features: ", - # paste(bad.features, collapse = ", "), call. = FALSE) - # } return(new.scale) } From 8f62ea8738e2c2964d8f5422fe11eab2e2398b0e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 11:27:18 -0400 Subject: [PATCH 184/979] fix for min_variance --- R/preprocessing5.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index f5243f3c0..a428827b0 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1304,6 +1304,7 @@ SCTransform.StdAssay <- function( residual.type <- vst.out[['residual_type']] %||% 'pearson' sct.method <- vst.out[['sct.method']] + variable.feature.list <- list() # create output assay and put (corrected) umi counts in count slot if (do.correct.umi & residual.type == 'pearson') { if (verbose) { @@ -1318,6 +1319,7 @@ SCTransform.StdAssay <- function( } # set the variable genes VariableFeatures(object = assay.out) <- vst.out$variable_features + variable.feature.list[[dataset.names[i]]] <- vst.out$variable_features # put log1p transformed counts in data assay.out <- SetAssayData( object = assay.out, @@ -1342,7 +1344,11 @@ SCTransform.StdAssay <- function( # Return array by merging everythin if (length(x = sct.assay.list) > 1){ + merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) + # set variable features as the union of the features + variable.features <- Reduce(f = union, x = variable.feature.list) + VariableFeatures(object = merged.assay) <- variable.features # set the names of SCTmodels to be layer names models <- slot(object = merged.assay, name="SCTModel.list") names(models) <- names(x = sct.assay.list) @@ -1679,6 +1685,16 @@ FetchResidualSCTModel <- function(object, block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) ## TODO: Maybe read only interesting genes umi.all <- as(object = block, Class = "dgCMatrix") + + # calcluclate min_variance for get_residuals + # required when vst_out$arguments$min_variance == "umi_median" + # only calculated once + if (i==1){ + nz_median <- median(umi.all@x) + min_var_custom <- (nz_median / 5)^2 + print(paste("min_var_custom", min_var_custom)) + } + umi <- umi.all[features_to_compute, , drop = FALSE] ## Add cell_attr for missing cells @@ -1704,10 +1720,16 @@ FetchResidualSCTModel <- function(object, message("sct.model: ", SCTModel) } } + if (vst_out$arguments$min_variance == "umi_median"){ + min_var <- min_var_custom + } else { + min_var <- vst_out$arguments$min_variance + } new_residual <- get_residuals( vst_out = vst_out, umi = umi, residual_type = "pearson", + min_variance = min_var, res_clip_range = c(clip.min, clip.max), verbosity = as.numeric(x = verbose) * 2 ) From 5ba485f32e702caa19316a44e3bcbf232bb4cbd5 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 11:57:45 -0400 Subject: [PATCH 185/979] add message --- R/preprocessing5.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index a428827b0..f34056519 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1676,7 +1676,9 @@ FetchResidualSCTModel <- function(object, layer = layer, cells = layer.cells ) - cells.grid <- DelayedArray::colAutoGrid(x = counts) # , ncol = 5000) + + # iterate over 2k cells at once + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = 2000) new_residuals <- list() # cat(dim(counts)) @@ -1717,7 +1719,8 @@ FetchResidualSCTModel <- function(object, if (sct.method == "reference") { message("using reference sct model") } else { - message("sct.model: ", SCTModel) + message("sct.model: ", SCTModel, " on ", ncol(x = umi), " cells: ", + colnames(x = umi.all)[1], " .. ", colnames(x = umi.all)[ncol(umi.all)]) } } if (vst_out$arguments$min_variance == "umi_median"){ From 2779f8dd2d33de06e93dd5bcdc64451c50bf0da8 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 12:03:08 -0400 Subject: [PATCH 186/979] add FetchResiduals --- man/FetchResiduals.Rd | 60 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 man/FetchResiduals.Rd diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd new file mode 100644 index 000000000..81536048f --- /dev/null +++ b/man/FetchResiduals.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{FetchResiduals} +\alias{FetchResiduals} +\title{Calculate pearson residuals of features not in the scale.data} +\usage{ +FetchResiduals( + object, + features, + assay = NULL, + umi.assay = "RNA", + layer = "counts", + clip.range = NULL, + reference.SCT.model = NULL, + replace.value = FALSE, + na.rm = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{object}{A seurat object} + +\item{features}{Name of features to add into the scale.data} + +\item{assay}{Name of the assay of the seurat object generated by SCTransform} + +\item{umi.assay}{Name of the assay of the seurat object containing UMI matrix +and the default is RNA} + +\item{layer}{Name (prefix) of the layer to pull counts from} + +\item{clip.range}{Numeric of length two specifying the min and max values the +Pearson residual will be clipped to} + +\item{replace.value}{Recalculate residuals for all features, even if they are +already present. Useful if you want to change the clip.range.} + +\item{na.rm}{For features where there is no feature model stored, return NA +for residual value in scale.data when na.rm = FALSE. When na.rm is TRUE, only +return residuals for features with a model stored for all cells.} + +\item{verbose}{Whether to print messages and progress bars} +} +\value{ +Returns a Seurat object containing Pearson residuals of added +features in its scale.data +} +\description{ +This function calls sctransform::get_residuals. +} +\examples{ +data("pbmc_small") +pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) +pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) + +} +\seealso{ +\code{\link[sctransform]{get_residuals}} +} +\concept{preprocessing} From 2bf0d749073d2b7f43c39555a688e227dcf18881 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 14:16:08 -0400 Subject: [PATCH 187/979] default to ncells --- R/preprocessing5.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index f34056519..d02162fe1 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1265,16 +1265,14 @@ SCTransform.StdAssay <- function( features = Features(x = object, layer = l), cells = Cells(x = object, layer = l) ) - feature.grid <- DelayedArray::rowAutoGrid(x = counts) - ##TODO: handle this later for boundary conditions - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = 2000) + ## Sample cells + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = ncells) - ##TODO: handle this later for boundary conditions vp <- cells.grid[[1L]] - - # Read a block from a delayed matrix sparse <- DelayedArray::is_sparse(x = counts) # TRUE - block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = sparse) + block <- DelayedArray::read_block(x = counts, + viewport = vp, + as.sparse = sparse) counts <- as(object = block, Class = 'dgCMatrix') cell.attr.object <- cell.attr[colnames(x = counts),] From 04c83876b3492bfb8eabb02c1ba4cc02af0859c6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 8 Sep 2022 17:10:14 -0400 Subject: [PATCH 188/979] delay array integration --- NAMESPACE | 1 + R/integration.R | 199 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 171 insertions(+), 29 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 60078c7cb..e8f7a1d2c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -681,6 +681,7 @@ importFrom(rlang,caller_env) importFrom(rlang,check_installed) importFrom(rlang,enquo) importFrom(rlang,inform) +importFrom(rlang,invoke) importFrom(rlang,is_quosure) importFrom(rlang,is_scalar_character) importFrom(rlang,quo_get_env) diff --git a/R/integration.R b/R/integration.R index 7e5496e71..4d3efe65a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1826,6 +1826,7 @@ IntegrateEmbeddings.TransferAnchorSet <- function( IntegrateSketchEmbeddings <- function( object, atoms = 'sketch', # DefaultAssay(object) + atoms.layers = NULL, orig = 'RNA', features = NULL, # VF from object[[atom.assay]] reduction = 'integrated_dr', # harmony; rerun UMAP on this @@ -1837,6 +1838,7 @@ IntegrateSketchEmbeddings <- function( verbose = TRUE ) { # Check input and output dimensional reductions + atoms.layers <- atoms.layers %||% layers reduction <- match.arg(arg = reduction, choices = Reductions(object = object)) reduction.name <- reduction.name %||% paste0(reduction, '.orig') reduction.key <- reduction.key %||% Key(object = reduction.name, quiet = TRUE) @@ -1888,7 +1890,7 @@ IntegrateSketchEmbeddings <- function( features.atom <- Reduce( f = intersect, x = lapply( - X = layers, + X = atoms.layers, FUN = function(lyr) { return(Features(x = object[[atoms]], layer = lyr)) } @@ -1915,8 +1917,16 @@ IntegrateSketchEmbeddings <- function( return((sum(ncells[1:x]) + 1):sum(ncells[1:(x + 1)])) } ) + if (length(atoms.layers) == 1) { + atoms.layers <- rep(atoms.layers, length(layers)) + } for (i in seq_along(along.with = layers)) { - cells.sketch <- Cells(x = object[[atoms]], layer = layers[i]) + if (length(unique(atoms.layers)) == length(layers)) { + cells.sketch <- Cells(x = object[[atoms]], layer = atoms.layers[i]) + } else if (length(unique(atoms.layers)) == 1) { + cells.sketch <- intersect(Cells(x = object[[atoms]][[atoms.layers[[1]]]]), + Cells(object[[orig]][layers[i]])) + } if (isTRUE(x = verbose)) { message( length(x = cells.sketch), @@ -1924,6 +1934,12 @@ IntegrateSketchEmbeddings <- function( ) message("Correcting embeddings") } + if (inherits(x = object[[orig]][[layers[i]]], what = 'DelayedMatrix') ) { + matrix.prod.function <- crossprod_DelayedAssay + } else { + matrix.prod.function <- crossprod + } + emb <- switch( EXPR = method, 'data' = { @@ -1938,17 +1954,12 @@ IntegrateSketchEmbeddings <- function( ) sketch.transform <- ginv(X = exp.mat) %*% Embeddings(object = object[[reduction]])[cells.sketch ,] - emb <- as.matrix( - # TODO: update as.sparse to have default method with `as(x, "CsparseMatrix")` - x = t(x = as( - object = LayerData( - object = object[[orig]], - layer = layers[i], - features = features - ), - Class = "CsparseMatrix")) %*% - sketch.transform - ) + emb <- matrix.prod.function(x = sketch.transform, + y = LayerData( + object = object[[orig]], + layer = layers[i], + features = features + )) emb }, 'sketch' = { @@ -1958,24 +1969,18 @@ IntegrateSketchEmbeddings <- function( )) exp.mat <- as.matrix(x = t(x = LayerData( object = object[[atoms]], - layer = layers[i], + layer = atoms.layers[i], features = features - )) %*% R) + )[,cells.sketch]) %*% R) sketch.transform <- ginv(X = exp.mat) %*% Embeddings(object = object[[reduction]])[cells.sketch ,] - emb <- as.matrix( - x = ( - t(as( - object = LayerData( - object = object[[orig]], - layer = layers[i], - features = features - ), - Class = "CsparseMatrix")) - %*% - R) %*% - sketch.transform - ) + emb <- matrix.prod.function(x = R %*% sketch.transform, + y = LayerData( + object = object[[orig]], + layer = layers[i], + features = features + )) + emb <- t(emb) emb } ) @@ -5803,4 +5808,140 @@ ProjectCellEmbeddings_DelayedAssay <- function( rownames(emb.mat) <- colnames(query.data) colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] return(emb.mat) -} \ No newline at end of file +} + + +#' Perform integration on the joint PCA cell embeddings. +#' +#' This is a convenience wrapper function around the following three functions +#' that are often run together when perform integration. +#' #' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}}, +#' \code{\link{IntegrateEmbeddings}}. +#' +#' @inheritParams FindIntegrationAnchors +#' @param new.reduction.name Name of integrated dimensional reduction +#' @param npcs Total Number of PCs to compute and store (50 by default) +#' @param findintegrationanchors.args A named list of additional arguments to +#' \code{\link{FindIntegrationAnchors}} +#' @param verbose Print messages and progress +#' +#' @importFrom rlang invoke +#' @return Returns a Seurat object with integrated dimensional reduction +#' @export +#' +FastRPCAIntegration <- function( + object.list, + reference = NULL, + anchor.features = 2000, + k.anchor = 20, + dims = 1:30, + scale = TRUE, + normalization.method = c("LogNormalize", "SCT"), + new.reduction.name = 'integrated_dr', + npcs = 50, + findintegrationanchors.args = list(), + verbose = TRUE +) { + npcs <- max(npcs, dims) + my.lapply <- ifelse( + test = verbose && nbrOfWorkers() == 1, + yes = pblapply, + no = future_lapply + ) + reduction <- 'rpca' + if (is.numeric(x = anchor.features)) { + anchor.features <- SelectIntegrationFeatures( + object.list = object.list, + nfeatures = anchor.features, + verbose = FALSE + ) + } + if (normalization.method == 'SCT') { + scale <- FALSE + object.list <- PrepSCTIntegration(object.list = object.list, + anchor.features = anchor.features + ) + } + if (verbose) { + message('Performing PCA for each object') + } + object.list <- my.lapply(X = object.list, + FUN = function(x) { + if (normalization.method != 'SCT') { + x <- ScaleData(x, features = anchor.features, do.scale = scale, verbose = FALSE) + } + x <- RunPCA(x, features = anchor.features, verbose = FALSE, npcs = npcs) + return(x) + } + ) + + anchor <- invoke( + .fn = FindIntegrationAnchors, + .args = c(list( + object.list = object.list, + reference = reference, + anchor.features = anchor.features, + reduction = reduction, + normalization.method = normalization.method, + scale = scale, + k.anchor = k.anchor, + dims = dims, + verbose = verbose + ), findintegrationanchors.args + ) + ) + object_merged <- merge(x = object.list[[1]], + y = object.list[2:length(object.list)] + ) + anchor.feature <- slot(object = anchor, name = 'anchor.features') + if (normalization.method != 'SCT') { + object_merged <- ScaleData(object = object_merged, + features = anchor.feature, + do.scale = scale, + verbose = FALSE + ) + } + object_merged <- RunPCA(object_merged, + features = anchor.feature, + verbose = FALSE, + npcs = npcs + ) + temp <- object_merged[["pca"]] + object_merged <- IntegrateEmbeddings( + anchorset = anchor, + reductions = object_merged[['pca']], + new.reduction.name = new.reduction.name, + verbose = verbose) + object_merged[['pca']] <- temp + VariableFeatures(object = object_merged) <- anchor.feature + return(object_merged) +} + +crossprod_DelayedAssay <- function(x, y, block.size = 1e9) { + # perform t(x) %*% y in blocks for y + if (!inherits(x = y, 'DelayedMatrix')) { + stop('y should a DelayedMatrix') + } + if (nrow(x) != nrow(y)) { + stop('row of x and y should be the same') + } + sparse <- DelayedArray::is_sparse(x = y) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = y) + product.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + product.list[[i]] <- as.matrix(t(x) %*% block) + } + product.mat <- matrix(data = unlist(product.list), nrow = ncol(x) , ncol = ncol(y)) + colnames(product.mat) <- colnames(y) + rownames(product.mat) <- rownames(x) + return(product.mat) +} + From db77258386747906135c77bd149bf87e635853ee Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 17:53:44 -0400 Subject: [PATCH 189/979] Fix SCTransform.default --- R/preprocessing.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 9ba5b8d1f..7787534c2 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1465,6 +1465,7 @@ SampleUMI <- function( #' slot of the new assay. #' #' @importFrom stats setNames +#' @importFrom Matrix colSums #' @importFrom sctransform vst get_residual_var get_residuals correct_counts #' #' @seealso \code{\link[sctransform]{correct_counts}} \code{\link[sctransform]{get_residuals}} @@ -1575,13 +1576,10 @@ SCTransform.default <- function( do.correct.umi <- FALSE vst.out <- reference.SCT.model clip.range <- vst.out$arguments$sct.clip.range - umi.field <- paste0("nCount_", assay) - vst.out$cell_attr <- - if (umi.field %in% colnames(x = object[[]])) { - data.frame(log_umi = log10(x = object[[umi.field, drop = T]])) - } else { - data.frame(log_umi = log10(x = CalcN(object = object[[assay]])$nCount)) - } + cell_attr <- data.frame(log_umi = log10(x = colSums(umi))) + rownames(cell_attr) <- colnames(x = umi) + vst.out$cell_attr <- cell_attr + all.features <- intersect( x = rownames(x = vst.out$gene_attr), y = rownames(x = umi) From 6c70287b62df68bfd09854c331024e6d4f78bdfc Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 8 Sep 2022 17:58:47 -0400 Subject: [PATCH 190/979] add query layers --- R/integration.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/integration.R b/R/integration.R index 4d3efe65a..f898308a9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -727,6 +727,7 @@ FindTransferAnchors <- function( reference.assay = NULL, reference.neighbors = NULL, query.assay = NULL, + query.layers = NULL, reduction = "pcaproject", reference.reduction = NULL, project.query = FALSE, @@ -755,6 +756,7 @@ FindTransferAnchors <- function( reference.assay = reference.assay, reference.neighbors = reference.neighbors, query.assay = query.assay, + query.layers = query.layers, reduction = reduction, reference.reduction = reference.reduction, project.query = project.query, @@ -5144,6 +5146,7 @@ ValidateParams_FindTransferAnchors <- function( reference.assay, reference.neighbors, query.assay, + query.layers, reduction, reference.reduction, project.query, From 2d84bf13cbaf1621172ee3217054611c6c486388 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 18:25:05 -0400 Subject: [PATCH 191/979] Fix for reference sct model --- R/preprocessing5.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index d02162fe1..cf9150e08 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1308,8 +1308,13 @@ SCTransform.StdAssay <- function( if (verbose) { message('Place corrected count matrix in counts slot') } - assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) - vst.out$umi_corrected <- NULL + if (is.null(reference.SCT.model)){ + assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + vst.out$umi_corrected <- NULL + } else { + assay.out <- CreateAssayObject(counts = counts) + } + } else { # TODO: restore once check.matrix is in SeuratObject # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) From 20462d13f3e87b043e9ff1b6ccfadf08b2498d37 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 8 Sep 2022 19:39:09 -0400 Subject: [PATCH 192/979] Fix for ref sct model --- R/preprocessing5.R | 160 ++++++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 66 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index cf9150e08..139bf0e5b 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1250,6 +1250,10 @@ SCTransform.StdAssay <- function( verbose = TRUE, ... ) { + if (!is.null(reference.SCT.model)){ + do.correct.umi <- FALSE + do.center <- FALSE + } olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) layers <- Layers(object = object, search = layer) dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) @@ -1268,81 +1272,105 @@ SCTransform.StdAssay <- function( ## Sample cells cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = ncells) - vp <- cells.grid[[1L]] - sparse <- DelayedArray::is_sparse(x = counts) # TRUE - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = sparse) + # if there is no reference model we randomly select a subset of cells + # TODO: randomize this set of cells + variable.feature.list <- list() - counts <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts),] + GetSCT.Chunked <- function(vp){ + sparse <- DelayedArray::is_sparse(x = counts) # TRUE + block <- DelayedArray::read_block(x = counts, + viewport = vp, + as.sparse = sparse) - if (!identical(rownames(cell.attr.object), colnames(counts))) { - # print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) - # print(length(setdiff(colnames(counts),rownames(cell.attr.object)))) - stop("cell attribute row names must match column names of count matrix") - } - vst.out <- SCTransform(object = counts, - cell.attr = cell.attr.object, - reference.SCT.model = reference.SCT.model, - do.correct.umi = do.correct.umi, - ncells = ncells, - residual.features = residual.features, - variable.features.n = variable.features.n, - variable.features.rv.th = variable.features.rv.th, - vars.to.regress = vars.to.regress, - do.scale = do.scale, - do.center = do.center, - clip.range = clip.range, - conserve.memory = conserve.memory, - return.only.var.genes = return.only.var.genes, - seed.use = seed.use, - verbose = verbose, - ...) - - residual.type <- vst.out[['residual_type']] %||% 'pearson' - sct.method <- vst.out[['sct.method']] - variable.feature.list <- list() - # create output assay and put (corrected) umi counts in count slot - if (do.correct.umi & residual.type == 'pearson') { - if (verbose) { - message('Place corrected count matrix in counts slot') + counts <- as(object = block, Class = 'dgCMatrix') + cell.attr.object <- cell.attr[colnames(x = counts),] + + if (!identical(rownames(cell.attr.object), colnames(counts))) { + # print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) + # print(length(setdiff(colnames(counts),rownames(cell.attr.object)))) + stop("cell attribute row names must match column names of count matrix") } - if (is.null(reference.SCT.model)){ - assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) - vst.out$umi_corrected <- NULL + vst.out <- SCTransform(object = counts, + cell.attr = cell.attr.object, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = verbose, + ...) + + residual.type <- vst.out[['residual_type']] %||% 'pearson' + sct.method <- vst.out[['sct.method']] + # create output assay and put (corrected) umi counts in count slot + if (do.correct.umi & residual.type == 'pearson') { + if (verbose) { + message('Place corrected count matrix in counts slot') + } + if (is.null(reference.SCT.model)){ + assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + vst.out$umi_corrected <- NULL + } else { + assay.out <- CreateAssayObject(counts = counts) + } + } else { + # TODO: restore once check.matrix is in SeuratObject + # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) assay.out <- CreateAssayObject(counts = counts) } + # set the variable genes + VariableFeatures(object = assay.out) <- vst.out$variable_features + # put log1p transformed counts in data + assay.out <- SetAssayData( + object = assay.out, + slot = 'data', + new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) + ) + scale.data <- vst.out$y + assay.out <- SetAssayData( + object = assay.out, + slot = 'scale.data', + new.data = scale.data + ) + vst.out$y <- NULL + # save clip.range into vst model + vst.out$arguments$sct.clip.range <- clip.range + vst.out$arguments$sct.method <- sct.method + Misc(object = assay.out, slot = 'vst.out') <- vst.out + assay.out <- as(object = assay.out, Class = "SCTAssay") + return (assay.out) + } + if (is.null(reference.SCT.model)){ + # No reference model so just select the first block of cells + vp <- cells.grid[[1L]] + assay.out <- GetSCT.Chunked(vp = vp) + variable.feature.list[[dataset.names[i]]] <- VariableFeatures(assay.out) + sct.assay.list[[dataset.names[i]]] <- assay.out } else { - # TODO: restore once check.matrix is in SeuratObject - # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) - assay.out <- CreateAssayObject(counts = counts) + sct.assay.list.temp <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + assay.out <- GetSCT.Chunked(vp = vp) + sct.assay.list.temp[[paste0("chunk", i)]] <- assay.out + } + if (length(sct.assay.list.temp)>1){ + assay.out <- merge(x = sct.assay.list.temp[[1]], + y = sct.assay.list.temp[2:length(sct.assay.list.temp)]) + } else { + assay.out <- sct.assay.list.temp[[1]] + } + sct.assay.list[[dataset.names[i]]] <- assay.out } - # set the variable genes - VariableFeatures(object = assay.out) <- vst.out$variable_features - variable.feature.list[[dataset.names[i]]] <- vst.out$variable_features - # put log1p transformed counts in data - assay.out <- SetAssayData( - object = assay.out, - slot = 'data', - new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) - ) - scale.data <- vst.out$y - assay.out <- SetAssayData( - object = assay.out, - slot = 'scale.data', - new.data = scale.data - ) - vst.out$y <- NULL - # save clip.range into vst model - vst.out$arguments$sct.clip.range <- clip.range - vst.out$arguments$sct.method <- sct.method - Misc(object = assay.out, slot = 'vst.out') <- vst.out - assay.out <- as(object = assay.out, Class = "SCTAssay") - - sct.assay.list[[dataset.names[i]]] <- assay.out } # Return array by merging everythin From 07be9f71802fa77a58a27e94d809585d9a7c207e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 9 Sep 2022 08:54:46 -0400 Subject: [PATCH 193/979] add key in GetSCT.chunked, speedup FetchResiduals --- R/preprocessing5.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 139bf0e5b..95e391e96 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1347,6 +1347,9 @@ SCTransform.StdAssay <- function( vst.out$arguments$sct.method <- sct.method Misc(object = assay.out, slot = 'vst.out') <- vst.out assay.out <- as(object = assay.out, Class = "SCTAssay") + #TODO: Add a key to prevent hitting a bug in merge.StdAssay which + # does not like character(0) keys being merged + assay.out@key <- "sct" return (assay.out) } @@ -1364,8 +1367,12 @@ SCTransform.StdAssay <- function( sct.assay.list.temp[[paste0("chunk", i)]] <- assay.out } if (length(sct.assay.list.temp)>1){ + # this currently fails in merge.StdAssay step + # assignment of an object of class “list” is not valid for + # slot ‘key’ in an object of class “Assay”; is(value, "character") is not TRUE assay.out <- merge(x = sct.assay.list.temp[[1]], y = sct.assay.list.temp[2:length(sct.assay.list.temp)]) + } else { assay.out <- sct.assay.list.temp[[1]] } @@ -1584,9 +1591,13 @@ FetchResiduals <- function(object, if (length(x = new.residuals) == 1 & is.list(x = new.residuals)) { new.residuals <- new.residuals[[1]] } else { - new.residuals <- Reduce(cbind, new.residuals) + #new.residuals <- Reduce(cbind, new.residuals) + new.residuals <- matrix(data = unlist(new.residuals), nrow = nrow(new.scale) , ncol = ncol(new.scale)) + colnames(new.residuals) <- colnames(new.scale) + rownames(new.residuals) <- rownames(new.scale) } new.scale[rownames(x = new.residuals), colnames(x = new.residuals)] <- new.residuals + if (na.rm) { new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] } From 6df5b71efd3dd1b8f6915b5b9a1fe41383c3cd9f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 11 Sep 2022 20:27:37 -0400 Subject: [PATCH 194/979] fix minor error --- R/objects.R | 5 ++++- R/preprocessing.R | 2 +- R/preprocessing5.R | 7 +++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/objects.R b/R/objects.R index 40ec11022..59e0ac525 100644 --- a/R/objects.R +++ b/R/objects.R @@ -2816,7 +2816,10 @@ UpdateSlots <- function(object) { ) object.list <- Filter(f = Negate(f = is.null), x = object.list) object.list <- c('Class' = class(x = object)[1], object.list) - object <- do.call(what = 'new', args = object.list) + object <- rlang::invoke( + .fn = new, + .args = object.list + ) for (x in setdiff(x = slotNames(x = object), y = names(x = object.list))) { xobj <- slot(object = object, name = x) if (is.vector(x = xobj) && !is.list(x = xobj) && length(x = xobj) == 0) { diff --git a/R/preprocessing.R b/R/preprocessing.R index 7787534c2..b813a15c6 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -426,7 +426,7 @@ GetResidual <- function( ) existing.data <- GetAssayData(object = object, slot = 'scale.data', assay = assay) all.features <- union(x = rownames(x = existing.data), y = features) - new.scale <- matrix( + new.scale <- matrix( data = NA, nrow = length(x = all.features), ncol = ncol(x = object), diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 139bf0e5b..56ac80683 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1250,6 +1250,7 @@ SCTransform.StdAssay <- function( verbose = TRUE, ... ) { + if (!is.null(reference.SCT.model)){ do.correct.umi <- FALSE do.center <- FALSE @@ -1270,7 +1271,7 @@ SCTransform.StdAssay <- function( cells = Cells(x = object, layer = l) ) ## Sample cells - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = ncells) + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) # if there is no reference model we randomly select a subset of cells # TODO: randomize this set of cells @@ -1283,7 +1284,7 @@ SCTransform.StdAssay <- function( as.sparse = sparse) counts <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts),] + cell.attr.object <- cell.attr[colnames(x = counts),, drop = FALSE] if (!identical(rownames(cell.attr.object), colnames(counts))) { # print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) @@ -1372,10 +1373,8 @@ SCTransform.StdAssay <- function( sct.assay.list[[dataset.names[i]]] <- assay.out } } - # Return array by merging everythin if (length(x = sct.assay.list) > 1){ - merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) # set variable features as the union of the features variable.features <- Reduce(f = union, x = variable.feature.list) From 712dbf3ebcbfb1c42852160002fa99c71dca8d15 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 11 Sep 2022 23:38:25 -0400 Subject: [PATCH 195/979] fix sct key --- R/preprocessing5.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index a1346273a..3596fdb3a 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1350,7 +1350,6 @@ SCTransform.StdAssay <- function( assay.out <- as(object = assay.out, Class = "SCTAssay") #TODO: Add a key to prevent hitting a bug in merge.StdAssay which # does not like character(0) keys being merged - assay.out@key <- "sct" return (assay.out) } From 5a15192dbc5d90f832c026995f39b1c1af7e1a84 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 12 Sep 2022 13:20:05 -0400 Subject: [PATCH 196/979] Fix assay creation with ref model --- R/preprocessing.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index b813a15c6..f04fe0ba5 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1752,6 +1752,10 @@ SCTransform.Assay <- function( if (!is.null(x = seed.use)) { set.seed(seed = seed.use) } + if (!is.null(reference.SCT.model)){ + do.correct.umi <- FALSE + do.center <- FALSE + } umi <- GetAssayData(object = object, slot = 'counts') vst.out <- SCTransform(object = umi, cell.attr = cell.attr, From 74fa1d68f06e8d2c100847e23a42d3cb3ebe8f74 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 13 Sep 2022 17:39:09 -0400 Subject: [PATCH 197/979] fix layer cells --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index f898308a9..c79ce4084 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1927,7 +1927,7 @@ IntegrateSketchEmbeddings <- function( cells.sketch <- Cells(x = object[[atoms]], layer = atoms.layers[i]) } else if (length(unique(atoms.layers)) == 1) { cells.sketch <- intersect(Cells(x = object[[atoms]][[atoms.layers[[1]]]]), - Cells(object[[orig]][layers[i]])) + Cells(object[[orig]][[layers[i] ]] )) } if (isTRUE(x = verbose)) { message( From bfd76aa0e3a4a859fcb211f95d5fae451574c1a7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 14 Sep 2022 00:00:26 -0400 Subject: [PATCH 198/979] set back uwot version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 93651d2e1..a95b1d200 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -75,7 +75,7 @@ Imports: tibble, tools, utils, - uwot (>= 0.1.14) + uwot (>= 0.1.10) LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: MIT + file LICENSE LazyData: true From d8d930706e3e048ab52dba637bdd5415c890128e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 14 Sep 2022 03:32:54 -0400 Subject: [PATCH 199/979] Multiple SCT integration --- NAMESPACE | 2 +- R/integration.R | 98 ++++++++---- R/objects.R | 10 +- R/preprocessing5.R | 378 +++++++++++++++++++++++---------------------- 4 files changed, 269 insertions(+), 219 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e8f7a1d2c..b401ba2ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -105,7 +105,6 @@ S3method(SCTResults,SCTModel) S3method(SCTResults,Seurat) S3method(SCTransform,Assay) S3method(SCTransform,Seurat) -S3method(SCTransform,Seurat5) S3method(SCTransform,StdAssay) S3method(SCTransform,default) S3method(ScaleData,Assay) @@ -200,6 +199,7 @@ export(Embeddings) export(ExpMean) export(ExpSD) export(ExpVar) +export(FastRPCAIntegration) export(FastRowScale) export(FeatureLocator) export(FeaturePlot) diff --git a/R/integration.R b/R/integration.R index f898308a9..abc294b92 100644 --- a/R/integration.R +++ b/R/integration.R @@ -782,19 +782,30 @@ FindTransferAnchors <- function( reference.reduction.init <- reference.reduction if (normalization.method == "SCT") { # ensure all residuals required are computed - query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) + #query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) + query.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = query, assay = query.assay, features = features, verbose = FALSE)) if (is.null(x = reference.reduction)) { - reference <- suppressWarnings(expr = GetResidual(object = reference, assay = reference.assay, features = features, verbose = FALSE)) + #reference <- suppressWarnings(expr = GetResidual(object = reference, assay = reference.assay, features = features, verbose = FALSE)) + reference.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = reference, assay = reference.assay, features = features, verbose = FALSE)) + # features <- intersect( + # x = features, + # y = intersect( + # x = rownames(x = GetAssayData(object = query[[query.assay]], slot = "scale.data")), + # y = rownames(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")) + # ) + # ) features <- intersect( x = features, y = intersect( - x = rownames(x = GetAssayData(object = query[[query.assay]], slot = "scale.data")), - y = rownames(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")) + x = rownames(x = query.sct.scaledata), + y = rownames(x = reference.sct.scaledata) ) ) + reference[[reference.assay]] <- as( object = CreateAssayObject( - data = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]), + #data = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]), + data = reference.sct.scaledata[features, ]), Class = "SCTAssay" ) reference <- SetAssayData( @@ -806,7 +817,8 @@ FindTransferAnchors <- function( } query[[query.assay]] <- as( object = CreateAssayObject( - data = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]), + #data = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]), + data = query.sct.scaledata[features, ]), Class = "SCTAssay" ) query <- SetAssayData( @@ -1374,21 +1386,31 @@ IntegrateData <- function( } if (normalization.method == "SCT") { model.list <- list() + scale.data <- list() for (i in 1:length(x = object.list)) { assay <- DefaultAssay(object = object.list[[i]]) if (length(x = setdiff(x = features.to.integrate, y = features)) != 0) { - object.list[[i]] <- GetResidual( + # object.list[[i]] <- GetResidual( + # object = object.list[[i]], + # features = setdiff(x = features.to.integrate, y = features), + # verbose = verbose + # ) + scale.data[[i]] <- FetchResiduals( object = object.list[[i]], features = setdiff(x = features.to.integrate, y = features), verbose = verbose ) } model.list[[i]] <- slot(object = object.list[[i]][[assay]], name = "SCTModel.list") + # object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( + # data = GetAssayData( + # object = object.list[[i]], + # assay = assay, + # slot = "scale.data") + # ) + # ) object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( - data = GetAssayData( - object = object.list[[i]], - assay = assay, - slot = "scale.data") + data = scale.data[[i]] ) ) } @@ -1938,10 +1960,10 @@ IntegrateSketchEmbeddings <- function( } if (inherits(x = object[[orig]][[layers[i]]], what = 'DelayedMatrix') ) { matrix.prod.function <- crossprod_DelayedAssay - } else { + } else { matrix.prod.function <- crossprod } - + emb <- switch( EXPR = method, 'data' = { @@ -2776,23 +2798,33 @@ PrepSCTIntegration <- function( object.list <- my.lapply( X = 1:length(x = object.list), FUN = function(i) { - obj <- GetResidual( - object = object.list[[i]], + # obj <- GetResidual( + # object = object.list[[i]], + # assay = assay[i], + # features = anchor.features, + # replace.value = ifelse(test = is.null(x = sct.clip.range), yes = FALSE, no = TRUE), + # clip.range = sct.clip.range, + # verbose = FALSE + # ) + # scale.data <- GetAssayData( + # object = obj, + # assay = assay[i], + # slot = 'scale.data' + # ) + obj <- object.list[[i]] + scale.data <- FetchResiduals( + object = obj, assay = assay[i], features = anchor.features, replace.value = ifelse(test = is.null(x = sct.clip.range), yes = FALSE, no = TRUE), clip.range = sct.clip.range, verbose = FALSE ) - scale.data <- GetAssayData( - object = obj, - assay = assay[i], - slot = 'scale.data' - ) + cells <- Cells(x = obj) obj <- SetAssayData( object = obj, slot = 'scale.data', - new.data = scale.data[anchor.features, ], + new.data = scale.data[anchor.features, cells], assay = assay[i] ) return(obj) @@ -5775,24 +5807,24 @@ ProjectCellEmbeddings_DelayedAssay <- function( feature.mean = NULL, feature.sd = NULL ) { - RowMeanSparse <- sparseMatrixStats::rowMeans2 - RowVarSparse <- sparseMatrixStats::rowVars + RowMeanSparse <- sparseMatrixStats::rowMeans2 + RowVarSparse <- sparseMatrixStats::rowVars dims <- dims %||% 1:ncol(reference[[reduction]]) assay <- assay %||% DefaultAssay(reference) - features <- intersect(rownames(query.data), + features <- intersect(rownames(query.data), rownames(reference[[reduction]]@feature.loadings)) query.data <- query.data[features,] - feature.mean <- feature.mean[features] %||% + feature.mean <- feature.mean[features] %||% RowMeanSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,]) - - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,])) + + feature.sd <- feature.sd[features] %||% + sqrt(RowVarSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - - + + setAutoBlockSize(size = block.size) # 1 GB cells.grid <- DelayedArray::colAutoGrid(x = query.data) - + emb.list <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] @@ -5877,7 +5909,7 @@ FastRPCAIntegration <- function( return(x) } ) - + anchor <- invoke( .fn = FindIntegrationAnchors, .args = c(list( @@ -5928,7 +5960,7 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e9) { if (nrow(x) != nrow(y)) { stop('row of x and y should be the same') } - sparse <- DelayedArray::is_sparse(x = y) + sparse <- DelayedArray::is_sparse(x = y) suppressMessages(setAutoBlockSize(size = block.size)) cells.grid <- DelayedArray::colAutoGrid(x = y) product.list <- list() diff --git a/R/objects.R b/R/objects.R index 59e0ac525..ba42573dd 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1927,7 +1927,15 @@ merge.SCTAssay <- function( if (inherits(x = assays[[assay]], what = "SCTAssay")) { parent.environ <- sys.frame(which = parent.call[1]) seurat.object <- parent.environ$objects[[assay]] - seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) + #seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) + scale.data <- suppressWarnings(expr = FetchResiduals(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) + cells <- Cells(x = seurat.object) + seurat.object <- SetAssayData( + object = seurat.object, + slot = 'scale.data', + new.data = scale.data[all.features, cells], + assay = parent.environ$assay + ) return(seurat.object[[parent.environ$assay]]) } return(assays[[assay]]) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 3596fdb3a..2a94de2ff 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -751,84 +751,84 @@ VST.DelayedMatrix <- function( verbose = FALSE, ... ) - # if (margin == 2L) { - # x <- t(x = x) - # } - # mu <- hvf.info$mean[idx] - # hvf.info$variance[idx] <- rowSums(x = ((x - mu) ^ 2) / (ncells - 1L)) - # # hvf.info$variance[idx] <- vapply( - # # X = seq_along(along.with = mu), - # # FUN = function(j) { - # # y <- if (margin == 1L) { - # # x[j, ] - # # } else { - # # x[, j] - # # } - # # y <- y - mu[j] - # # return(sum(y ^ 2) / (ncells - 1L)) - # # }, - # # FUN.VALUE = numeric(length = 1L) - # # ) - # if (isTRUE(x = verbose)) { - # setTxtProgressBar(pb = pb, value = i / length(x = grid)) - # } - # } - # if (isTRUE(x = verbose)) { - # close(con = pb) - # } - # hvf.info$variance.expected <- 0 - # not.const <- hvf.info$variance > 0 - # fit <- loess( - # formula = log10(x = variance) ~ log10(x = mean), - # data = hvf.info[not.const, , drop = FALSE], - # span = span - # ) - # hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted - # # Calculate standardized variance - # hvf.info$variance.standardized <- NA_real_ - # if (isTRUE(x = verbose)) { - # inform( - # message = "Calculating feature variances of standardized and clipped values" - # ) - # pb <- txtProgressBar(style = 3L, file = stderr()) - # } - # clip <- clip %||% sqrt(x = ncells) - # for (i in seq_len(length.out = length(x = grid))) { - # vp <- grid[[i]] - # idx <- seq.int( - # from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), - # to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) - # ) - # x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) - # if (isTRUE(x = sparse)) { - # x <- as(object = x, Class = "CsparseMatrix") - # } - # if (margin == 2L) { - # x <- t(x = x) - # } - # mu <- hvf.info$mean[idx] - # sd <- sqrt(x = hvf.info$variance.expected[idx]) - # hvf.info$variance.standardized[idx] <- 0 - # sdn <- which(x = sd != 0) - # hvf.info$variance.standardized[idx[sdn]] <- rowSums(x = (((x[sdn, ] - mu[sdn]) / sd[sdn]) ^ 2) / (ncells - 1L)) - # # hvf.info$variance.standardized[idx] <- vapply( - # # X = seq_along(along.with = mu), - # # FUN = function(j) { - # # if (sd[j] == 0) { - # # return(0) - # # } - # # y <- if (margin == 1L) { - # # x[j, ] - # # } else { - # # x[, j] - # # } - # # y <- y - mu[j] - # # y <- y / sd[j] - # # y[y > clip] <- clip - # # return(sum(y ^ 2) / (ncells - 1L)) - # # }, - # # FUN.VALUE = numeric(length = 1L) - # # ) + # if (margin == 2L) { + # x <- t(x = x) + # } + # mu <- hvf.info$mean[idx] + # hvf.info$variance[idx] <- rowSums(x = ((x - mu) ^ 2) / (ncells - 1L)) + # # hvf.info$variance[idx] <- vapply( + # # X = seq_along(along.with = mu), + # # FUN = function(j) { + # # y <- if (margin == 1L) { + # # x[j, ] + # # } else { + # # x[, j] + # # } + # # y <- y - mu[j] + # # return(sum(y ^ 2) / (ncells - 1L)) + # # }, + # # FUN.VALUE = numeric(length = 1L) + # # ) + # if (isTRUE(x = verbose)) { + # setTxtProgressBar(pb = pb, value = i / length(x = grid)) + # } + # } + # if (isTRUE(x = verbose)) { + # close(con = pb) + # } + # hvf.info$variance.expected <- 0 + # not.const <- hvf.info$variance > 0 + # fit <- loess( + # formula = log10(x = variance) ~ log10(x = mean), + # data = hvf.info[not.const, , drop = FALSE], + # span = span + # ) + # hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + # # Calculate standardized variance + # hvf.info$variance.standardized <- NA_real_ + # if (isTRUE(x = verbose)) { + # inform( + # message = "Calculating feature variances of standardized and clipped values" + # ) + # pb <- txtProgressBar(style = 3L, file = stderr()) + # } + # clip <- clip %||% sqrt(x = ncells) + # for (i in seq_len(length.out = length(x = grid))) { + # vp <- grid[[i]] + # idx <- seq.int( + # from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), + # to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) + # ) + # x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) + # if (isTRUE(x = sparse)) { + # x <- as(object = x, Class = "CsparseMatrix") + # } + # if (margin == 2L) { + # x <- t(x = x) + # } + # mu <- hvf.info$mean[idx] + # sd <- sqrt(x = hvf.info$variance.expected[idx]) + # hvf.info$variance.standardized[idx] <- 0 + # sdn <- which(x = sd != 0) + # hvf.info$variance.standardized[idx[sdn]] <- rowSums(x = (((x[sdn, ] - mu[sdn]) / sd[sdn]) ^ 2) / (ncells - 1L)) + # # hvf.info$variance.standardized[idx] <- vapply( + # # X = seq_along(along.with = mu), + # # FUN = function(j) { + # # if (sd[j] == 0) { + # # return(0) + # # } + # # y <- if (margin == 1L) { + # # x[j, ] + # # } else { + # # x[, j] + # # } + # # y <- y - mu[j] + # # y <- y / sd[j] + # # y[y > clip] <- clip + # # return(sum(y ^ 2) / (ncells - 1L)) + # # }, + # # FUN.VALUE = numeric(length = 1L) + # # ) if (isTRUE(x = verbose)) { setTxtProgressBar(pb = pb, value = i / length(x = grid)) } @@ -1231,26 +1231,25 @@ VST.matrix <- function( #' @export #' SCTransform.StdAssay <- function( - object, - layer = 'counts', - cell.attr = NULL, - reference.SCT.model = NULL, - do.correct.umi = TRUE, - ncells = 5000, - residual.features = NULL, - variable.features.n = 3000, - variable.features.rv.th = 1.3, - vars.to.regress = NULL, - do.scale = FALSE, - do.center = TRUE, - clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), - conserve.memory = FALSE, - return.only.var.genes = TRUE, - seed.use = 1448145, - verbose = TRUE, - ... + object, + layer = 'counts', + cell.attr = NULL, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... ) { - if (!is.null(reference.SCT.model)){ do.correct.umi <- FALSE do.center <- FALSE @@ -1264,27 +1263,28 @@ SCTransform.StdAssay <- function( if (isTRUE(x = verbose)) { message("Running SCTransform on layer: ", l) } + all_cells <- Cells(x = object, layer = l) + all_features <- Features(x = object, layer = l) counts <- LayerData( object = object, layer = l, - features = Features(x = object, layer = l), - cells = Cells(x = object, layer = l) - ) + features = all_features, + cells = all_cells + ) ## Sample cells cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) - # if there is no reference model we randomly select a subset of cells # TODO: randomize this set of cells variable.feature.list <- list() - GetSCT.Chunked <- function(vp){ + GetSCT.Chunked <- function(vp, reference.SCT.model = NULL, do.correct.umi = TRUE){ sparse <- DelayedArray::is_sparse(x = counts) # TRUE block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = sparse) counts <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts),, drop = FALSE] + cell.attr.object <- cell.attr[colnames(x = counts),, drop=FALSE] if (!identical(rownames(cell.attr.object), colnames(counts))) { # print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) @@ -1316,13 +1316,8 @@ SCTransform.StdAssay <- function( if (verbose) { message('Place corrected count matrix in counts slot') } - if (is.null(reference.SCT.model)){ - assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) - vst.out$umi_corrected <- NULL - } else { - assay.out <- CreateAssayObject(counts = counts) - } - + assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + vst.out$umi_corrected <- NULL } else { # TODO: restore once check.matrix is in SeuratObject # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) @@ -1350,102 +1345,118 @@ SCTransform.StdAssay <- function( assay.out <- as(object = assay.out, Class = "SCTAssay") #TODO: Add a key to prevent hitting a bug in merge.StdAssay which # does not like character(0) keys being merged + assay.out@key <- "sct" return (assay.out) } - + local.reference.SCT.model <- NULL if (is.null(reference.SCT.model)){ # No reference model so just select the first block of cells vp <- cells.grid[[1L]] - assay.out <- GetSCT.Chunked(vp = vp) - variable.feature.list[[dataset.names[i]]] <- VariableFeatures(assay.out) - sct.assay.list[[dataset.names[i]]] <- assay.out + assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = FALSE) + local.reference.SCT.model <- assay.out@SCTModel.list[[1]] + variable.features <- VariableFeatures(assay.out) + #residuals <- FetchResiduals(object, features = VariableFeatures(assay.out), assay = ) + # once we have the model, just calculate residuals for all + # cells + vst_out.reference <- SCTModel_to_vst(SCTModel = local.reference.SCT.model) + min_var <- vst_out.reference$arguments$min_variance + if (min_var == "umi_median"){ + block <- DelayedArray::read_block(x = counts, + viewport = vp, + as.sparse = TRUE) + + counts.x <- as(object = block, Class = 'dgCMatrix') + min_var <- (median(counts.x@x)/5)^2 + } + res_clip_range <- vst_out.reference$arguments$res_clip_range + residuals <- list() + corrected_counts <- list() + cell_attrs <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = counts, + viewport = vp, + as.sparse = TRUE) + + counts.vp <- as(object = block, Class = 'dgCMatrix') + cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] + vst_out <- vst_out.reference + cell_attr <- data.frame( + umi = colSums(counts.vp), + log_umi = log10(x = colSums(counts.vp)) + ) + rownames(cell_attr) <- colnames(counts.vp) + vst_out$cell_attr <- cell_attr + + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[all_features,], + residual_type = "pearson", + min_variance = min_var, + res_clip_range = res_clip_range, + verbosity = as.numeric(x = verbose) * 2 + ) + corrected_counts[[i]] <- correct_counts( + x = vst_out, + umi = counts.vp[all_features,], + verbosity = as.numeric(x = verbose) * 2 + ) + residuals[[i]] <- new_residual + cell_attrs[[i]] <- cell_attr + } + + + new.residuals <- Reduce(cbind, residuals) + corrected_counts <- Reduce(cbind, corrected_counts) + cell_attrs <- Reduce(rbind, cell_attrs) + + vst_out.reference$cell_attr <- cell_attrs[colnames(new.residuals),] + SCTModel.list <- PrepVSTResults( + vst.res = vst_out.reference, + cell.names = all_cells + ) + SCTModel.list <- list(model1 = SCTModel.list) + + merged.assay <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list=SCTModel.list) + VariableFeatures(merged.assay) <- variable.features } else { sct.assay.list.temp <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - assay.out <- GetSCT.Chunked(vp = vp) + assay.out <- GetSCT.Chunked(vp = vp, + reference.SCT.model = reference.SCT.model %||% local.reference.SCT.model, + do.correct.umi = do.correct.umi) sct.assay.list.temp[[paste0("chunk", i)]] <- assay.out - } + } if (length(sct.assay.list.temp)>1){ # this currently fails in merge.StdAssay step # assignment of an object of class “list” is not valid for # slot ‘key’ in an object of class “Assay”; is(value, "character") is not TRUE assay.out <- merge(x = sct.assay.list.temp[[1]], y = sct.assay.list.temp[2:length(sct.assay.list.temp)]) - + } else { + assay.out <- sct.assay.list.temp[[1]] + } + sct.assay.list[[dataset.names[i]]] <- assay.out + # Return array by merging everythin + if (length(x = sct.assay.list) > 1){ + merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) + # set variable features as the union of the features + variable.features <- Reduce(f = union, x = variable.feature.list) + VariableFeatures(object = merged.assay) <- variable.features + # set the names of SCTmodels to be layer names + models <- slot(object = merged.assay, name="SCTModel.list") + names(models) <- names(x = sct.assay.list) + slot(object = merged.assay, name="SCTModel.list") <- models } else { - assay.out <- sct.assay.list.temp[[1]] + merged.assay <- sct.assay.list[[1]] } - sct.assay.list[[dataset.names[i]]] <- assay.out } } - # Return array by merging everythin - if (length(x = sct.assay.list) > 1){ - merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) - # set variable features as the union of the features - variable.features <- Reduce(f = union, x = variable.feature.list) - VariableFeatures(object = merged.assay) <- variable.features - # set the names of SCTmodels to be layer names - models <- slot(object = merged.assay, name="SCTModel.list") - names(models) <- names(x = sct.assay.list) - slot(object = merged.assay, name="SCTModel.list") <- models - } else { - return (sct.assay.list[[1]]) - } gc(verbose = FALSE) return(merged.assay) } -#' @importFrom SeuratObject DefaultAssay -#' -#' @method SCTransform Seurat5 -#' @export -#' -SCTransform.Seurat5 <- function( - object, - assay = NULL, - reference.SCT.model = NULL, - do.correct.umi = TRUE, - ncells = 5000, - residual.features = NULL, - variable.features.n = 3000, - variable.features.rv.th = 1.3, - vars.to.regress = NULL, - do.scale = FALSE, - do.center = TRUE, - clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), - conserve.memory = FALSE, - return.only.var.genes = TRUE, - seed.use = 1448145, - save.data = 'data', - save.scaledata = 'scale.data', - verbose = TRUE, - ... -) { - assay <- assay[1L] %||% DefaultAssay(object = object) - assay <- match.arg(arg = assay, choices = Assays(object = object)) - cell.attr.list <- slot(object = object, name = 'meta.data') - - object[[assay]] <- SCTransform(object = object[[assay]], - cell.attr.list = cell.attr.list, - reference.SCT.model = reference.SCT.model, - do.correct.umi = do.correct.umi, - ncells = ncells, - residual.features = residual.features, - variable.features.n = variable.features.n, - variable.features.rv.th = variable.features.rv.th, - vars.to.regress = vars.to.regress, - do.scale = do.scale, - do.center = do.center, - clip.range = clip.range, - conserve.memory = conserve.memory, - return.only.var.genes = return.only.var.genes, - seed.use = seed.use, - verbose = verbose, - ...) - return(object) -} - #' Calculate pearson residuals of features not in the scale.data #' @@ -1718,7 +1729,7 @@ FetchResidualSCTModel <- function(object, ) # iterate over 2k cells at once - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = 2000) + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) new_residuals <- list() # cat(dim(counts)) @@ -1744,7 +1755,6 @@ FetchResidualSCTModel <- function(object, umi = colSums(umi.all), log_umi = log10(x = colSums(umi.all)) ) - cell_attr$cells_step1 <- FALSE # cell_attr <- as.matrix(x = cell_attr) rownames(cell_attr) <- colnames(umi) if (sct.method == "reference") { From 58989f23f7e216f4e488a1881242a280b4e1ab86 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 14 Sep 2022 10:32:51 -0400 Subject: [PATCH 200/979] Fixes for multiple sct integration --- R/integration.R | 16 ++++++++++++---- R/objects.R | 21 +++++++++++++++------ R/preprocessing5.R | 2 +- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/R/integration.R b/R/integration.R index abc294b92..057885b9f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1368,6 +1368,13 @@ IntegrateData <- function( anchors <- slot(object = anchorset, name = 'anchors') ref <- object.list[reference.datasets] features <- features %||% slot(object = anchorset, name = "anchor.features") + #print(Idents(object.list[[1]])) + #print(Idents(object.list[[2]])) + # for (obj in seq_along(along.with = object.list)){ + # object <- object.list[[obj]] + # idents <- Idents(object = object.list[[obj]]) + # print(all(names(x = idents) == colnames(x = object))) + # } unintegrated <- suppressWarnings(expr = merge( x = object.list[[1]], y = object.list[2:length(x = object.list)] @@ -1400,6 +1407,8 @@ IntegrateData <- function( features = setdiff(x = features.to.integrate, y = features), verbose = verbose ) + } else { + scale.data[[i]] <- GetAssayData(object = object.list[[i]], assay = assay, slot = "scale.data") } model.list[[i]] <- slot(object = object.list[[i]][[assay]], name = "SCTModel.list") # object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( @@ -1409,15 +1418,13 @@ IntegrateData <- function( # slot = "scale.data") # ) # ) - object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( - data = scale.data[[i]] - ) - ) + object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject(data = scale.data[[i]], scale.data = scale.data[[i]])) } model.list <- unlist(x = model.list) slot(object = anchorset, name = "object.list") <- object.list } # perform pairwise integration of reference objects + #browser() reference.integrated <- PairwiseIntegrateReference( anchorset = anchorset, new.assay.name = new.assay.name, @@ -4457,6 +4464,7 @@ PairwiseIntegrateReference <- function( for (ii in 1:length(x = object.list)) { cellnames.list[[ii]] <- colnames(x = object.list[[ii]]) } + print(object.list[[reference.objects[[1]]]]) unintegrated <- suppressWarnings(expr = merge( x = object.list[[reference.objects[[1]]]], y = object.list[reference.objects[2:length(x = reference.objects)]] diff --git a/R/objects.R b/R/objects.R index ba42573dd..4feb52f7e 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1930,12 +1930,21 @@ merge.SCTAssay <- function( #seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) scale.data <- suppressWarnings(expr = FetchResiduals(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) cells <- Cells(x = seurat.object) - seurat.object <- SetAssayData( - object = seurat.object, - slot = 'scale.data', - new.data = scale.data[all.features, cells], - assay = parent.environ$assay - ) + obj.models <- seurat.object[[parent.environ$assay]]@SCTModel.list + obj.data <- GetAssayData(object = seurat.object, assay = parent.environ$assay, slot = "data") + seurat.object[[parent.environ$assay]] <- CreateSCTAssayObject(scale.data = scale.data, data = obj.data, SCTModel.list = obj.models) + # seurat.object <- SetAssayData( + # object = seurat.object, + # slot = 'scale.data', + # new.data = scale.data[all.features, cells], + # assay = parent.environ$assay + # ) + # seurat.object <- SetAssayData( + # object = seurat.object, + # slot = 'data', + # new.data = scale.data[all.features, cells], + # assay = parent.environ$assay + # ) return(seurat.object[[parent.environ$assay]]) } return(assays[[assay]]) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 2a94de2ff..4087abce3 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1495,7 +1495,7 @@ SCTransform.StdAssay <- function( #' FetchResiduals <- function(object, features, - assay = NULL, + assay = "SCT", umi.assay = "RNA", layer = "counts", clip.range = NULL, From 17813717f9c653135839a794ac0811c21efadddc Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 14 Sep 2022 15:02:23 -0400 Subject: [PATCH 201/979] Handle V5 assays inside GetResidual --- R/integration.R | 42 ++++++++++++++++++------------------------ R/objects.R | 12 ++++++------ R/preprocessing.R | 46 +++++++++++++++++++++++++++++++--------------- R/preprocessing5.R | 2 +- 4 files changed, 56 insertions(+), 46 deletions(-) diff --git a/R/integration.R b/R/integration.R index 057885b9f..057bf1e69 100644 --- a/R/integration.R +++ b/R/integration.R @@ -782,30 +782,30 @@ FindTransferAnchors <- function( reference.reduction.init <- reference.reduction if (normalization.method == "SCT") { # ensure all residuals required are computed - #query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) - query.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = query, assay = query.assay, features = features, verbose = FALSE)) + query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) + #query.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = query, assay = query.assay, features = features, verbose = FALSE)) if (is.null(x = reference.reduction)) { - #reference <- suppressWarnings(expr = GetResidual(object = reference, assay = reference.assay, features = features, verbose = FALSE)) - reference.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = reference, assay = reference.assay, features = features, verbose = FALSE)) - # features <- intersect( - # x = features, - # y = intersect( - # x = rownames(x = GetAssayData(object = query[[query.assay]], slot = "scale.data")), - # y = rownames(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")) - # ) - # ) + reference <- suppressWarnings(expr = GetResidual(object = reference, assay = reference.assay, features = features, verbose = FALSE)) + #reference.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = reference, assay = reference.assay, features = features, verbose = FALSE)) features <- intersect( x = features, y = intersect( - x = rownames(x = query.sct.scaledata), - y = rownames(x = reference.sct.scaledata) + x = rownames(x = GetAssayData(object = query[[query.assay]], slot = "scale.data")), + y = rownames(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")) ) ) + # features <- intersect( + # x = features, + # y = intersect( + # x = rownames(x = query.sct.scaledata), + # y = rownames(x = reference.sct.scaledata) + # ) + # ) reference[[reference.assay]] <- as( object = CreateAssayObject( - #data = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]), - data = reference.sct.scaledata[features, ]), + data = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]), + #data = reference.sct.scaledata[features, ]), Class = "SCTAssay" ) reference <- SetAssayData( @@ -817,8 +817,8 @@ FindTransferAnchors <- function( } query[[query.assay]] <- as( object = CreateAssayObject( - #data = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]), - data = query.sct.scaledata[features, ]), + data = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]), + #data = query.sct.scaledata[features, ]), Class = "SCTAssay" ) query <- SetAssayData( @@ -1368,13 +1368,7 @@ IntegrateData <- function( anchors <- slot(object = anchorset, name = 'anchors') ref <- object.list[reference.datasets] features <- features %||% slot(object = anchorset, name = "anchor.features") - #print(Idents(object.list[[1]])) - #print(Idents(object.list[[2]])) - # for (obj in seq_along(along.with = object.list)){ - # object <- object.list[[obj]] - # idents <- Idents(object = object.list[[obj]]) - # print(all(names(x = idents) == colnames(x = object))) - # } + unintegrated <- suppressWarnings(expr = merge( x = object.list[[1]], y = object.list[2:length(x = object.list)] diff --git a/R/objects.R b/R/objects.R index 4feb52f7e..f36aeb8c1 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1927,12 +1927,12 @@ merge.SCTAssay <- function( if (inherits(x = assays[[assay]], what = "SCTAssay")) { parent.environ <- sys.frame(which = parent.call[1]) seurat.object <- parent.environ$objects[[assay]] - #seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) - scale.data <- suppressWarnings(expr = FetchResiduals(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) - cells <- Cells(x = seurat.object) - obj.models <- seurat.object[[parent.environ$assay]]@SCTModel.list - obj.data <- GetAssayData(object = seurat.object, assay = parent.environ$assay, slot = "data") - seurat.object[[parent.environ$assay]] <- CreateSCTAssayObject(scale.data = scale.data, data = obj.data, SCTModel.list = obj.models) + seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) + #scale.data <- suppressWarnings(expr = FetchResiduals(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) + #cells <- Cells(x = seurat.object) + #obj.models <- seurat.object[[parent.environ$assay]]@SCTModel.list + #obj.data <- GetAssayData(object = seurat.object, assay = parent.environ$assay, slot = "data") + ##seurat.object[[parent.environ$assay]] <- CreateSCTAssayObject(scale.data = scale.data, data = obj.data, SCTModel.list = obj.models) # seurat.object <- SetAssayData( # object = seurat.object, # slot = 'scale.data', diff --git a/R/preprocessing.R b/R/preprocessing.R index f04fe0ba5..8ef93d4c4 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -365,7 +365,7 @@ GetResidual <- function( object, features, assay = NULL, - umi.assay = NULL, + umi.assay = "RNA", clip.range = NULL, replace.value = FALSE, na.rm = TRUE, @@ -410,20 +410,36 @@ GetResidual <- function( if (length(x = sct.models) > 1 & verbose) { message("This SCTAssay contains multiple SCT models. Computing residuals for cells using") } - new.residuals <- lapply( - X = sct.models, - FUN = function(x) { - GetResidualSCTModel( - object = object, - assay = assay, - SCTModel = x, - new_features = features, - replace.value = replace.value, - clip.range = clip.range, - verbose = verbose - ) - } - ) + if (class(x = object[[umi.assay]]) == "Assay"){ + new.residuals <- lapply( + X = sct.models, + FUN = function(x) { + GetResidualSCTModel( + object = object, + assay = assay, + SCTModel = x, + new_features = features, + replace.value = replace.value, + clip.range = clip.range, + verbose = verbose + ) + } + ) + } else if (class(x = object[[umi.assay]]) == "Assay5"){ + new.residuals <- lapply( + X = sct.models, + FUN = function(x) { + FetchResidualSCTModel(object = object, + umi.assay = umi.assay, + SCTModel = x, + new_features = features, + replace.value = replace.value, + clip.range = clip.range, + verbose = verbose) + } + ) + } + existing.data <- GetAssayData(object = object, slot = 'scale.data', assay = assay) all.features <- union(x = rownames(x = existing.data), y = features) new.scale <- matrix( diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 4087abce3..ba1e76f4d 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1658,7 +1658,7 @@ FetchResidualSCTModel <- function(object, model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" - + layer.cells <- layer.cells %||% Cells(x = object[[umi.assay]], layer = layer) if (!is.null(reference.SCT.model)) { # use reference SCT model sct.method <- "reference" From c2d9600b7b1e20f286a98e9ae0466f452167a880 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 14 Sep 2022 15:21:51 -0400 Subject: [PATCH 202/979] Cleanup --- R/integration.R | 70 +++++++++++----------------------------------- R/objects.R | 17 ----------- R/preprocessing5.R | 5 ++-- 3 files changed, 18 insertions(+), 74 deletions(-) diff --git a/R/integration.R b/R/integration.R index 057bf1e69..84fbc5577 100644 --- a/R/integration.R +++ b/R/integration.R @@ -783,10 +783,8 @@ FindTransferAnchors <- function( if (normalization.method == "SCT") { # ensure all residuals required are computed query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) - #query.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = query, assay = query.assay, features = features, verbose = FALSE)) if (is.null(x = reference.reduction)) { reference <- suppressWarnings(expr = GetResidual(object = reference, assay = reference.assay, features = features, verbose = FALSE)) - #reference.sct.scaledata <- suppressWarnings(expr = FetchResiduals(object = reference, assay = reference.assay, features = features, verbose = FALSE)) features <- intersect( x = features, y = intersect( @@ -794,18 +792,9 @@ FindTransferAnchors <- function( y = rownames(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")) ) ) - # features <- intersect( - # x = features, - # y = intersect( - # x = rownames(x = query.sct.scaledata), - # y = rownames(x = reference.sct.scaledata) - # ) - # ) - reference[[reference.assay]] <- as( object = CreateAssayObject( data = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]), - #data = reference.sct.scaledata[features, ]), Class = "SCTAssay" ) reference <- SetAssayData( @@ -818,7 +807,6 @@ FindTransferAnchors <- function( query[[query.assay]] <- as( object = CreateAssayObject( data = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]), - #data = query.sct.scaledata[features, ]), Class = "SCTAssay" ) query <- SetAssayData( @@ -1391,34 +1379,25 @@ IntegrateData <- function( for (i in 1:length(x = object.list)) { assay <- DefaultAssay(object = object.list[[i]]) if (length(x = setdiff(x = features.to.integrate, y = features)) != 0) { - # object.list[[i]] <- GetResidual( - # object = object.list[[i]], - # features = setdiff(x = features.to.integrate, y = features), - # verbose = verbose - # ) - scale.data[[i]] <- FetchResiduals( - object = object.list[[i]], - features = setdiff(x = features.to.integrate, y = features), - verbose = verbose + object.list[[i]] <- GetResidual( + object = object.list[[i]], + features = setdiff(x = features.to.integrate, y = features), + verbose = verbose ) - } else { - scale.data[[i]] <- GetAssayData(object = object.list[[i]], assay = assay, slot = "scale.data") } model.list[[i]] <- slot(object = object.list[[i]][[assay]], name = "SCTModel.list") - # object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( - # data = GetAssayData( - # object = object.list[[i]], - # assay = assay, - # slot = "scale.data") - # ) - # ) - object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject(data = scale.data[[i]], scale.data = scale.data[[i]])) + object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( + data = GetAssayData( + object = object.list[[i]], + assay = assay, + slot = "scale.data") + ) + ) } model.list <- unlist(x = model.list) slot(object = anchorset, name = "object.list") <- object.list } # perform pairwise integration of reference objects - #browser() reference.integrated <- PairwiseIntegrateReference( anchorset = anchorset, new.assay.name = new.assay.name, @@ -2799,34 +2778,18 @@ PrepSCTIntegration <- function( object.list <- my.lapply( X = 1:length(x = object.list), FUN = function(i) { - # obj <- GetResidual( - # object = object.list[[i]], - # assay = assay[i], - # features = anchor.features, - # replace.value = ifelse(test = is.null(x = sct.clip.range), yes = FALSE, no = TRUE), - # clip.range = sct.clip.range, - # verbose = FALSE - # ) - # scale.data <- GetAssayData( - # object = obj, - # assay = assay[i], - # slot = 'scale.data' - # ) - obj <- object.list[[i]] - scale.data <- FetchResiduals( - object = obj, + obj <- GetResidual( + object = object.list[[i]], assay = assay[i], features = anchor.features, replace.value = ifelse(test = is.null(x = sct.clip.range), yes = FALSE, no = TRUE), clip.range = sct.clip.range, verbose = FALSE ) - cells <- Cells(x = obj) - obj <- SetAssayData( + scale.data <- GetAssayData( object = obj, - slot = 'scale.data', - new.data = scale.data[anchor.features, cells], - assay = assay[i] + assay = assay[i], + slot = 'scale.data' ) return(obj) } @@ -4458,7 +4421,6 @@ PairwiseIntegrateReference <- function( for (ii in 1:length(x = object.list)) { cellnames.list[[ii]] <- colnames(x = object.list[[ii]]) } - print(object.list[[reference.objects[[1]]]]) unintegrated <- suppressWarnings(expr = merge( x = object.list[[reference.objects[[1]]]], y = object.list[reference.objects[2:length(x = reference.objects)]] diff --git a/R/objects.R b/R/objects.R index f36aeb8c1..59e0ac525 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1928,23 +1928,6 @@ merge.SCTAssay <- function( parent.environ <- sys.frame(which = parent.call[1]) seurat.object <- parent.environ$objects[[assay]] seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) - #scale.data <- suppressWarnings(expr = FetchResiduals(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) - #cells <- Cells(x = seurat.object) - #obj.models <- seurat.object[[parent.environ$assay]]@SCTModel.list - #obj.data <- GetAssayData(object = seurat.object, assay = parent.environ$assay, slot = "data") - ##seurat.object[[parent.environ$assay]] <- CreateSCTAssayObject(scale.data = scale.data, data = obj.data, SCTModel.list = obj.models) - # seurat.object <- SetAssayData( - # object = seurat.object, - # slot = 'scale.data', - # new.data = scale.data[all.features, cells], - # assay = parent.environ$assay - # ) - # seurat.object <- SetAssayData( - # object = seurat.object, - # slot = 'data', - # new.data = scale.data[all.features, cells], - # assay = parent.environ$assay - # ) return(seurat.object[[parent.environ$assay]]) } return(assays[[assay]]) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index ba1e76f4d..ab3be0e61 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1355,7 +1355,6 @@ SCTransform.StdAssay <- function( assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = FALSE) local.reference.SCT.model <- assay.out@SCTModel.list[[1]] variable.features <- VariableFeatures(assay.out) - #residuals <- FetchResiduals(object, features = VariableFeatures(assay.out), assay = ) # once we have the model, just calculate residuals for all # cells vst_out.reference <- SCTModel_to_vst(SCTModel = local.reference.SCT.model) @@ -1417,14 +1416,14 @@ SCTransform.StdAssay <- function( ) SCTModel.list <- list(model1 = SCTModel.list) - merged.assay <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list=SCTModel.list) + merged.assay <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list = SCTModel.list) VariableFeatures(merged.assay) <- variable.features } else { sct.assay.list.temp <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] assay.out <- GetSCT.Chunked(vp = vp, - reference.SCT.model = reference.SCT.model %||% local.reference.SCT.model, + reference.SCT.model = reference.SCT.model, do.correct.umi = do.correct.umi) sct.assay.list.temp[[paste0("chunk", i)]] <- assay.out } From 4a7ee12df3899e850b657ca2b6551c5d299ffe62 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 14 Sep 2022 15:26:34 -0400 Subject: [PATCH 203/979] Cleanup fixes --- R/integration.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 84fbc5577..4801c0d33 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1375,14 +1375,13 @@ IntegrateData <- function( } if (normalization.method == "SCT") { model.list <- list() - scale.data <- list() for (i in 1:length(x = object.list)) { assay <- DefaultAssay(object = object.list[[i]]) if (length(x = setdiff(x = features.to.integrate, y = features)) != 0) { object.list[[i]] <- GetResidual( - object = object.list[[i]], - features = setdiff(x = features.to.integrate, y = features), - verbose = verbose + object = object.list[[i]], + features = setdiff(x = features.to.integrate, y = features), + verbose = verbose ) } model.list[[i]] <- slot(object = object.list[[i]][[assay]], name = "SCTModel.list") @@ -2791,6 +2790,12 @@ PrepSCTIntegration <- function( assay = assay[i], slot = 'scale.data' ) + obj <- SetAssayData( + object = obj, + slot = 'scale.data', + new.data = scale.data[anchor.features, ], + assay = assay[i] + ) return(obj) } ) From 10712fc86437232692e2fe43e50757c89e2993e3 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 14 Sep 2022 15:31:38 -0400 Subject: [PATCH 204/979] Remove debug message --- R/preprocessing5.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index ab3be0e61..1942217f4 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1738,13 +1738,13 @@ FetchResidualSCTModel <- function(object, ## TODO: Maybe read only interesting genes umi.all <- as(object = block, Class = "dgCMatrix") - # calcluclate min_variance for get_residuals + # calculate min_variance for get_residuals # required when vst_out$arguments$min_variance == "umi_median" # only calculated once if (i==1){ nz_median <- median(umi.all@x) min_var_custom <- (nz_median / 5)^2 - print(paste("min_var_custom", min_var_custom)) + # print(paste("min_var_custom", min_var_custom)) } umi <- umi.all[features_to_compute, , drop = FALSE] From ce94aaaaaa98c823660d3d00f6d4a0dd3b7a42d0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 00:38:03 -0400 Subject: [PATCH 205/979] fix find variable genes --- R/integration.R | 3 +-- R/preprocessing.R | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 3d14f798b..917b4fa29 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5795,8 +5795,7 @@ ProjectCellEmbeddings_DelayedAssay <- function( feature.sd <- feature.sd[features] %||% sqrt(RowVarSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - - + setAutoBlockSize(size = block.size) # 1 GB cells.grid <- DelayedArray::colAutoGrid(x = query.data) diff --git a/R/preprocessing.R b/R/preprocessing.R index ee4f184e1..3edc07dc7 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2111,7 +2111,7 @@ FindVariableFeatures.Assay <- function( verbose = verbose, ... ) - object[[names(x = hvf.info)]] <- hvf.info + object[names(x = hvf.info)] <- hvf.info hvf.info <- hvf.info[which(x = hvf.info[, 1, drop = TRUE] != 0), ] if (selection.method == "vst") { hvf.info <- hvf.info[order(hvf.info$vst.variance.standardized, decreasing = TRUE), , drop = FALSE] @@ -2142,7 +2142,7 @@ FindVariableFeatures.Assay <- function( no = 'mvp' ) vf.name <- paste0(vf.name, '.variable') - object[[vf.name]] <- rownames(x = object[[]]) %in% top.features + object[vf.name] <- rownames(x = object[]) %in% top.features return(object) } From 30fabde15eb2bfeee873c161e8876711ff018195 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 15 Sep 2022 11:13:07 -0400 Subject: [PATCH 206/979] Optimize SCT v5 --- R/preprocessing5.R | 143 +++++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 50 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 1942217f4..ef5d3729a 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1278,7 +1278,7 @@ SCTransform.StdAssay <- function( variable.feature.list <- list() GetSCT.Chunked <- function(vp, reference.SCT.model = NULL, do.correct.umi = TRUE){ - sparse <- DelayedArray::is_sparse(x = counts) # TRUE + sparse <- DelayedArray::is_sparse(x = counts) block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = sparse) @@ -1300,8 +1300,8 @@ SCTransform.StdAssay <- function( variable.features.n = variable.features.n, variable.features.rv.th = variable.features.rv.th, vars.to.regress = vars.to.regress, - do.scale = do.scale, - do.center = do.center, + do.scale = FALSE, + do.center = FALSE, clip.range = clip.range, conserve.memory = conserve.memory, return.only.var.genes = return.only.var.genes, @@ -1345,13 +1345,15 @@ SCTransform.StdAssay <- function( assay.out <- as(object = assay.out, Class = "SCTAssay") #TODO: Add a key to prevent hitting a bug in merge.StdAssay which # does not like character(0) keys being merged - assay.out@key <- "sct" + assay.out@key <- "sct_" return (assay.out) } local.reference.SCT.model <- NULL if (is.null(reference.SCT.model)){ - # No reference model so just select the first block of cells - vp <- cells.grid[[1L]] + # No reference model so just select the some block of cells + set.seed(seed = seed.use) + selected.block <- sample(x = seq.int(from = 1, to = length(cells.grid)), size = 1) + vp <- cells.grid[[selected.block]] assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = FALSE) local.reference.SCT.model <- assay.out@SCTModel.list[[1]] variable.features <- VariableFeatures(assay.out) @@ -1371,53 +1373,75 @@ SCTransform.StdAssay <- function( residuals <- list() corrected_counts <- list() cell_attrs <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = TRUE) - - counts.vp <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] - vst_out <- vst_out.reference - cell_attr <- data.frame( - umi = colSums(counts.vp), - log_umi = log10(x = colSums(counts.vp)) - ) - rownames(cell_attr) <- colnames(counts.vp) - vst_out$cell_attr <- cell_attr - - new_residual <- get_residuals( - vst_out = vst_out, - umi = counts.vp[all_features,], - residual_type = "pearson", - min_variance = min_var, - res_clip_range = res_clip_range, - verbosity = as.numeric(x = verbose) * 2 + if (length(cells.grid)==1){ + merged.assay <- assay.out + corrected_counts[[1]] <- GetAssayData(object = assay.out, slot="data") + residuals[[1]] <- GetAssayData(object = assay.out, slot="scale.data") + cell_attrs[[1]] <- vst_out.reference$cell_attr + } else { + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + print(paste0("chunk ", i)) + block <- DelayedArray::read_block(x = counts, + viewport = vp, + as.sparse = TRUE) + + counts.vp <- as(object = block, Class = 'dgCMatrix') + cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] + vst_out <- vst_out.reference + cell_attr <- data.frame( + umi = colSums(counts.vp), + log_umi = log10(x = colSums(counts.vp)) + ) + rownames(cell_attr) <- colnames(counts.vp) + vst_out$cell_attr <- cell_attr + + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[all_features,], + residual_type = "pearson", + min_variance = min_var, + res_clip_range = res_clip_range, + verbosity = as.numeric(x = verbose) * 2 + ) + corrected_counts[[i]] <- correct_counts( + x = vst_out, + umi = counts.vp[all_features,], + verbosity = as.numeric(x = verbose) * 2 + ) + residuals[[i]] <- new_residual + cell_attrs[[i]] <- cell_attr + } + new.residuals <- Reduce(cbind, residuals) + corrected_counts <- Reduce(cbind, corrected_counts) + cell_attrs <- Reduce(rbind, cell_attrs) + + vst_out.reference$cell_attr <- cell_attrs[colnames(new.residuals),] + SCTModel.list <- PrepVSTResults( + vst.res = vst_out.reference, + cell.names = all_cells ) - corrected_counts[[i]] <- correct_counts( - x = vst_out, - umi = counts.vp[all_features,], - verbosity = as.numeric(x = verbose) * 2 + SCTModel.list <- list(model1 = SCTModel.list) + + # scale data here as do.center and do.scale are set to FALSE inside + new.residuals <- ScaleData( + new.residuals, + features = NULL, + #vars.to.regress = vars.to.regress, + #latent.data = cell.attr[, vars.to.regress, drop = FALSE], + model.use = 'linear', + use.umi = FALSE, + do.scale = do.scale, + do.center = do.center, + scale.max = Inf, + block.size = 750, + min.cells.to.block = 3000, + verbose = verbose ) - residuals[[i]] <- new_residual - cell_attrs[[i]] <- cell_attr + merged.assay <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list = SCTModel.list) + VariableFeatures(merged.assay) <- variable.features } - - new.residuals <- Reduce(cbind, residuals) - corrected_counts <- Reduce(cbind, corrected_counts) - cell_attrs <- Reduce(rbind, cell_attrs) - - vst_out.reference$cell_attr <- cell_attrs[colnames(new.residuals),] - SCTModel.list <- PrepVSTResults( - vst.res = vst_out.reference, - cell.names = all_cells - ) - SCTModel.list <- list(model1 = SCTModel.list) - - merged.assay <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list = SCTModel.list) - VariableFeatures(merged.assay) <- variable.features } else { sct.assay.list.temp <- list() for (i in seq_len(length.out = length(x = cells.grid))) { @@ -1433,9 +1457,28 @@ SCTransform.StdAssay <- function( # slot ‘key’ in an object of class “Assay”; is(value, "character") is not TRUE assay.out <- merge(x = sct.assay.list.temp[[1]], y = sct.assay.list.temp[2:length(sct.assay.list.temp)]) + } else { assay.out <- sct.assay.list.temp[[1]] - } + } + + scale.data <- GetAssayData(object = assay.out, slot = "scale.data") + # scale data here as do.center and do.scale are set to FALSE inside + scale.data <- ScaleData( + scale.data, + features = NULL, + #vars.to.regress = vars.to.regress, + #latent.data = cell.attr[, vars.to.regress, drop = FALSE], + model.use = 'linear', + use.umi = FALSE, + do.scale = do.scale, + do.center = do.center, + scale.max = Inf, + block.size = 750, + min.cells.to.block = 3000, + verbose = verbose + ) + assay.out <- SetAssayData(object = assay.out, slot = "scale.data", new.data = scale.data) sct.assay.list[[dataset.names[i]]] <- assay.out # Return array by merging everythin if (length(x = sct.assay.list) > 1){ From 8b072e84848d22a31a414f17dc1313015d766be2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 14:39:42 -0400 Subject: [PATCH 207/979] fix project bug --- R/integration.R | 17 +++++++---------- R/visualization.R | 3 +++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/integration.R b/R/integration.R index 917b4fa29..f9ccc6ddc 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5782,35 +5782,32 @@ ProjectCellEmbeddings_DelayedAssay <- function( feature.mean = NULL, feature.sd = NULL ) { - RowMeanSparse <- sparseMatrixStats::rowMeans2 - RowVarSparse <- sparseMatrixStats::rowVars dims <- dims %||% 1:ncol(reference[[reduction]]) assay <- assay %||% DefaultAssay(reference) features <- intersect(rownames(query.data), rownames(reference[[reduction]]@feature.loadings)) query.data <- query.data[features,] feature.mean <- feature.mean[features] %||% - RowMeanSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,]) - + RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) + feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,])) + sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) setAutoBlockSize(size = block.size) # 1 GB cells.grid <- DelayedArray::colAutoGrid(x = query.data) - emb.list <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] data.block <- DelayedArray::read_block(x = query.data, viewport = vp, as.sparse = TRUE) - data.block <- t(apply(data.block, MARGIN = 2, function(x) { + data.block <- apply(data.block, MARGIN = 2, function(x) { x <- (x - feature.mean)/feature.sd return(x) - })) - emb.block <- data.block %*% reference[[reduction]]@feature.loadings[features,] - emb.list[[i]] <- t(emb.block) + }) + emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block + emb.list[[i]] <- emb.block } # list to matrix, column has to be cells emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) diff --git a/R/visualization.R b/R/visualization.R index 20a619ba3..bd6fc8f2c 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -823,6 +823,9 @@ DimPlot <- function( } reduction <- reduction %||% DefaultDimReduc(object = object) # cells <- cells %||% colnames(x = object) + + ##### Cells for all cells in the assay. + #### Cells function should not only get default layer cells <- cells %||% Cells( x = object, assay = DefaultAssay(object = object[[reduction]]) From 8a4a40ea176c9ce0ab3db795eb44f807a39faa10 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 15:20:55 -0400 Subject: [PATCH 208/979] solve conflict --- R/integration.R | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/R/integration.R b/R/integration.R index e69311c8a..6cd772c36 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5783,37 +5783,20 @@ ProjectCellEmbeddings_DelayedAssay <- function( feature.mean = NULL, feature.sd = NULL ) { -<<<<<<< HEAD -======= - RowMeanSparse <- sparseMatrixStats::rowMeans2 - RowVarSparse <- sparseMatrixStats::rowVars ->>>>>>> 0a3e6a971f87cd6913f30ca8ef27cea89c5e0637 + dims <- dims %||% 1:ncol(reference[[reduction]]) assay <- assay %||% DefaultAssay(reference) features <- intersect(rownames(query.data), rownames(reference[[reduction]]@feature.loadings)) query.data <- query.data[features,] -<<<<<<< HEAD + feature.mean <- feature.mean[features] %||% RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) feature.sd <- feature.sd[features] %||% sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) -======= - feature.mean <- feature.mean[features] %||% - RowMeanSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,]) ->>>>>>> 0a3e6a971f87cd6913f30ca8ef27cea89c5e0637 - - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(x = LayerData(object = reference[[assay]], layer = 'data')[features,])) - feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - setAutoBlockSize(size = block.size) # 1 GB - cells.grid <- DelayedArray::colAutoGrid(x = query.data) -<<<<<<< HEAD -======= - ->>>>>>> 0a3e6a971f87cd6913f30ca8ef27cea89c5e0637 + emb.list <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] From 5d2c753ecaf54c1b0a62c15b69b861003425fbf9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 15:25:35 -0400 Subject: [PATCH 209/979] add cells grid --- R/integration.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/integration.R b/R/integration.R index 6cd772c36..8ba2e8532 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5797,6 +5797,8 @@ ProjectCellEmbeddings_DelayedAssay <- function( sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = query.data) emb.list <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] From 1d0c87015c5bce816f5850174b40de0ce2695e3b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 15 Sep 2022 18:07:57 -0400 Subject: [PATCH 210/979] Fixes for SCT on multiple layers --- R/differential_expression.R | 14 ++++ R/preprocessing5.R | 133 +++++++++++++++++++++++------------- 2 files changed, 99 insertions(+), 48 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index a84940545..c9cf884ee 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2127,6 +2127,20 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { return(object) } +PrepSCTFindMarkers.V5 <- function(object, assay = "SCT", umi.assay = "RNA", layer = "counts", verbose = TRUE) { + + layers <- Layers(object = object[[umi.assay]], search = layer) + dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) + for (i in seq_along(along.with = layers)) { + l <- layers[i] + counts <- LayerData( + object = object[[umi.assay]], + layer = l + ) + } + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) +} + # given a UMI count matrix, estimate NB theta parameter for each gene # and use fit of relationship with mean to assign regularized theta to each gene # diff --git a/R/preprocessing5.R b/R/preprocessing5.R index ef5d3729a..c52365581 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1258,8 +1258,8 @@ SCTransform.StdAssay <- function( layers <- Layers(object = object, search = layer) dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) sct.assay.list <- list() - for (i in seq_along(along.with = layers)) { - l <- layers[i] + for (dataset.index in seq_along(along.with = layers)) { + l <- layers[dataset.index] if (isTRUE(x = verbose)) { message("Running SCTransform on layer: ", l) } @@ -1271,27 +1271,28 @@ SCTransform.StdAssay <- function( features = all_features, cells = all_cells ) + sparse <- DelayedArray::is_sparse(x = counts) ## Sample cells cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) # if there is no reference model we randomly select a subset of cells # TODO: randomize this set of cells variable.feature.list <- list() - GetSCT.Chunked <- function(vp, reference.SCT.model = NULL, do.correct.umi = TRUE){ - sparse <- DelayedArray::is_sparse(x = counts) + # counts here is global block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = sparse) - - counts <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts),, drop=FALSE] - - if (!identical(rownames(cell.attr.object), colnames(counts))) { - # print(length(setdiff(rownames(cell.attr.object), colnames(counts)))) - # print(length(setdiff(colnames(counts),rownames(cell.attr.object)))) + counts.chunk <- as(object = block, Class = 'dgCMatrix') + cell.attr.object <- cell.attr[colnames(x = counts.chunk),, drop=FALSE] + + if (!identical(rownames(cell.attr.object), colnames(counts.chunk))) { + print(length(setdiff(rownames(cell.attr.object), colnames(counts.chunk)))) + print(length(setdiff(colnames(counts.chunk),rownames(cell.attr.object)))) + print(rownames(cell.attr.object)[1:5]) + print(colnames(counts.chunk)[1:5]) stop("cell attribute row names must match column names of count matrix") } - vst.out <- SCTransform(object = counts, + vst.out <- SCTransform(object = counts.chunk, cell.attr = cell.attr.object, reference.SCT.model = reference.SCT.model, do.correct.umi = do.correct.umi, @@ -1306,7 +1307,7 @@ SCTransform.StdAssay <- function( conserve.memory = conserve.memory, return.only.var.genes = return.only.var.genes, seed.use = seed.use, - verbose = verbose, + verbose = FALSE, ...) residual.type <- vst.out[['residual_type']] %||% 'pearson' @@ -1321,7 +1322,7 @@ SCTransform.StdAssay <- function( } else { # TODO: restore once check.matrix is in SeuratObject # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) - assay.out <- CreateAssayObject(counts = counts) + assay.out <- CreateAssayObject(counts = counts.chunk) } # set the variable genes VariableFeatures(object = assay.out) <- vst.out$variable_features @@ -1353,6 +1354,9 @@ SCTransform.StdAssay <- function( # No reference model so just select the some block of cells set.seed(seed = seed.use) selected.block <- sample(x = seq.int(from = 1, to = length(cells.grid)), size = 1) + if (verbose){ + message("Using block", selected.block, " from ", dataset.names[[dataset.index]], " to learn model.") + } vp <- cells.grid[[selected.block]] assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = FALSE) local.reference.SCT.model <- assay.out@SCTModel.list[[1]] @@ -1378,10 +1382,14 @@ SCTransform.StdAssay <- function( corrected_counts[[1]] <- GetAssayData(object = assay.out, slot="data") residuals[[1]] <- GetAssayData(object = assay.out, slot="scale.data") cell_attrs[[1]] <- vst_out.reference$cell_attr + sct.assay.list[[dataset.names[dataset.index]]] <- assay.out } else { + # iterate over chunks to get residuals for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - print(paste0("chunk ", i)) + if (verbose){ + message("Getting residuals for block ", i, "(of ", length(cells.grid), ") for ", dataset.names[[dataset.index]], " dataset") + } block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) @@ -1395,19 +1403,31 @@ SCTransform.StdAssay <- function( ) rownames(cell_attr) <- colnames(counts.vp) vst_out$cell_attr <- cell_attr - - new_residual <- get_residuals( - vst_out = vst_out, - umi = counts.vp[all_features,], - residual_type = "pearson", - min_variance = min_var, - res_clip_range = res_clip_range, - verbosity = as.numeric(x = verbose) * 2 - ) + vst_out$gene_attr <- vst_out$gene_attr[variable.features,] + if (return.only.var.genes){ + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[variable.features,], + residual_type = "pearson", + min_variance = min_var, + res_clip_range = res_clip_range, + verbosity = FALSE#as.numeric(x = verbose) * 2 + ) + } else { + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[all.features,], + residual_type = "pearson", + min_variance = min_var, + res_clip_range = res_clip_range, + verbosity = FALSE#as.numeric(x = verbose) * 2 + ) + } + vst_out$y <- new_residual corrected_counts[[i]] <- correct_counts( x = vst_out, umi = counts.vp[all_features,], - verbosity = as.numeric(x = verbose) * 2 + verbosity = FALSE# as.numeric(x = verbose) * 2 ) residuals[[i]] <- new_residual cell_attrs[[i]] <- cell_attr @@ -1438,14 +1458,24 @@ SCTransform.StdAssay <- function( min.cells.to.block = 3000, verbose = verbose ) - merged.assay <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list = SCTModel.list) - VariableFeatures(merged.assay) <- variable.features + assay.out <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list = SCTModel.list) + VariableFeatures(assay.out) <- variable.features + # one assay per dataset + if (verbose){ + message("Finished calculating residuals for ", dataset.names[dataset.index]) + } + sct.assay.list[[dataset.names[dataset.index]]] <- assay.out + variable.feature.list[[dataset.names[dataset.index]]] <- VariableFeatures(assay.out) } - - } else { + } else { ### With reference model sct.assay.list.temp <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] + + if (verbose){ + message("Getting residuals for block ", i, "(of ", length(cells.grid), ") for ", dataset.names[[dataset.index]], " dataset") + } + assay.out <- GetSCT.Chunked(vp = vp, reference.SCT.model = reference.SCT.model, do.correct.umi = do.correct.umi) @@ -1461,7 +1491,7 @@ SCTransform.StdAssay <- function( } else { assay.out <- sct.assay.list.temp[[1]] } - + ## DoScaling scale.data <- GetAssayData(object = assay.out, slot = "scale.data") # scale data here as do.center and do.scale are set to FALSE inside scale.data <- ScaleData( @@ -1479,22 +1509,26 @@ SCTransform.StdAssay <- function( verbose = verbose ) assay.out <- SetAssayData(object = assay.out, slot = "scale.data", new.data = scale.data) - sct.assay.list[[dataset.names[i]]] <- assay.out - # Return array by merging everythin - if (length(x = sct.assay.list) > 1){ - merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) - # set variable features as the union of the features - variable.features <- Reduce(f = union, x = variable.feature.list) - VariableFeatures(object = merged.assay) <- variable.features - # set the names of SCTmodels to be layer names - models <- slot(object = merged.assay, name="SCTModel.list") - names(models) <- names(x = sct.assay.list) - slot(object = merged.assay, name="SCTModel.list") <- models - } else { - merged.assay <- sct.assay.list[[1]] + if (verbose){ + message("Finished calculating residuals for ", dataset.names[dataset.index]) } + sct.assay.list[[dataset.names[dataset.index]]] <- assay.out + variable.feature.list[[dataset.names[dataset.index]]] <- rownames(assay.out) } } + # Return array by merging everythin + if (length(x = sct.assay.list) > 1){ + merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) + # set variable features as the union of the features + variable.features <- Reduce(f = union, x = variable.feature.list) + VariableFeatures(object = merged.assay) <- variable.features + # set the names of SCTmodels to be layer names + models <- slot(object = merged.assay, name="SCTModel.list") + names(models) <- names(x = sct.assay.list) + slot(object = merged.assay, name="SCTModel.list") <- models + } else { + merged.assay <- sct.assay.list[[1]] + } gc(verbose = FALSE) return(merged.assay) } @@ -1553,6 +1587,9 @@ FetchResiduals <- function(object, stop(assay, " assay was not generated by SCTransform") } sct.models <- levels(x = object[[assay]]) + if (length(sct.models)==1){ + sct.models <- list(sct.models) + } if (length(x = sct.models) == 0) { warning("SCT model not present in assay", call. = FALSE, immediate. = TRUE) return(object) @@ -1642,10 +1679,10 @@ FetchResiduals <- function(object, if (length(x = new.residuals) == 1 & is.list(x = new.residuals)) { new.residuals <- new.residuals[[1]] } else { - #new.residuals <- Reduce(cbind, new.residuals) - new.residuals <- matrix(data = unlist(new.residuals), nrow = nrow(new.scale) , ncol = ncol(new.scale)) - colnames(new.residuals) <- colnames(new.scale) - rownames(new.residuals) <- rownames(new.scale) + new.residuals <- Reduce(cbind, new.residuals) + #new.residuals <- matrix(data = unlist(new.residuals), nrow = nrow(new.scale) , ncol = ncol(new.scale)) + #colnames(new.residuals) <- colnames(new.scale) + #rownames(new.residuals) <- rownames(new.scale) } new.scale[rownames(x = new.residuals), colnames(x = new.residuals)] <- new.residuals @@ -1653,7 +1690,7 @@ FetchResiduals <- function(object, new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] } - return(new.scale) + return(new.scale[features,]) } From c83fbba477b90b7adc42b067d4e0198c62c638e9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 20:50:49 -0400 Subject: [PATCH 211/979] fix cell.attr --- R/integration.R | 55 ++++++++++++++++++++++++++--------------------- R/preprocessing.R | 2 +- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/R/integration.R b/R/integration.R index 8ba2e8532..6fc6309e9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5789,33 +5789,38 @@ ProjectCellEmbeddings_DelayedAssay <- function( features <- intersect(rownames(query.data), rownames(reference[[reduction]]@feature.loadings)) query.data <- query.data[features,] - - feature.mean <- feature.mean[features] %||% - RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) + if (IsSCT(object[[assay]])) { +# TODO: SCT reiduals projection + + } else { + feature.mean <- feature.mean[features] %||% + RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) + + feature.sd <- feature.sd[features] %||% + sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) + feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) - feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = query.data) + emb.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + data.block <- DelayedArray::read_block(x = query.data, + viewport = vp, + as.sparse = TRUE) + data.block <- apply(data.block, MARGIN = 2, function(x) { + x <- (x - feature.mean)/feature.sd + return(x) + }) + emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block + emb.list[[i]] <- emb.block + } + # list to matrix, column has to be cells + emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) + rownames(emb.mat) <- colnames(query.data) + colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] + } - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = query.data) - emb.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - data.block <- DelayedArray::read_block(x = query.data, - viewport = vp, - as.sparse = TRUE) - data.block <- apply(data.block, MARGIN = 2, function(x) { - x <- (x - feature.mean)/feature.sd - return(x) - }) - emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block - emb.list[[i]] <- emb.block - } - # list to matrix, column has to be cells - emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) - rownames(emb.mat) <- colnames(query.data) - colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] return(emb.mat) } diff --git a/R/preprocessing.R b/R/preprocessing.R index ba37747a7..f1336349f 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1876,7 +1876,7 @@ SCTransform.Seurat <- function( if (verbose){ message("Running SCTransform on assay: ", assay) } - cell.attr <- slot(object = object, name = 'meta.data') + cell.attr <- slot(object = object, name = 'meta.data')[colnames(object[[assay]]),] assay.data <- SCTransform(object = object[[assay]], cell.attr = cell.attr, From d6eeb4fcb55aeaed2faad4c2257bcf0cc1101a64 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 21:28:55 -0400 Subject: [PATCH 212/979] fix cells name SCT --- R/preprocessing5.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 1942217f4..874e6dad1 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1345,7 +1345,6 @@ SCTransform.StdAssay <- function( assay.out <- as(object = assay.out, Class = "SCTAssay") #TODO: Add a key to prevent hitting a bug in merge.StdAssay which # does not like character(0) keys being merged - assay.out@key <- "sct" return (assay.out) } local.reference.SCT.model <- NULL @@ -1662,9 +1661,9 @@ FetchResidualSCTModel <- function(object, # use reference SCT model sct.method <- "reference" } + scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) - - scale.data.cells.common <- intersect(colnames(x = scale.data.cells), layer.cells) + scale.data.cells.common <- intersect( scale.data.cells, layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { From bc425c17e70c1d1151de6838d420046dfcd93dd3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Sep 2022 22:52:36 -0400 Subject: [PATCH 213/979] fix std variable gene --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 1942217f4..830c8370a 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -145,7 +145,7 @@ FindVariableFeatures.StdAssay <- function( sep = '_' ) rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) - object[[colnames(x = hvf.info)]] <- hvf.info + object[colnames(x = hvf.info)] <- hvf.info } return(object) } From e47f52d8f76e7e607b5c6260e540bc5e7308e0ae Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 16 Sep 2022 15:20:07 -0400 Subject: [PATCH 214/979] Fix FetchResidualSCTmodel --- R/preprocessing5.R | 59 +++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c52365581..ece856fd4 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1737,16 +1737,16 @@ FetchResidualSCTModel <- function(object, model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" + + layer.cells <- layer.cells %||% Cells(x = object[[umi.assay]], layer = layer) if (!is.null(reference.SCT.model)) { # use reference SCT model sct.method <- "reference" } scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) - - scale.data.cells.common <- intersect(colnames(x = scale.data.cells), layer.cells) + scale.data.cells.common <- intersect(scale.data.cells, layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) - if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) @@ -1764,6 +1764,16 @@ FetchResidualSCTModel <- function(object, } else { features_to_compute <- setdiff(x = new_features, y = existing_features) } + scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) + if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { + existing_features <- names(x = which(x = ! apply( + X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], + MARGIN = 1, + FUN = anyNA) + )) + } else { + existing_features <- character() + } if (sct.method == "reference.model") { if (verbose) { message("sct.model ", SCTModel, " is from reference, so no residuals will be recalculated") @@ -1781,7 +1791,8 @@ FetchResidualSCTModel <- function(object, diff_features <- setdiff(x = features_to_compute, y = model.features) intersect_features <- intersect(x = features_to_compute, y = model.features) if (sct.method == "reference") { - vst_out <- reference.SCT.model + vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) + # override clip.range clip.range <- vst_out$arguments$sct.clip.range umi.field <- paste0("nCount_", assay) @@ -1795,6 +1806,7 @@ FetchResidualSCTModel <- function(object, vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features, , drop = FALSE] } else { vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + clip.range <- vst_out$arguments$sct.clip.range } clip.max <- max(clip.range) clip.min <- min(clip.range) @@ -1834,37 +1846,46 @@ FetchResidualSCTModel <- function(object, umi = colSums(umi.all), log_umi = log10(x = colSums(umi.all)) ) - # cell_attr <- as.matrix(x = cell_attr) - rownames(cell_attr) <- colnames(umi) - if (sct.method == "reference") { - vst_out$cell_attr <- cell_attr[colnames(umi), ] + rownames(cell_attr) <- colnames(umi.all) + if (sct.method %in% c("reference.model", "reference")) { + vst_out$cell_attr <- cell_attr[colnames(umi.all), ,drop=FALSE] } else { cell_attr_existing <- vst_out$cell_attr cells_missing <- setdiff(rownames(cell_attr), rownames(cell_attr_existing)) - vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing, ]) - vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), ] + vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing, , drop=FALSE]) + vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), , drop=FALSE] } if (verbose) { - if (sct.method == "reference") { + if (sct.method == "reference.model") { message("using reference sct model") } else { message("sct.model: ", SCTModel, " on ", ncol(x = umi), " cells: ", colnames(x = umi.all)[1], " .. ", colnames(x = umi.all)[ncol(umi.all)]) } } + if (vst_out$arguments$min_variance == "umi_median"){ min_var <- min_var_custom } else { min_var <- vst_out$arguments$min_variance } - new_residual <- get_residuals( - vst_out = vst_out, - umi = umi, - residual_type = "pearson", - min_variance = min_var, - res_clip_range = c(clip.min, clip.max), - verbosity = as.numeric(x = verbose) * 2 - ) + if (nrow(umi)>0){ + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + min_variance = min_var, + res_clip_range = c(clip.min, clip.max), + verbosity = as.numeric(x = verbose) * 2 + ) + } else { + return(matrix( + data = NA, + nrow = length(x = features_to_compute), + ncol = length(x = colnames(umi.all)), + dimnames = list(features_to_compute, colnames(umi.all)) + )) + } new_residual <- as.matrix(x = new_residual) new_residuals[[i]] <- new_residual } From e6a7734aab20a477d9cbbb63c476c14c2c550330 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 16 Sep 2022 15:25:02 -0400 Subject: [PATCH 215/979] Fix FetchResidualSCTmodel --- R/preprocessing5.R | 59 +++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 565b82aa0..9f4c88575 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1656,16 +1656,16 @@ FetchResidualSCTModel <- function(object, model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" + + layer.cells <- layer.cells %||% Cells(x = object[[umi.assay]], layer = layer) if (!is.null(reference.SCT.model)) { # use reference SCT model sct.method <- "reference" } - scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) - scale.data.cells.common <- intersect( scale.data.cells, layer.cells) + scale.data.cells.common <- intersect(scale.data.cells, layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) - if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) @@ -1683,6 +1683,16 @@ FetchResidualSCTModel <- function(object, } else { features_to_compute <- setdiff(x = new_features, y = existing_features) } + scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) + if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { + existing_features <- names(x = which(x = ! apply( + X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], + MARGIN = 1, + FUN = anyNA) + )) + } else { + existing_features <- character() + } if (sct.method == "reference.model") { if (verbose) { message("sct.model ", SCTModel, " is from reference, so no residuals will be recalculated") @@ -1700,7 +1710,8 @@ FetchResidualSCTModel <- function(object, diff_features <- setdiff(x = features_to_compute, y = model.features) intersect_features <- intersect(x = features_to_compute, y = model.features) if (sct.method == "reference") { - vst_out <- reference.SCT.model + vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) + # override clip.range clip.range <- vst_out$arguments$sct.clip.range umi.field <- paste0("nCount_", assay) @@ -1714,6 +1725,7 @@ FetchResidualSCTModel <- function(object, vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features, , drop = FALSE] } else { vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + clip.range <- vst_out$arguments$sct.clip.range } clip.max <- max(clip.range) clip.min <- min(clip.range) @@ -1753,37 +1765,46 @@ FetchResidualSCTModel <- function(object, umi = colSums(umi.all), log_umi = log10(x = colSums(umi.all)) ) - # cell_attr <- as.matrix(x = cell_attr) - rownames(cell_attr) <- colnames(umi) - if (sct.method == "reference") { - vst_out$cell_attr <- cell_attr[colnames(umi), ] + rownames(cell_attr) <- colnames(umi.all) + if (sct.method %in% c("reference.model", "reference")) { + vst_out$cell_attr <- cell_attr[colnames(umi.all), ,drop=FALSE] } else { cell_attr_existing <- vst_out$cell_attr cells_missing <- setdiff(rownames(cell_attr), rownames(cell_attr_existing)) - vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing, ]) - vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), ] + vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing, , drop=FALSE]) + vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), , drop=FALSE] } if (verbose) { - if (sct.method == "reference") { + if (sct.method == "reference.model") { message("using reference sct model") } else { message("sct.model: ", SCTModel, " on ", ncol(x = umi), " cells: ", colnames(x = umi.all)[1], " .. ", colnames(x = umi.all)[ncol(umi.all)]) } } + if (vst_out$arguments$min_variance == "umi_median"){ min_var <- min_var_custom } else { min_var <- vst_out$arguments$min_variance } - new_residual <- get_residuals( - vst_out = vst_out, - umi = umi, - residual_type = "pearson", - min_variance = min_var, - res_clip_range = c(clip.min, clip.max), - verbosity = as.numeric(x = verbose) * 2 - ) + if (nrow(umi)>0){ + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + min_variance = min_var, + res_clip_range = c(clip.min, clip.max), + verbosity = as.numeric(x = verbose) * 2 + ) + } else { + return(matrix( + data = NA, + nrow = length(x = features_to_compute), + ncol = length(x = colnames(umi.all)), + dimnames = list(features_to_compute, colnames(umi.all)) + )) + } new_residual <- as.matrix(x = new_residual) new_residuals[[i]] <- new_residual } From 05b4e74eea7a401c0d924adebd76c2d885bd64b0 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 19 Sep 2022 12:30:48 -0400 Subject: [PATCH 216/979] Check for umi assay default --- R/objects.R | 3 ++- R/preprocessing.R | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/objects.R b/R/objects.R index 80b8e9d86..69754738b 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1937,7 +1937,8 @@ merge.SCTAssay <- function( if (inherits(x = assays[[assay]], what = "SCTAssay")) { parent.environ <- sys.frame(which = parent.call[1]) seurat.object <- parent.environ$objects[[assay]] - seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, assay = parent.environ$assay, verbose = FALSE)) + seurat.object <- suppressWarnings(expr = GetResidual(object = seurat.object, features = all.features, + assay = parent.environ$assay, verbose = FALSE)) return(seurat.object[[parent.environ$assay]]) } return(assays[[assay]]) diff --git a/R/preprocessing.R b/R/preprocessing.R index f1336349f..79e933308 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -412,7 +412,7 @@ GetResidual <- function( "This SCTAssay contains multiple SCT models. Computing residuals for cells using different models" ) } - if (class(x = object[[umi.assay]]) == "Assay"){ + if ((!umi.assay %in% Assays(object = object)) || class(x = object[[umi.assay]])[1] == "Assay"){ new.residuals <- lapply( X = sct.models, FUN = function(x) { @@ -427,7 +427,7 @@ GetResidual <- function( ) } ) - } else if (class(x = object[[umi.assay]]) == "Assay5"){ + } else if (class(x = object[[umi.assay]])[1] == "Assay5"){ new.residuals <- lapply( X = sct.models, FUN = function(x) { From 7c20b4b66ca5ad418cf073b40d331f6973728426 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 4 Oct 2022 16:35:45 -0400 Subject: [PATCH 217/979] Add support for TileDBMatrices for log-normalization --- DESCRIPTION | 4 ++-- NAMESPACE | 1 + R/preprocessing5.R | 54 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f55f72ec9..837b4a5d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9009 -Date: 2022-09-12 +Version: 4.0.4.9010 +Date: 2022-10-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index b151647ec..45dd7e870 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,7 @@ S3method(LogNormalize,DelayedMatrix) S3method(LogNormalize,H5ADMatrix) S3method(LogNormalize,HDF5Matrix) S3method(LogNormalize,SparseArraySeed) +S3method(LogNormalize,TileDBMatrix) S3method(LogNormalize,V3Matrix) S3method(LogNormalize,data.frame) S3method(LogNormalize,default) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 9af502b06..0e13584fb 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -317,7 +317,8 @@ LogNormalize.HDF5Matrix <- function( ... ) { check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') - fpath <- slot(object = slot(object = data, name = 'seed'), name = 'filepath') + # fpath <- slot(object = slot(object = data, name = 'seed'), name = 'filepath') + fpath <- DelayedArray::path(object = data) if (.DelayedH5DExists(object = data, path = layer)) { rhdf5::h5delete(file = fpath, name = layer) dpath <- file.path( @@ -352,7 +353,7 @@ LogNormalize.SparseArraySeed <- function( scale.factor = 1e4, margin = 2L, return.seed = TRUE, - verbose= TRUE, + verbose = TRUE, ... ) { check_installed( @@ -372,6 +373,55 @@ LogNormalize.SparseArraySeed <- function( return(data) } +#' @method LogNormalize TileDBMatrix +#' @export +#' +LogNormalize.TileDBMatrix <- function( + data, + scale.factor = 1e4, + margin = 2L, + return.seed = TRUE, + verbose= TRUE, + layer = 'data', + ... +) { + check_installed( + pkg = 'TileDBArray', + reason = 'for working with TileDB matrices' + ) + odir <- c( + dirname(path = DelayedArray::path(object = data)), + getwd(), + tempdir(check = TRUE) + ) + # file.access returns 0 (FALSE) for true and -1 (TRUE) for false + idx <- which(x = !file.access(names = odir, mode = 2L))[1L] + if (rlang::is_na(x = odir)) { + abort(message = "Unable to find a directory to write normalized TileDB matrix") + } + out <- file.path(odir[idx], layer) + if (!file.access(names = out, mode = 0L)) { + warn(message = paste(sQuote(x = out), "exists, overwriting")) + unlink(x = out, recursive = TRUE, force = TRUE) + } + sink <- TileDBArray::TileDBRealizationSink( + dim = dim(x = data), + dimnames = dimnames(x = data), + type = BiocGenerics::type(x = data), + path = out, + attr = layer, + sparse = DelayedArray::is_sparse(x = data) + ) + return(LogNormalize.DelayedMatrix( + data = data, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + sink = sink, + ... + )) +} + #' @importFrom SeuratObject IsSparse #' #' @method NormalizeData default From ca3ca97c2d093579203c7bcefb082f15c17884e1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 4 Oct 2022 22:57:54 -0400 Subject: [PATCH 218/979] add delayedArray coutsketch --- R/integration.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/integration.R b/R/integration.R index 6fc6309e9..e8e73d343 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5959,3 +5959,26 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e9) { return(product.mat) } + + +CountSketch_DelayedAssay <- function(object, block.size = 1e9, nsketch = 5000L, seed = 123) { + sparse <- DelayedArray::is_sparse(x = object) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = object) + sa.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) + + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + ncells.block <- ncol(block) + S.block <- CountSketch(nsketch = nsketch, ncells = ncells.block, seed = seed) + sa.list[[i]] <- as.matrix(S.block %*% t(block)) + } + SA.mat <- Reduce("+", sa.list) + return(SA.mat) +} From a644f251bde7f0a29302a9a6681dc4ad70fa9490 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 5 Oct 2022 11:03:45 -0400 Subject: [PATCH 219/979] product norm --- R/integration.R | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index e8e73d343..85669e13d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5965,7 +5965,7 @@ CountSketch_DelayedAssay <- function(object, block.size = 1e9, nsketch = 5000L, sparse <- DelayedArray::is_sparse(x = object) suppressMessages(setAutoBlockSize(size = block.size)) cells.grid <- DelayedArray::colAutoGrid(x = object) - sa.list <- list() + SA.mat <- matrix(data = 0, nrow = nsketch, ncol = nrow(object)) for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) @@ -5977,8 +5977,35 @@ CountSketch_DelayedAssay <- function(object, block.size = 1e9, nsketch = 5000L, } ncells.block <- ncol(block) S.block <- CountSketch(nsketch = nsketch, ncells = ncells.block, seed = seed) - sa.list[[i]] <- as.matrix(S.block %*% t(block)) + SA.mat <- SA.mat + (S.block %*% t(block)) } - SA.mat <- Reduce("+", sa.list) return(SA.mat) } + +crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e9) { + # perform t(x) %*% y in blocks for y + if (!inherits(x = y, 'DelayedMatrix')) { + stop('y should a DelayedMatrix') + } + if (nrow(x) != nrow(y)) { + stop('row of x and y should be the same') + } + sparse <- DelayedArray::is_sparse(x = y) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = y) + norm.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + norm.list[[i]] <- colSums(x = as.matrix(t(x) %*% block) ^ 2) + } + norm.vector <- unlist(norm.list) + return(norm.vector) + +} + From 11f201d5a5fa677b4a49a8341f01e19f18be8bf9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 5 Oct 2022 11:50:07 -0400 Subject: [PATCH 220/979] modify count sketch --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 85669e13d..9a4013243 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5977,7 +5977,7 @@ CountSketch_DelayedAssay <- function(object, block.size = 1e9, nsketch = 5000L, } ncells.block <- ncol(block) S.block <- CountSketch(nsketch = nsketch, ncells = ncells.block, seed = seed) - SA.mat <- SA.mat + (S.block %*% t(block)) + SA.mat <- SA.mat + as.matrix(S.block %*% t(block)) } return(SA.mat) } From a4d2b49d2154b1482b9294fedf54ecc044c2dc06 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 5 Oct 2022 12:12:01 -0400 Subject: [PATCH 221/979] fix bug CountSketch --- R/sketching.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 102410ce9..3af43955e 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -441,7 +441,8 @@ CountSketch <- function(nsketch, ncells, seed = NA_integer_, ...) { return(sparseMatrix( i = iv, j = jv, - x = xv + x = xv, + dims = c(nsketch, ncells) )) } From 39f4d3247b0df405e6351358db0c3ce4bf0f78d3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Oct 2022 16:27:53 -0400 Subject: [PATCH 222/979] add leverage score delayarray --- NAMESPACE | 2 +- R/integration.R | 23 ------- R/preprocessing5.R | 6 +- R/sketching.R | 156 ++++++++++++++++++++++++++------------------- 4 files changed, 95 insertions(+), 92 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 082149092..3319bc3f1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,9 +47,9 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) +S3method(LeverageScore,Assay) S3method(LeverageScore,DelayedMatrix) S3method(LeverageScore,Seurat) -S3method(LeverageScore,Seurat5) S3method(LeverageScore,StdAssay) S3method(LeverageScore,default) S3method(LogNormalize,DelayedMatrix) diff --git a/R/integration.R b/R/integration.R index 8d86aa4df..165c2fad9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5922,29 +5922,6 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e9) { return(product.mat) } - - -CountSketch_DelayedAssay <- function(object, block.size = 1e9, nsketch = 5000L, seed = 123) { - sparse <- DelayedArray::is_sparse(x = object) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = object) - SA.mat <- matrix(data = 0, nrow = nsketch, ncol = nrow(object)) - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) - - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - ncells.block <- ncol(block) - S.block <- CountSketch(nsketch = nsketch, ncells = ncells.block, seed = seed) - SA.mat <- SA.mat + as.matrix(S.block %*% t(block)) - } - return(SA.mat) -} - crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e9) { # perform t(x) %*% y in blocks for y if (!inherits(x = y, 'DelayedMatrix')) { diff --git a/R/preprocessing5.R b/R/preprocessing5.R index f6f1802ce..69157da88 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -47,12 +47,14 @@ FindVariableFeatures.default <- function( call. = FALSE ) } - return(method( + var.gene.ouput <- method( data = object, nselect = nselect, verbose = verbose, ... - )) + ) + rownames(x = var.gene.ouput) <- rownames(x = object) + return(var.gene.ouput) } g <- function(x, method = VST) { diff --git a/R/sketching.R b/R/sketching.R index 3af43955e..004cc694c 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -150,7 +150,7 @@ LeverageScoreSampling <- function( # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @importFrom Matrix qrR +#' @importFrom Matrix qrR t #' #' @method LeverageScore default #' @export @@ -224,6 +224,7 @@ LeverageScore.default <- function( return(rowSums(x = Z ^ 2)) } +#' @importFrom Matrix qrR t #' @method LeverageScore DelayedMatrix #' @export #' @@ -234,6 +235,7 @@ LeverageScore.DelayedMatrix <- function( method = CountSketch, eps = 0.5, seed = 123L, + block.size = 1e8, verbose = TRUE, ... ) { @@ -244,42 +246,50 @@ LeverageScore.DelayedMatrix <- function( if (!is_quosure(x = method)) { method <- enquo(arg = method) } - grid <- DelayedArray::colAutoGrid(x = object) - scores <- vector(mode = 'numeric', length = ncol(x = object)) - if (isTRUE(x = verbose)) { - pb <- txtProgressBar(style = 3L, file = stderr()) + sa <- SketchMatrixProd(object = object, + block.size = block.size, + nsketch = nsketch, + method = method, + ...) + qr.sa <- base::qr(x = sa) + R <- if (inherits(x = qr.sa, what = 'sparseQR')) { + qrR(qr = qr.sa) + } else { + base::qr.R(qr = qr.sa) } - for (i in length(x = grid)) { - vp <- grid[[i]] - idx <- seq.int( - from = IRanges::start(x = slot(object = vp, name = 'ranges')[2L]), - to = IRanges::end(x = slot(object = vp, name = 'ranges')[2L]) - ) - x <- as.sparse(x = DelayedArray::read_block( - x = object, - viewport = vp, - as.sparse = FALSE - )) - scores[idx] <- LeverageScore( - object = x, - nsketch = nsketch, - ndims = ndims, - method = method, - eps = 0.5, - seed = seed, - verbose = FALSE - # ... - ) - if (isTRUE(x = verbose)) { - setTxtProgressBar(pb = pb, value = i / length(x = grid)) - } + if (length(x = which(x = diag(x = R) == 0))> 0) { + warning("not all features are variable features") + var.index <- which(x = diag(x = R) != 0) + R <- R[var.index, var.index] } - if (isTRUE(x = verbose)) { - close(con = pb) + R.inv <- as.sparse(x = backsolve(r = R, x = diag(x = ncol(x = R)))) + JL <- as.sparse(x = JLEmbed( + nrow = ncol(x = R.inv), + ncol = ndims, + eps = eps, + seed = seed + )) + RP.mat <- R.inv %*% JL + sparse <- DelayedArray::is_sparse(x = object) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = object) + norm.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + norm.list[[i]] <- colSums(x = as.matrix(t(RP.mat) %*% block[1:ncol(R),]) ^ 2) } + scores <- unlist(norm.list) return(scores) } + + #' @method LeverageScore StdAssay #' @export #' @@ -328,6 +338,14 @@ LeverageScore.StdAssay <- function( return(scores) } + +#' @method LeverageScore Assay +#' @export +#' +LeverageScore.Assay <- LeverageScore.StdAssay + + + #' @method LeverageScore Seurat #' @export #' @@ -364,41 +382,6 @@ LeverageScore.Seurat <- function( return(object) } -#' @method LeverageScore Seurat5 -#' @export -#' -LeverageScore.Seurat5 <- function( - object, - assay = NULL, - features = NULL, - nsketch = 5000L, - ndims = NULL, - method = CountSketch, - layer = 'data', - eps = 0.5, - seed = 123L, - verbose = TRUE, - ... -) { - assay <- assay[1L] %||% DefaultAssay(object = object) - assay <- match.arg(arg = assay, choices = Assays(object = object)) - method <- enquo(arg = method) - scores <- LeverageScore( - object = object[[assay]], - features = features, - nsketch = nsketch, - ndims = ndims, - method = method, - layer = layer, - eps = eps, - seed = seed, - verbose = verbose, - ... - ) - names(x = scores) <- paste0("seurat_", names(x = scores)) - object[[]] <- scores - return(object) -} #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for R-defined generics @@ -513,6 +496,47 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA_integer_, method = "li") { return(m) } + + +SketchMatrixProd <- function( + object, + method = CountSketch, + block.size = 1e9, + nsketch = 5000L, + seed = 123L, + ...) { + + if (is_quosure(x = method)) { + method <- eval( + expr = quo_get_expr(quo = method), + envir = quo_get_env(quo = method) + ) + } + if (is.character(x = method)) { + method <- match.fun(FUN = method) + } + stopifnot(is.function(x = method)) + sparse <- DelayedArray::is_sparse(x = object) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = object) + SA.mat <- matrix(data = 0, nrow = nsketch, ncol = nrow(object)) + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) + + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + ncells.block <- ncol(block) + S.block <- method(nsketch = nsketch, ncells = ncells.block, seed = seed, ...) + SA.mat <- SA.mat + as.matrix(S.block %*% t(block)) + } + return(SA.mat) +} + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # S4 Methods #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From 22965f4aca73e4b7c42a8b03f9b4b21741836408 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 7 Oct 2022 16:56:40 -0400 Subject: [PATCH 223/979] Minor fixes --- R/sketching.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 102410ce9..26141280e 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -249,11 +249,11 @@ LeverageScore.DelayedMatrix <- function( if (isTRUE(x = verbose)) { pb <- txtProgressBar(style = 3L, file = stderr()) } - for (i in length(x = grid)) { + for (i in seq_len(length.out = length(x = grid))) { vp <- grid[[i]] idx <- seq.int( - from = IRanges::start(x = slot(object = vp, name = 'ranges')[2L]), - to = IRanges::end(x = slot(object = vp, name = 'ranges')[2L]) + from = IRanges::start(x = vp)[2L], + to = IRanges::end(x = vp)[2L] ) x <- as.sparse(x = DelayedArray::read_block( x = object, From b881f24a44edb317f991b83a6b41f9e1ced15700 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 7 Oct 2022 16:56:44 -0400 Subject: [PATCH 224/979] Add LogNormalize.TENxRealizationMatrix Add checks for issues with DelayedArray --- NAMESPACE | 2 +- R/preprocessing5.R | 81 ++++++++++++++++++++++++++-------------------- 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 45dd7e870..78a696eae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,7 +55,7 @@ S3method(LeverageScore,default) S3method(LogNormalize,DelayedMatrix) S3method(LogNormalize,H5ADMatrix) S3method(LogNormalize,HDF5Matrix) -S3method(LogNormalize,SparseArraySeed) +S3method(LogNormalize,TENxMatrix) S3method(LogNormalize,TileDBMatrix) S3method(LogNormalize,V3Matrix) S3method(LogNormalize,data.frame) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 0e13584fb..a034395f7 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -245,6 +245,10 @@ LogNormalize.DelayedMatrix <- function( } if (!inherits(x = sink, what = 'RealizationSink')) { abort(message = "'sink' must be a RealizationSink") + } else if (inherits(x = sink, what = 'arrayRealizationSink')) { + # arrayRealizationSinks do not support write_block with rowAutoGrid or colAutoGrid + # Because of course they don't + abort(message = "Array RealizationSinks are not supported due to issues with {DelayedArray}") } else if (!all(dim(x = sink) == dim(x = data))) { abort(message = "'sink' must be the same size as 'data'") } @@ -263,6 +267,9 @@ LogNormalize.DelayedMatrix <- function( for (i in seq_len(length.out = length(x = grid))) { vp <- grid[[i]] x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) + if (isTRUE(x = sparse)) { + x <- as(object = x, Class = 'dgCMatrix') + } x <- LogNormalize( data = x, scale.factor = scale.factor, @@ -317,7 +324,6 @@ LogNormalize.HDF5Matrix <- function( ... ) { check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') - # fpath <- slot(object = slot(object = data, name = 'seed'), name = 'filepath') fpath <- DelayedArray::path(object = data) if (.DelayedH5DExists(object = data, path = layer)) { rhdf5::h5delete(file = fpath, name = layer) @@ -345,34 +351,6 @@ LogNormalize.HDF5Matrix <- function( )) } -#' @method LogNormalize SparseArraySeed -#' @export -#' -LogNormalize.SparseArraySeed <- function( - data, - scale.factor = 1e4, - margin = 2L, - return.seed = TRUE, - verbose = TRUE, - ... -) { - check_installed( - pkg = 'DelayedArray', - reason = 'for working with SparseArraySeeds' - ) - data <- LogNormalize( - data = as(object = data, Class = 'CsparseMatrix'), - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - ... - ) - if (!isFALSE(x = return.seed)) { - data <- as(object = data, Class = 'SparseArraySeed') - } - return(data) -} - #' @method LogNormalize TileDBMatrix #' @export #' @@ -380,7 +358,6 @@ LogNormalize.TileDBMatrix <- function( data, scale.factor = 1e4, margin = 2L, - return.seed = TRUE, verbose= TRUE, layer = 'data', ... @@ -396,7 +373,7 @@ LogNormalize.TileDBMatrix <- function( ) # file.access returns 0 (FALSE) for true and -1 (TRUE) for false idx <- which(x = !file.access(names = odir, mode = 2L))[1L] - if (rlang::is_na(x = odir)) { + if (rlang::is_na(x = idx)) { abort(message = "Unable to find a directory to write normalized TileDB matrix") } out <- file.path(odir[idx], layer) @@ -412,8 +389,42 @@ LogNormalize.TileDBMatrix <- function( attr = layer, sparse = DelayedArray::is_sparse(x = data) ) - return(LogNormalize.DelayedMatrix( - data = data, + return(NextMethod( + generic = 'LogNormalize', + object = data, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + sink = sink, + ... + )) +} + +#' @method LogNormalize TENxMatrix +#' @export +#' +LogNormalize.TENxMatrix <- function( + data, + scale.factor = 1e4, + margin = 2L, + verbose= TRUE, + layer = 'data', + ... +) { + check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') + fpath <- DelayedArray::path(object = data) + if (.DelayedH5DExists(object = data, path = layer)) { + rhdf5::h5delete(file = fpath, name = layer) + } + sink <- HDF5Array::TENxRealizationSink( + dim = dim(x = data), + dimnames = dimnames(x = data), + filepath = fpath, + group = layer + ) + return(NextMethod( + generic = 'LogNormalize', + object = data, scale.factor = scale.factor, margin = margin, verbose = verbose, @@ -464,11 +475,11 @@ NormalizeData.default <- function( .DelayedH5DExists <- function(object, path) { check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 files') - if (!inherits(x = object, what = c('HDF5Array', 'H5ADMatrix'))) { + if (!inherits(x = object, what = c('HDF5Array', 'H5ADMatrix', 'TENxMatrix'))) { abort(message = "'object' must be an HDF5Array or H5ADMatrix") } on.exit(expr = rhdf5::h5closeAll(), add = TRUE) - fpath <- slot(object = slot(object = object, name = 'seed'), name = 'filepath') + fpath <- DelayedArray::path(object = object) h5loc <- rhdf5::H5Fopen( name = fpath, flags = 'H5F_ACC_RDWR', From e1844b5d1dcfb6edaa8c66659c5e878e044d3f26 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 21 Oct 2022 15:46:26 -0400 Subject: [PATCH 225/979] fix all.equal --- R/integration.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 7e42c68d2..7809b0afe 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5458,7 +5458,9 @@ ValidateParams_IntegrateEmbeddings_IntegrationAnchors <- function( for (i in 1:nobs) { if (!isTRUE(all.equal( target = Cells(x = weight.reduction[[i]]), - current = Cells(x = object.list[[i]]))) + current = Cells(x = object.list[[i]]), + check.attributes = FALSE + )) ) { stop("Cell names in the provided weight.reduction ", i, " don't ", "match with the cell names in object ", i, ".", call. = FALSE) @@ -5506,7 +5508,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } reference.cells <- slot(object = anchorset, name = "reference.cells") reference.cells <- gsub(pattern = "_reference", replacement = "", x = reference.cells) - if (!isTRUE(x = all.equal(target = reference.cells, current = Cells(x = reference)))) { + if (!isTRUE(x = all.equal(target = reference.cells, current = Cells(x = reference), check.attributes = FALSE))) { stop("The set of cells used as a reference in the AnchorSet does not match ", "the set of cells provided in the reference object.") } @@ -5596,7 +5598,8 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( weight.reduction <- RenameCells(object = weight.reduction, new.names = paste0(Cells(x = weight.reduction), "_query")) if (!isTRUE(all.equal( target = Cells(x = weight.reduction), - current = Cells(x = query) + current = Cells(x = query), + check.attributes = FALSE ))) { stop("Cell names in the provided weight.reduction don't ", "match with the cell names in the query object.", call. = FALSE) From a83a54789b74f549f91e439c7f902020852ae55d Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 21 Oct 2022 16:13:39 -0400 Subject: [PATCH 226/979] Minor fixes --- R/preprocessing5.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index a034395f7..bcf542cc2 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -21,7 +21,6 @@ hvf.methods <- list() # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @importFrom rlang is_quosure quo_get_env quo_get_expr #' @method FindVariableFeatures default #' @export #' @@ -60,7 +59,6 @@ g <- function(x, method = VST) { FindVariableFeatures(object = x, method = method, layer = 'counts') } -#' @importFrom rlang as_name enquo is_quosure #' @importFrom SeuratObject DefaultLayer Features Key Layers #' #' @method FindVariableFeatures StdAssay @@ -145,7 +143,7 @@ FindVariableFeatures.StdAssay <- function( sep = '_' ) rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) - object[[colnames(x = hvf.info)]] <- hvf.info + object[colnames(x = hvf.info)] <- hvf.info } return(object) } @@ -528,7 +526,7 @@ NormalizeData.StdAssay <- function( method = 'LogNormalize', scale.factor = 1e4, margin = 1L, - layer = NULL, + layer = NULL, # TODO: set to counts save = 'data', default = TRUE, verbose = TRUE, @@ -536,7 +534,7 @@ NormalizeData.StdAssay <- function( ) { olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) layer <- Layers(object = object, search = layer) - if (save == DefaultLayer(object = object)) { + if (save %in% olayer) { default <- FALSE } if (length(x = save) != length(x = layer)) { From a638bd5a6f0d88932dffbdceb0a6fbe33beb0205 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 21 Oct 2022 16:14:37 -0400 Subject: [PATCH 227/979] Start adding IntegrateLayers --- DESCRIPTION | 3 +- NAMESPACE | 3 + R/integration5.R | 176 +++++++++++++++++++++++++++++++++++++++++++++++ R/zzz.R | 12 +++- 4 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 R/integration5.R diff --git a/DESCRIPTION b/DESCRIPTION index 837b4a5d2..73346c99b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,11 +90,12 @@ Collate: 'differential_expression.R' 'dimensional_reduction.R' 'integration.R' + 'zzz.R' + 'integration5.R' 'mixscape.R' 'objects.R' 'preprocessing.R' 'preprocessing5.R' - 'zzz.R' 'sketching.R' 'tree.R' 'utilities.R' diff --git a/NAMESPACE b/NAMESPACE index 78a696eae..8500d3cde 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,6 +47,8 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) +S3method(IntegrateLayers,Seurat) +S3method(IntegrateLayers,StdAssay) S3method(LeverageScore,DelayedMatrix) S3method(LeverageScore,Seurat) S3method(LeverageScore,Seurat5) @@ -244,6 +246,7 @@ export(Index) export(Indices) export(IntegrateData) export(IntegrateEmbeddings) +export(IntegrateLayers) export(IntegrateSketchEmbeddings) export(Intensity) export(IsGlobal) diff --git a/R/integration5.R b/R/integration5.R new file mode 100644 index 000000000..a4daccdb1 --- /dev/null +++ b/R/integration5.R @@ -0,0 +1,176 @@ +#' @include zzz.R +#' @include generics.R +#' +NULL + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @export +#' +IntegrateLayers <- function(object, ...) { + UseMethod(generic = 'IntegrateLayers', object = object) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Functions +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @export +#' +HarmonyIntegration <- function( + object, + features, + assay, + layers = NULL, + npcs = 50L, + key = 'harmony_', + theta = NULL, + lambda = NULL, + sigma = 0.1, + nclust = NULL, + tau = 0, + block.size = 0.05, + max.iter.harmony = 10L, + max.iter.cluster = 20L, + epsilon.cluster = 1e-05, + epsilon.harmony = 1e-04, + project.dim = TRUE, + verbose = TRUE, + ... +) { + check_installed( + pkg = "harmony", + reason = "for running integration with Harmony" + ) + if (!inherits(x = object, what = 'StdAssay')) { + abort(message = "'object' must be a v5 assay object") + } + layers <- Layers(object = object, search = layers) + object <- ScaleData( + object = object, + features = features, + layer = layers, + verbose = verbose + ) + pca <- RunPCA(object = object, npcs = npcs, verbose = verbose) + cmap <- slot(object = object, name = 'cells')[, layers] + md <- as.data.frame(x = labels( + object = cmap, + values = Cells(x = object, layer = 'scale.data'), + select = 'first' + )) + names(x = md) <- 'layer' + md <- md[Cells(x = object, layer = 'scale.data'), , drop = FALSE] + harmony.embed <- harmony::HarmonyMatrix( + data_mat = Embeddings(object = pca), + meta_data = md, + vars_use = 'layer', + do_pca = FALSE, + npcs = 0L, + theta = theta, + lambda = lambda, + sigma = sigma, + nclust = nclust, + tau = tau, + block.size = block.size, + max.iter.harmony = max.iter.harmony, + max.iter.cluster = max.iter.cluster, + epsilon.cluster = epsilon.cluster, + epsilon.harmony = epsilon.harmony, + return_object = FALSE, + verbose = verbose + ) + rownames(x = harmony.embed) <- Cells(x = pca) + dr <- suppressWarnings(expr = CreateDimReducObject( + embeddings = harmony.embed, + key = key, + assay = assay + )) + if (isTRUE(x = project.dim)) { + warn("projection") + } + return(list(harmony = dr)) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Methods for Seurat-defined generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @method IntegrateLayers StdAssay +#' @export +#' +IntegrateLayers.StdAssay <- function( + object, + method, + assay, + features, # TODO: allow selectintegrationfeatures to run on v5 assays + layers = NULL, + group.by = NULL, + ... +) { + if (is_quosure(x = method)) { + method <- eval( + expr = quo_get_expr(quo = method), + envir = quo_get_env(quo = method) + ) + } + if (is.character(x = method)) { + method <- get(x = method) + } + if (!is.function(x = method)) { + abort(message = "'method' must be a function for integrating layers") + } + layers <- Layers(object = object, search = layers) + if (!is.null(x = group.by)) { + object <- split(x = object, f = group.by, drop = TRUE) + } + return(method(object = object, features = features, assay = assay, ...)) +} + +#' @method IntegrateLayers Seurat +#' @export +#' +IntegrateLayers.Seurat <- function( + object, + method, + group.by = NULL, + assay = NULL, + features = NULL, + layers = NULL, + ... +) { + method <- enquo(arg = method) + assay <- assay %||% DefaultAssay(object = object) + if (!inherits(x = object[[assay]], what = 'StdAssay')) { + abort(message = "'assay' must be a v5 assay") + } + features <- SelectIntegrationFeatures5(object = object, assay = assay) + group.by <- if (is.null(x = group.by)) { + group.by + } else if (rlang::is_na(x = group.by)) { + Idents(object = object) + } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { + object[[group.by, drop = TRUE]] + } else { + abort(message = "'group.by' must be the name of a column in cell-level meta data") + } + value <- IntegrateLayers( + object = object[[assay]], + method = method, + assay = assay, + features = features, + layers = layers, + group.by = group.by, + ... + ) + for (i in names(x = value)) { + object[[i]] <- value[[i]] + } + return(object) +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Internal +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/zzz.R b/R/zzz.R index 73443ce83..14f702d5c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,5 +1,15 @@ #' @importFrom methods slot -#' @importFrom rlang abort arg_match caller_env check_installed inform warn +#' @importFrom rlang abort +#' arg_match +#' as_name +#' caller_env +#' check_installed +#' enquo +#' inform +#' is_quosure +#' quo_get_env +#' quo_get_expr +#' warn #' NULL From 456e93357183d9207053ab95c35309e8a9b2b071 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 22 Oct 2022 00:58:05 -0400 Subject: [PATCH 228/979] fix uwot num_precomputed_nns --- R/dimensional_reduction.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 581f35427..1211f1774 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1385,6 +1385,9 @@ RunUMAP.default <- function( call. = FALSE ) } + if (!"num_precomputed_nns" %in% names(x = model)) { + model$num_precomputed_nns <- 0 + } if (is.list(x = object)) { if (ncol(object$idx) != model$n_neighbors) { warning("Number of neighbors between query and reference ", From 973df584146cdc64963a0b3ff2f11bc0923e5a78 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 24 Oct 2022 14:32:12 -0400 Subject: [PATCH 229/979] Revamp IntegrateLayers --- DESCRIPTION | 1 + R/integration5.R | 151 +++++++++++++++++++++++++++++++------- R/roxygen.R | 62 ++++++++++++++++ man/HarmonyIntegration.Rd | 32 ++++++++ man/IntegrateLayers.Rd | 53 +++++++++++++ 5 files changed, 271 insertions(+), 28 deletions(-) create mode 100644 R/roxygen.R create mode 100644 man/HarmonyIntegration.Rd create mode 100644 man/IntegrateLayers.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 73346c99b..cb81833a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -96,6 +96,7 @@ Collate: 'objects.R' 'preprocessing.R' 'preprocessing5.R' + 'roxygen.R' 'sketching.R' 'tree.R' 'utilities.R' diff --git a/R/integration5.R b/R/integration5.R index a4daccdb1..d772f7fd1 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -7,22 +7,122 @@ NULL # Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Integrate Layers +#' +#' @param object A \code{\link[SeuratObject]{Seurat}} object +#' @param method Integration method function; can choose from: +#' \Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")} +#' @param ... Arguments passed on to \code{method} +#' +#' @return \code{object} with integration data added to it +#' +#' @section Integration Methods Functions: +#' Integration method functions can be written by anyone to implement any +#' integration method in Seurat. These methods should expect to take a +#' \link[SeuratObject:Assay5]{v5 assay} as input and return a named list of +#' objects that can be added back to a \code{Seurat} object (eg. a +#' \link[SeuratObject:DimReduc]{dimensional reduction} or cell-level meta data) +#' +#' Every integration method function should expect the following arguments: +#' \itemize{ +#' \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object +#' \item \dQuote{\code{assay}}: name of \code{object} in the original +#' \code{\link[SeuratObject]{Seurat}} object +#' \item \dQuote{\code{layers}}: names of normalized layers in \code{object} +#' \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in +#' \code{object} +#' \item \dQuote{\code{features}}: a vector of features for integration +#' \item \dQuote{\code{groups}}: a one-column data frame with the groups for +#' each cell in \code{object}; the column name will be \dQuote{group} +#' } +#' #' @export #' -IntegrateLayers <- function(object, ...) { - UseMethod(generic = 'IntegrateLayers', object = object) +IntegrateLayers <- function( + object, + method, + group.by = NULL, + assay = NULL, + features = NULL, + layers = NULL, + scale.layer = 'scale.data', + ... +) { + # Get the integration method + if (is_quosure(x = method)) { + method <- eval( + expr = quo_get_expr(quo = method), + envir = quo_get_env(quo = method) + ) + } + if (is.character(x = method)) { + method <- get(x = method) + } + if (!is.function(x = method)) { + abort(message = "'method' must be a function for integrating layers") + } + # Check our assay + assay <- assay %||% DefaultAssay(object = object) + if (!inherits(x = object[[assay]], what = 'StdAssay')) { + abort(message = "'assay' must be a v5 assay") + } + layers <- Layers(object = object, assay = assay, search = layers) + features <- features %||% SelectIntegrationFeatures5(object = object, assay = assay) + scale.layer <- Layers(object = object, search = scale.layer) + features <- intersect( + x = features, + y = Features(x = object[[assay]], layer = scale.layer) + ) + if (!length(x = features)) { + abort(message = "None of the features provided are found in this assay") + } + # Check our groups + groups <- if (is.null(x = group.by) && length(x = layers) > 1L) { + cmap <- slot(object = object[[assay]], name = 'cells')[, layers] + as.data.frame(x = labels( + object = cmap, + values = Cells(x = object[[assay]], layer = scale.layer) + )) + } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { + FetchData( + object = object, + vars = group.by, + cells = colnames(x = object[[assay]]) + ) + } else { + abort(message = "'group.by' must correspond to a column of cell-level meta data") + } + names(x = groups) <- "group" + # Run the integration method + value <- method( + object = object[[assay]], + assay = assay, + layers = layers, + scale.layer = scale.layer, + features = features, + groups = groups, + ... + ) + for (i in names(x = value)) { + object[[i]] <- value[[i]] + } + return(object) } #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Harmony Integration +#' #' @export #' HarmonyIntegration <- function( object, - features, assay, + groups, + features = NULL, + scale.layer = 'scale.data', layers = NULL, npcs = 50L, key = 'harmony_', @@ -36,7 +136,7 @@ HarmonyIntegration <- function( max.iter.cluster = 20L, epsilon.cluster = 1e-05, epsilon.harmony = 1e-04, - project.dim = TRUE, + # project.dim = TRUE, verbose = TRUE, ... ) { @@ -47,26 +147,21 @@ HarmonyIntegration <- function( if (!inherits(x = object, what = 'StdAssay')) { abort(message = "'object' must be a v5 assay object") } - layers <- Layers(object = object, search = layers) - object <- ScaleData( + # Run joint PCA + features <- features %||% Features(x = object, layer = scale.layer) + pca <- RunPCA( object = object, + assay = assay, features = features, - layer = layers, + layer = scale.layer, + npcs = npcs, verbose = verbose ) - pca <- RunPCA(object = object, npcs = npcs, verbose = verbose) - cmap <- slot(object = object, name = 'cells')[, layers] - md <- as.data.frame(x = labels( - object = cmap, - values = Cells(x = object, layer = 'scale.data'), - select = 'first' - )) - names(x = md) <- 'layer' - md <- md[Cells(x = object, layer = 'scale.data'), , drop = FALSE] + # Run Harmony harmony.embed <- harmony::HarmonyMatrix( data_mat = Embeddings(object = pca), - meta_data = md, - vars_use = 'layer', + meta_data = groups, + vars_use = 'group', do_pca = FALSE, npcs = 0L, theta = theta, @@ -83,23 +178,23 @@ HarmonyIntegration <- function( verbose = verbose ) rownames(x = harmony.embed) <- Cells(x = pca) + # TODO add feature loadings from PCA dr <- suppressWarnings(expr = CreateDimReducObject( embeddings = harmony.embed, key = key, assay = assay )) - if (isTRUE(x = project.dim)) { - warn("projection") - } - return(list(harmony = dr)) + return(list(pca = pca, harmony = dr)) } +attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @method IntegrateLayers StdAssay -#' @export +# @method IntegrateLayers StdAssay +# @export #' IntegrateLayers.StdAssay <- function( object, @@ -126,11 +221,11 @@ IntegrateLayers.StdAssay <- function( if (!is.null(x = group.by)) { object <- split(x = object, f = group.by, drop = TRUE) } - return(method(object = object, features = features, assay = assay, ...)) + return(method(object = object, features = features, assay = assay, layers = layers, ...)) } -#' @method IntegrateLayers Seurat -#' @export +# @method IntegrateLayers Seurat +# @export #' IntegrateLayers.Seurat <- function( object, @@ -146,7 +241,7 @@ IntegrateLayers.Seurat <- function( if (!inherits(x = object[[assay]], what = 'StdAssay')) { abort(message = "'assay' must be a v5 assay") } - features <- SelectIntegrationFeatures5(object = object, assay = assay) + features <- features %||% SelectIntegrationFeatures5(object = object, assay = assay) group.by <- if (is.null(x = group.by)) { group.by } else if (rlang::is_na(x = group.by)) { diff --git a/R/roxygen.R b/R/roxygen.R new file mode 100644 index 000000000..ab17e5845 --- /dev/null +++ b/R/roxygen.R @@ -0,0 +1,62 @@ +#' @include zzz.R +#' +NULL + +#' @importFrom utils lsf.str +#' +#' @export +#' +.rd_methods <- function(method = 'integration') { + methods <- sapply( + X = grep(pattern = '^package:', x = search(), value = TRUE), + FUN = function(x) { + fxns <- as.character(x = lsf.str(pos = x)) + attrs <- vector(mode = 'logical', length = length(x = fxns)) + for (i in seq_along(along.with = fxns)) { + mthd <- attr(x = get(x = fxns[i], pos = x), which = 'Seurat.method') + attrs[i] <- is_scalar_character(x = mthd) && isTRUE(x = mthd == method) + } + return(fxns[attrs]) + }, + simplify = FALSE, + USE.NAMES = TRUE + ) + methods <- Filter(f = length, x = methods) + names(x = methods) <- gsub( + pattern = '^package:', + replacement = '', + x = names(x = methods) + ) + if (!length(x = methods)) { + return('') + } + ret <- vector( + mode = 'character', + length = sum(vapply( + X = methods, + FUN = length, + FUN.VALUE = integer(length = 1L) + )) + ) + j <- 1L + for (pkg in names(x = methods)) { + for (fxn in methods[[pkg]]) { + ret[j] <- ifelse( + test = pkg == 'Seurat', + yes = paste0('\\item \\code{\\link{', fxn, '}}'), + no = paste0( + '\\item \\code{\\link[', + pkg, + ':', + fxn, + ']{', + pkg, + '::', + fxn, '}}' + ) + ) + j <- j + 1L + } + } + return(paste('\\itemize{', paste0(' ', ret, collapse = '\n'), '}', sep = '\n')) +} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd new file mode 100644 index 000000000..079cfb0a5 --- /dev/null +++ b/man/HarmonyIntegration.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration5.R +\name{HarmonyIntegration} +\alias{HarmonyIntegration} +\title{Harmony Integration} +\usage{ +HarmonyIntegration( + object, + assay, + groups, + features = NULL, + scale.layer = "scale.data", + layers = NULL, + npcs = 50L, + key = "harmony_", + theta = NULL, + lambda = NULL, + sigma = 0.1, + nclust = NULL, + tau = 0, + block.size = 0.05, + max.iter.harmony = 10L, + max.iter.cluster = 20L, + epsilon.cluster = 1e-05, + epsilon.harmony = 1e-04, + verbose = TRUE, + ... +) +} +\description{ +Harmony Integration +} diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd new file mode 100644 index 000000000..55640b057 --- /dev/null +++ b/man/IntegrateLayers.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration5.R +\name{IntegrateLayers} +\alias{IntegrateLayers} +\title{Integrate Layers} +\usage{ +IntegrateLayers( + object, + method, + group.by = NULL, + assay = NULL, + features = NULL, + layers = NULL, + scale.layer = "scale.data", + ... +) +} +\arguments{ +\item{object}{A \code{\link[SeuratObject]{Seurat}} object} + +\item{method}{Integration method function; can choose from: +\Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")}} + +\item{...}{Arguments passed on to \code{method}} +} +\value{ +\code{object} with integration data added to it +} +\description{ +Integrate Layers +} +\section{Integration Methods Functions}{ + +Integration method functions can be written by anyone to implement any +integration method in Seurat. These methods should expect to take a +\link[SeuratObject:Assay5]{v5 assay} as input and return a named list of +objects that can be added back to a \code{Seurat} object (eg. a +\link[SeuratObject:DimReduc]{dimensional reduction} or cell-level meta data) + +Every integration method function should expect the following arguments: +\itemize{ + \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object + \item \dQuote{\code{assay}}: name of \code{object} in the original + \code{\link[SeuratObject]{Seurat}} object + \item \dQuote{\code{layers}}: names of normalized layers in \code{object} + \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in + \code{object} + \item \dQuote{\code{features}}: a vector of features for integration + \item \dQuote{\code{groups}}: a one-column data frame with the groups for + each cell in \code{object}; the column name will be \dQuote{group} +} +} + From a4ff80a0b217d49e43f4bb7155764340d82559c1 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 24 Oct 2022 14:32:28 -0400 Subject: [PATCH 230/979] Allow scaling by layer --- R/preprocessing5.R | 103 +++++++++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index bcf542cc2..43f5207e2 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -615,6 +615,7 @@ ScaleData.StdAssay <- function( layer = NULL, vars.to.regress = NULL, latent.data = NULL, + by.layer = FALSE, split.by = NULL, model.use = 'linear', use.umi = FALSE, @@ -628,53 +629,93 @@ ScaleData.StdAssay <- function( ... ) { use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) + olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) layer <- Layers(object = object, search = layer) if (isTRUE(x = use.umi)) { - message("'use.umi' is TRUE, please make sure 'layer' specifies raw counts") + inform( + message = "'use.umi' is TRUE, please make sure 'layer' specifies raw counts" + ) } features <- features %||% Reduce( f = union, x = lapply( X = layer, FUN = function(x) { - return(VariableFeatures(object = object, layer = layer)) + return(VariableFeatures(object = object, layer = x)) } ) ) if (!length(x = features)) { features <- Reduce(f = union, x = lapply(X = layer, FUN = Features, x = object)) } - ldata <- if (length(x = layer) > 1L) { - StitchMatrix( - x = LayerData(object = object, layer = layer[1L], features = features), - y = lapply( - X = layer[2:length(x = layer)], - FUN = LayerData, - object = object, - features = features - ), - rowmap = slot(object = object, name = 'features')[features, layer], - colmap = slot(object = object, name = 'cells')[, layer] - ) + if (isTRUE(x = by.layer)) { + if (length(x = save) != length(x = layer)) { + save <- make.unique(names = gsub( + pattern = olayer, + replacement = save, + x = layer + )) + } + for (i in seq_along(along.with = layer)) { + lyr <- layer[i] + if (isTRUE(x = verbose)) { + inform(message = paste("Scaling data for layer", sQuote(x = lyr))) + } + LayerData(object = object, layer = save[i], ...) <- ScaleData( + object = LayerData( + object = object, + layer = lyr, + features = features, + fast = NA + ), + features = features, + vars.to.regress = vars.to.regress, + latent.data = latent.data, + split.by = split.by, + model.use = model.use, + use.umi = use.umi, + do.scale = do.scale, + do.center = do.center, + scale.max = scale.max, + block.size = block.size, + min.cells.to.block = min.cells.to.block, + verbose = verbose, + ... + ) + } } else { - LayerData(object = object, layer = layer, features = features) + ldata <- if (length(x = layer) > 1L) { + StitchMatrix( + x = LayerData(object = object, layer = layer[1L], features = features), + y = lapply( + X = layer[2:length(x = layer)], + FUN = LayerData, + object = object, + features = features + ), + rowmap = slot(object = object, name = 'features')[features, layer], + colmap = slot(object = object, name = 'cells')[, layer] + ) + } else { + LayerData(object = object, layer = layer, features = features) + } + LayerData(object = object, layer = save, features = features) <- ScaleData( + object = ldata, + features = features, + vars.to.regress = vars.to.regress, + latent.data = latent.data, + split.by = split.by, + model.use = model.use, + use.umi = use.umi, + do.scale = do.scale, + do.center = do.center, + scale.max = scale.max, + block.size = block.size, + min.cells.to.block = min.cells.to.block, + verbose = verbose, + ... + ) } - LayerData(object = object, layer = save, features = features) <- ScaleData( - object = ldata, - features = features, - vars.to.regress = vars.to.regress, - latent.data = latent.data, - split.by = split.by, - model.use = model.use, - use.umi = use.umi, - do.scale = do.scale, - do.center = do.center, - scale.max = scale.max, - block.size = block.size, - min.cells.to.block = min.cells.to.block, - verbose = verbose, - ... - ) return(object) } From 4ba613c03095d33c7a816dc5a7fd126dd5a7fc65 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 24 Oct 2022 14:32:35 -0400 Subject: [PATCH 231/979] Update docs --- NAMESPACE | 5 +++-- man/CreateSCTAssayObject.Rd | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8500d3cde..7b63c5d3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,8 +47,6 @@ S3method(GetTissueCoordinates,VisiumV1) S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) -S3method(IntegrateLayers,Seurat) -S3method(IntegrateLayers,StdAssay) S3method(LeverageScore,DelayedMatrix) S3method(LeverageScore,Seurat) S3method(LeverageScore,Seurat5) @@ -152,6 +150,7 @@ export("Project<-") export("SCTResults<-") export("Tool<-") export("VariableFeatures<-") +export(.rd_methods) export(AddAzimuthResults) export(AddMetaData) export(AddModuleScore) @@ -236,6 +235,7 @@ export(GroupCorrelationPlot) export(HTODemux) export(HTOHeatmap) export(HVFInfo) +export(HarmonyIntegration) export(HoverLocator) export(IFeaturePlot) export(ISpatialDimPlot) @@ -779,6 +779,7 @@ importFrom(utils,globalVariables) importFrom(utils,head) importFrom(utils,isS3method) importFrom(utils,isS3stdGeneric) +importFrom(utils,lsf.str) importFrom(utils,methods) importFrom(utils,packageVersion) importFrom(utils,read.csv) diff --git a/man/CreateSCTAssayObject.Rd b/man/CreateSCTAssayObject.Rd index c3c1406c8..70f30f633 100644 --- a/man/CreateSCTAssayObject.Rd +++ b/man/CreateSCTAssayObject.Rd @@ -25,10 +25,10 @@ CreateSCTAssayObject( \item{min.cells}{Include features detected in at least this many cells. Will subset the counts matrix as well. To reintroduce excluded features, create a -new object with a lower cutoff.} +new object with a lower cutoff} \item{min.features}{Include cells where at least this many features are -detected.} +detected} \item{SCTModel.list}{list of SCTModels} } From fa62ecbb333f3c4bb18fb24a8b5ef92620393fa9 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 24 Oct 2022 14:32:57 -0400 Subject: [PATCH 232/979] Bump v5 version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cb81833a6..16b4c59b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9010 -Date: 2022-10-04 +Version: 4.0.4.9011 +Date: 2022-10-24 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 8685ed52bb89b3d8fa3c7da04e5a006951329b1d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 24 Oct 2022 17:42:19 -0400 Subject: [PATCH 233/979] CCA integration --- R/integration.R | 2 ++ R/integration5.R | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/R/integration.R b/R/integration.R index 165c2fad9..1e2f23edd 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1596,6 +1596,8 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( } slot(object = anchorset, name = "object.list") <- object.list new.reduction.name.safe <- gsub(pattern = "_", replacement = "", x = new.reduction.name) + new.reduction.name.safe <- gsub(pattern = "[.]", replacement = "", x = new.reduction.name.safe) + reference.integrated <- PairwiseIntegrateReference( anchorset = anchorset, new.assay.name = new.reduction.name.safe, diff --git a/R/integration5.R b/R/integration5.R index d772f7fd1..675a2f319 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -189,6 +189,70 @@ HarmonyIntegration <- function( attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' +#' Seurat-CCA Integration +#' +#' @inheritParams FindIntegrationAnchors +#' @export +#' + +CCAIntegration <- function( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = 'pca', + new.reduction = 'integrated.dr', + reference = NULL, + anchor.features = NULL, + normalization.method = c("LogNormalize", "SCT"), + dims = 1:30, + k.filter = NA, + scale.data.layer = 'scale.data', + verbose = TRUE, + ...) { + anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% DefaultAssay(object = object) + layers <- layers %||% Layers(object, search = 'data') + object <- RunPCA(object = object, + assay = assay, + features = anchor.features, + reduction.name = orig.reduction, + reduction.key = paste0(orig.reduction,"_"), + verbose = verbose + ) + + object.list <- list() + for (i in seq_along(along.with = layers)) { + object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) + object.list[[i]][['RNA']][[scale.data.layer]] <- object[[assay]]$scale.data[,Cells(object.list[[i]])] + object.list[[i]][['RNA']]$counts <- NULL + } + anchor <- FindIntegrationAnchors(object.list = object.list, + anchor.features = anchor.features, + scale = FALSE, + reduction = 'cca', + normalization.method = normalization.method, + dims = dims, + k.filter = k.filter, + reference = reference, + verbose = verbose, + ... + ) + + ## diet Seurat object + ### + + ### + object_merged <- IntegrateEmbeddings(anchorset = anchor, + reductions = object[[orig.reduction]], + new.reduction.name = new.reduction, + verbose = verbose) + object[[new.reduction]] <- object_merged[[new.reduction]] + return(object) +} + + + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From 6df6506b9d38939923988c1c2039ff078bb10682 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 24 Oct 2022 21:58:03 -0400 Subject: [PATCH 234/979] RPCA inte --- R/integration5.R | 67 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index 675a2f319..85fea0e95 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -199,7 +199,7 @@ CCAIntegration <- function( object = NULL, assay = NULL, layers = NULL, - orig.reduction = 'pca', + orig.reduction = 'pca.rna', new.reduction = 'integrated.dr', reference = NULL, anchor.features = NULL, @@ -330,6 +330,71 @@ IntegrateLayers.Seurat <- function( return(object) } + + +#' Seurat-RPCA Integration +#' +#' @inheritParams FindIntegrationAnchors +#' @export +#' + +RPCAIntegration <- function( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = 'pca.rna', + new.reduction = 'integrated.dr', + reference = NULL, + anchor.features = NULL, + normalization.method = c("LogNormalize", "SCT"), + dims = 1:30, + k.filter = NA, + scale.data.layer = 'scale.data', + verbose = TRUE, + ...) { + anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% DefaultAssay(object = object) + layers <- layers %||% Layers(object, search = 'data') + object <- RunPCA(object = object, + assay = assay, + features = anchor.features, + reduction.name = orig.reduction, + reduction.key = paste0(orig.reduction,"_"), + verbose = verbose + ) + + object.list <- list() + for (i in seq_along(along.with = layers)) { + object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) + VariableFeatures(object = object.list[[i]]) <- anchor.features + object.list[[i]] <- ScaleData( object.list[[i]], verbose = FALSE) + object.list[[i]] <- RunPCA( object.list[[i]], verbose = FALSE) + object.list[[i]][['RNA']]$counts <- NULL + } + + anchor <- FindIntegrationAnchors(object.list = object.list, + anchor.features = anchor.features, + scale = FALSE, + reduction = 'rpca', + normalization.method = normalization.method, + dims = dims, + k.filter = k.filter, + reference = reference, + verbose = verbose, + ... + ) + ## diet Seurat object + ### + + ### + object_merged <- IntegrateEmbeddings(anchorset = anchor, + reductions = object[[orig.reduction]], + new.reduction.name = new.reduction, + verbose = verbose) + object[[new.reduction]] <- object_merged[[new.reduction]] + return(object) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From d5f0f613c6b7378a1b026de652b3d3eb6c2af7a6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 24 Oct 2022 23:24:58 -0400 Subject: [PATCH 235/979] add joint PCA integration --- R/integration.R | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 1e2f23edd..0c00a16d7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -71,6 +71,7 @@ NULL #' \itemize{ #' \item{cca: Canonical correlation analysis} #' \item{rpca: Reciprocal PCA} +#' \item{jpca: Joint PCA} #' \item{rlsi: Reciprocal LSI} #' } #' @param l2.norm Perform L2 normalization on the CCA cell embeddings after @@ -136,7 +137,7 @@ FindIntegrationAnchors <- function( scale = TRUE, normalization.method = c("LogNormalize", "SCT"), sct.clip.range = NULL, - reduction = c("cca", "rpca", "rlsi"), + reduction = c("cca", "rpca", "jpca", "rlsi"), l2.norm = TRUE, dims = 1:30, k.anchor = 5, @@ -261,7 +262,11 @@ FindIntegrationAnchors <- function( # if using pca or lsi, only need to compute the internal neighborhood structure once # for each dataset internal.neighbors <- list() - if (nn.reduction %in% c("pca", "lsi")) { + if (nn.reduction %in% c("pca", "lsi","jpca")) { + if (nn.reduction == 'jpca') { + nn.reduction <- 'joint.pca' + reduction <- 'joint.pca' + } k.filter <- NA if (verbose) { message("Computing within dataset neighborhoods") @@ -401,10 +406,28 @@ FindIntegrationAnchors <- function( } object.pair }, + 'joint.pca' = { + object.pair <- merge(x = object.1, y = object.2) + reduction.2 <- "joint.pca" + object.pair[['joint.pca']] <- CreateDimReducObject( + embeddings = rbind(Embeddings(object.1[['joint.pca']]), + Embeddings(object.2[['joint.pca']])), + key = 'Joint_', + assay = 'ToIntegrate') + if (l2.norm) { + object.pair <- L2Dim(object = object.pair, + reduction = 'joint.pca', + new.dr = 'joint.pca.l2', + new.key = 'Jl2_' + ) + reduction <- paste0(reduction, ".l2") + reduction.2 <- paste0(reduction.2, ".l2") + } + object.pair + }, stop("Invalid reduction parameter. Please choose either cca, rpca, or rlsi") ) internal.neighbors <- internal.neighbors[c(i, j)] - anchors <- FindAnchors( object.pair = object.pair, assay = c("ToIntegrate", "ToIntegrate"), From 7153d92aeb278983afc41186f0b7f8972435e208 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 24 Oct 2022 23:59:58 -0400 Subject: [PATCH 236/979] JCPA integration --- R/integration5.R | 190 +++++++++++++++++++++++++++++++---------------- 1 file changed, 126 insertions(+), 64 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 85fea0e95..118a02b39 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -250,7 +250,133 @@ CCAIntegration <- function( return(object) } +#' Seurat-RPCA Integration +#' +#' @inheritParams FindIntegrationAnchors +#' @export +#' +RPCAIntegration <- function( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = 'pca.rna', + new.reduction = 'integrated.dr', + reference = NULL, + anchor.features = NULL, + normalization.method = c("LogNormalize", "SCT"), + dims = 1:30, + k.filter = NA, + scale.data.layer = 'scale.data', + verbose = TRUE, + ...) { + anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% DefaultAssay(object = object) + layers <- layers %||% Layers(object, search = 'data') + object <- RunPCA(object = object, + assay = assay, + features = anchor.features, + reduction.name = orig.reduction, + reduction.key = paste0(orig.reduction,"_"), + verbose = verbose + ) + + object.list <- list() + for (i in seq_along(along.with = layers)) { + object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) + VariableFeatures(object = object.list[[i]]) <- anchor.features + object.list[[i]] <- ScaleData( object.list[[i]], verbose = FALSE) + object.list[[i]] <- RunPCA( object.list[[i]], verbose = FALSE) + object.list[[i]][['RNA']]$counts <- NULL + } + + anchor <- FindIntegrationAnchors(object.list = object.list, + anchor.features = anchor.features, + scale = FALSE, + reduction = 'rpca', + normalization.method = normalization.method, + dims = dims, + k.filter = k.filter, + reference = reference, + verbose = verbose, + ... + ) + ## diet Seurat object + ### + + ### + object_merged <- IntegrateEmbeddings(anchorset = anchor, + reductions = object[[orig.reduction]], + new.reduction.name = new.reduction, + verbose = verbose) + object[[new.reduction]] <- object_merged[[new.reduction]] + return(object) +} + +#' Seurat-Joint PCA Integration +#' +#' @inheritParams FindIntegrationAnchors +#' @export +#' +JointPCAIntegration <- function( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = 'pca.rna', + new.reduction = 'integrated.dr', + reference = NULL, + anchor.features = NULL, + normalization.method = NULL, + dims = 1:30, + k.filter = NA, + verbose = TRUE, + ... +) { + anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% DefaultAssay(object = object) + layers <- layers %||% Layers(object, search = 'data') + object <- RunPCA(object = object, + assay = assay, + features = anchor.features, + reduction.name = orig.reduction, + reduction.key = paste0(orig.reduction,"_"), + verbose = verbose + ) + + object.list <- list() + for (i in seq_along(along.with = layers)) { + object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) + object.list[[i]][['RNA']]$counts <- NULL + object.list[[i]][['joint.pca']] <- CreateDimReducObject( + embeddings = Embeddings(object = object[[orig.reduction]])[Cells(object.list[[i]]),], + assay = 'RNA', + key = 'J_' + ) + } + + ## diet Seurat object + ### + + ### + + anchor <- FindIntegrationAnchors(object.list = object.list, + anchor.features = anchor.features, + scale = FALSE, + reduction = 'jpca', + normalization.method = normalization.method, + dims = dims, + k.filter = k.filter, + reference = reference, + verbose = verbose, + ... + ) + object_merged <- IntegrateEmbeddings(anchorset = anchor, + reductions = object[[orig.reduction]], + new.reduction.name = new.reduction, + verbose = verbose) + object[[new.reduction]] <- object_merged[[new.reduction]] + return(object) +} #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -331,70 +457,6 @@ IntegrateLayers.Seurat <- function( } - -#' Seurat-RPCA Integration -#' -#' @inheritParams FindIntegrationAnchors -#' @export -#' - -RPCAIntegration <- function( - object = NULL, - assay = NULL, - layers = NULL, - orig.reduction = 'pca.rna', - new.reduction = 'integrated.dr', - reference = NULL, - anchor.features = NULL, - normalization.method = c("LogNormalize", "SCT"), - dims = 1:30, - k.filter = NA, - scale.data.layer = 'scale.data', - verbose = TRUE, - ...) { - anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) - assay <- assay %||% DefaultAssay(object = object) - layers <- layers %||% Layers(object, search = 'data') - object <- RunPCA(object = object, - assay = assay, - features = anchor.features, - reduction.name = orig.reduction, - reduction.key = paste0(orig.reduction,"_"), - verbose = verbose - ) - - object.list <- list() - for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) - VariableFeatures(object = object.list[[i]]) <- anchor.features - object.list[[i]] <- ScaleData( object.list[[i]], verbose = FALSE) - object.list[[i]] <- RunPCA( object.list[[i]], verbose = FALSE) - object.list[[i]][['RNA']]$counts <- NULL - } - - anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = anchor.features, - scale = FALSE, - reduction = 'rpca', - normalization.method = normalization.method, - dims = dims, - k.filter = k.filter, - reference = reference, - verbose = verbose, - ... - ) - ## diet Seurat object - ### - - ### - object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = object[[orig.reduction]], - new.reduction.name = new.reduction, - verbose = verbose) - object[[new.reduction]] <- object_merged[[new.reduction]] - return(object) -} - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From b427912b8657ea5539963cd3944a998e8d30dfdd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 25 Oct 2022 16:06:16 -0400 Subject: [PATCH 237/979] add loadings --- R/integration.R | 2 ++ R/integration5.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 0c00a16d7..a21c8cc37 100644 --- a/R/integration.R +++ b/R/integration.R @@ -412,6 +412,7 @@ FindIntegrationAnchors <- function( object.pair[['joint.pca']] <- CreateDimReducObject( embeddings = rbind(Embeddings(object.1[['joint.pca']]), Embeddings(object.2[['joint.pca']])), + loadings = Loadings(object.1[['joint.pca']]), key = 'Joint_', assay = 'ToIntegrate') if (l2.norm) { @@ -3779,6 +3780,7 @@ FindAnchors <- function( max.features = max.features, projected = projected ) + if(length(top.features) == 2){ top.features <- intersect(top.features[[1]], top.features[[2]]) } else{ diff --git a/R/integration5.R b/R/integration5.R index 118a02b39..e46297aa6 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -342,7 +342,6 @@ JointPCAIntegration <- function( reduction.key = paste0(orig.reduction,"_"), verbose = verbose ) - object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) @@ -350,6 +349,7 @@ JointPCAIntegration <- function( object.list[[i]][['joint.pca']] <- CreateDimReducObject( embeddings = Embeddings(object = object[[orig.reduction]])[Cells(object.list[[i]]),], assay = 'RNA', + loadings = Loadings(object[[orig.reduction]]), key = 'J_' ) } From 132150f395effe32d6799660ac565f2859c59506 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 25 Oct 2022 17:32:25 -0400 Subject: [PATCH 238/979] fix JointInte --- R/integration5.R | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index e46297aa6..734ce0a36 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -184,7 +184,7 @@ HarmonyIntegration <- function( key = key, assay = assay )) - return(list(pca = pca, harmony = dr)) + return(list('pca' = pca, 'harmony' = dr)) } attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' @@ -325,42 +325,46 @@ JointPCAIntegration <- function( orig.reduction = 'pca.rna', new.reduction = 'integrated.dr', reference = NULL, - anchor.features = NULL, + features = NULL, normalization.method = NULL, dims = 1:30, + npcs = 50, k.filter = NA, + scale.layer = 'scale.data', verbose = TRUE, + groups = NULL, ... ) { - anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) - assay <- assay %||% DefaultAssay(object = object) + features <- features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - object <- RunPCA(object = object, - assay = assay, - features = anchor.features, - reduction.name = orig.reduction, - reduction.key = paste0(orig.reduction,"_"), - verbose = verbose + npcs <- max(npcs, dims) + pca <- RunPCA( + object = object, + assay = assay, + features = features, + layer = scale.layer, + npcs = npcs, + verbose = verbose ) object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]] ) object.list[[i]][['RNA']]$counts <- NULL object.list[[i]][['joint.pca']] <- CreateDimReducObject( - embeddings = Embeddings(object = object[[orig.reduction]])[Cells(object.list[[i]]),], + embeddings = Embeddings(object = pca)[Cells(object.list[[i]]),], assay = 'RNA', - loadings = Loadings(object[[orig.reduction]]), + loadings = Loadings(pca), key = 'J_' ) } - ## diet Seurat object ### ### anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = anchor.features, + anchor.features = features, scale = FALSE, reduction = 'jpca', normalization.method = normalization.method, @@ -371,11 +375,12 @@ JointPCAIntegration <- function( ... ) object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = object[[orig.reduction]], + reductions = pca, new.reduction.name = new.reduction, verbose = verbose) - object[[new.reduction]] <- object_merged[[new.reduction]] - return(object) + output.list <- list(pca, object_merged[[new.reduction]]) + names(output.list) <- c(orig.reduction, new.reduction) + return(output.list) } From fddb8eea7f4a3ce7a9f0eb31ab98f8c82849b22f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 25 Oct 2022 17:48:58 -0400 Subject: [PATCH 239/979] fix rpca inte --- R/integration5.R | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 734ce0a36..4500dc642 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -206,7 +206,7 @@ CCAIntegration <- function( normalization.method = c("LogNormalize", "SCT"), dims = 1:30, k.filter = NA, - scale.data.layer = 'scale.data', + scale.layer = 'scale.data', verbose = TRUE, ...) { anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) @@ -263,35 +263,39 @@ RPCAIntegration <- function( orig.reduction = 'pca.rna', new.reduction = 'integrated.dr', reference = NULL, - anchor.features = NULL, + features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, + npcs = 50, k.filter = NA, - scale.data.layer = 'scale.data', + scale.layer = 'scale.data', + groups = NULL, verbose = TRUE, ...) { - anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) - assay <- assay %||% DefaultAssay(object = object) + features <- features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - object <- RunPCA(object = object, - assay = assay, - features = anchor.features, - reduction.name = orig.reduction, - reduction.key = paste0(orig.reduction,"_"), - verbose = verbose + npcs <- max(npcs, dims) + pca <- RunPCA( + object = object, + assay = assay, + features = features, + layer = scale.layer, + npcs = npcs, + verbose = verbose ) object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) - VariableFeatures(object = object.list[[i]]) <- anchor.features + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]] ) + VariableFeatures(object = object.list[[i]]) <- features object.list[[i]] <- ScaleData( object.list[[i]], verbose = FALSE) object.list[[i]] <- RunPCA( object.list[[i]], verbose = FALSE) object.list[[i]][['RNA']]$counts <- NULL } anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = anchor.features, + anchor.features = features, scale = FALSE, reduction = 'rpca', normalization.method = normalization.method, @@ -306,11 +310,13 @@ RPCAIntegration <- function( ### object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = object[[orig.reduction]], + reductions = pca, new.reduction.name = new.reduction, verbose = verbose) - object[[new.reduction]] <- object_merged[[new.reduction]] - return(object) + + output.list <- list(pca, object_merged[[new.reduction]]) + names(output.list) <- c(orig.reduction, new.reduction) + return(output.list) } #' Seurat-Joint PCA Integration From 4126b19246e7c0dca2857765f2272756f5199ee3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 25 Oct 2022 18:00:07 -0400 Subject: [PATCH 240/979] Update DietSeurat to support v5 assays --- DESCRIPTION | 9 ++-- NAMESPACE | 3 ++ R/objects.R | 114 ++++++++++++++++++++++++++++------------------ R/zzz.R | 2 +- man/DietSeurat.Rd | 32 ++++++------- 5 files changed, 94 insertions(+), 66 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 16b4c59b0..3a96adf16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.4.9011 -Date: 2022-10-24 +Version: 4.0.4.9012 +Date: 2022-10-25 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -24,10 +24,10 @@ Authors@R: c( ) URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues -Remotes: mojaveazure/seurat-object@feat/docs5 Depends: R (>= 4.0.0), - methods + methods, + SeuratObject (>= 4.9.9.9035) Imports: cluster, cowplot, @@ -66,7 +66,6 @@ Imports: Rtsne, scales, scattermore (>= 0.7), - SeuratObject (>= 4.9.9.9014), sctransform (>= 0.3.4), shiny, spatstat.core, diff --git a/NAMESPACE b/NAMESPACE index 7b63c5d3f..baee2f72c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -451,7 +451,9 @@ importFrom(SeuratObject,"Misc<-") importFrom(SeuratObject,"Project<-") importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") +importFrom(SeuratObject,.FilterObjects) importFrom(SeuratObject,.MARGIN) +importFrom(SeuratObject,.PropagateList) importFrom(SeuratObject,.SparseSlots) importFrom(SeuratObject,AddMetaData) importFrom(SeuratObject,Assays) @@ -504,6 +506,7 @@ importFrom(SeuratObject,Stdev) importFrom(SeuratObject,StitchMatrix) importFrom(SeuratObject,Tool) importFrom(SeuratObject,UpdateSeuratObject) +importFrom(SeuratObject,UpdateSlots) importFrom(SeuratObject,VariableFeatures) importFrom(SeuratObject,WhichCells) importFrom(SeuratObject,as.Graph) diff --git a/R/objects.R b/R/objects.R index 39dd56499..da3d7231a 100644 --- a/R/objects.R +++ b/R/objects.R @@ -441,82 +441,108 @@ CreateSCTAssayObject <- function( #' Slim down a Seurat object #' -#' Keep only certain aspects of the Seurat object. Can be useful in functions that utilize merge as -#' it reduces the amount of data in the merge. +#' Keep only certain aspects of the Seurat object. Can be useful in functions +#' that utilize merge as it reduces the amount of data in the merge #' -#' @param object Seurat object -#' @param counts Preserve the count matrices for the assays specified -#' @param data Preserve the data slot for the assays specified -#' @param scale.data Preserve the scale.data slot for the assays specified +#' @param object A \code{\link[SeuratObject]{Seurat}} object +#' @param layers A vector or named list of layers to keep #' @param features Only keep a subset of features, defaults to all features #' @param assays Only keep a subset of assays specified here -#' @param dimreducs Only keep a subset of DimReducs specified here (if NULL, -#' remove all DimReducs) -#' @param graphs Only keep a subset of Graphs specified here (if NULL, remove -#' all Graphs) +#' @param dimreducs Only keep a subset of DimReducs specified here (if +#' \code{NULL}, remove all DimReducs) +#' @param graphs Only keep a subset of Graphs specified here (if \code{NULL}, +#' remove all Graphs) #' @param misc Preserve the \code{misc} slot; default is \code{TRUE} +#' @param ... Ignored +#' +#' @return \code{object} with only the sub-object specified retained +#' +#' @importFrom SeuratObject .FilterObjects .PropagateList Assays +#' Layers UpdateSlots #' #' @export +#' #' @concept objects #' DietSeurat <- function( object, - counts = TRUE, - data = TRUE, - scale.data = FALSE, + layers = NULL, features = NULL, assays = NULL, dimreducs = NULL, graphs = NULL, - misc = TRUE + misc = TRUE, + ... ) { + CheckDots(...) object <- UpdateSlots(object = object) - assays <- assays %||% FilterObjects(object = object, classes.keep = "Assay") - assays <- assays[assays %in% FilterObjects(object = object, classes.keep = 'Assay')] - if (length(x = assays) == 0) { - stop("No assays provided were found in the Seurat object") + assays <- assays %||% Assays(object = object) + assays <- intersect(x = assays, y = Assays(object = object)) + if (!length(x = assays)) { + abort(message = "No assays provided were found in the Seurat object") } if (!DefaultAssay(object = object) %in% assays) { - stop("The default assay is slated to be removed, please change the default assay") + abort( + message = "The default assay is slated to be removed, please change the default assay" + ) } - if (!counts && !data) { - stop("Either one or both of 'counts' and 'data' must be kept") + layers <- layers %||% assays + layers <- .PropagateList(x = layers, names = assays) + for (assay in names(x = layers)) { + layers[[assay]] <- tryCatch( + expr = Layers(object = object[[assay]], search = layers[[assay]]), + error = function(...) { + return(character(length = 0L)) + } + ) } - for (assay in FilterObjects(object = object, classes.keep = 'Assay')) { + layers <- Filter(f = length, x = layers) + if (!length(x = layers)) { + abort(message = "None of the requested layers found") + } + for (assay in Assays(object = object)) { if (!(assay %in% assays)) { object[[assay]] <- NULL - } else { - if (!is.null(x = features)) { - features.assay <- intersect(x = features, y = rownames(x = object[[assay]])) - if (length(x = features.assay) == 0) { - if (assay == DefaultAssay(object = object)) { - stop("The default assay is slated to be removed, please change the default assay") - } else { - warning("No features found in assay '", assay, "', removing...") - object[[assay]] <- NULL - } - } else { - object[[assay]] <- subset(x = object[[assay]], features = features.assay) - } - } - if (!counts) { - slot(object = object[[assay]], name = 'counts') <- new(Class = 'matrix') + next + } + layers.rm <- setdiff( + x = Layers(object = object[[assay]]), + y = layers[[assay]] + ) + if (length(x = layers.rm)) { + if (inherits(x = object[[assay]], what = 'Assay') && all(c('counts', 'data') %in% layers.rm)) { + abort(message = "Cannot remove both 'counts' and 'data' from v3 Assays") } - if (!data) { - stop('data = FALSE currently not supported') + for (lyr in layers.rm) { + object[[assay]][[lyr]] <- NULL } - if (!scale.data) { - slot(object = object[[assay]], name = 'scale.data') <- new(Class = 'matrix') + } + if (!is.null(x = features)) { + features.assay <- intersect( + x = features, + y = rownames(x = object[[assay]]) + ) + if (!length(x = features.assay)) { + warn(message = paste0( + 'No features found in assay ', + sQuote(x = assay), + ', removing...' + )) + object[[assay]] <- NULL + next } + object[[assay]] <- subset(x = object[[assay]], features = features.assay) } } # remove misc when desired if (!isTRUE(x = misc)) { slot(object = object, name = "misc") <- list() } - # remove unspecified DimReducs and Graphs - all.objects <- FilterObjects(object = object, classes.keep = c('DimReduc', 'Graph')) + all.objects <- .FilterObjects( + object = object, + classes.keep = c('DimReduc', 'Graph') + ) objects.to.remove <- all.objects[!all.objects %in% c(dimreducs, graphs)] for (ob in objects.to.remove) { object[[ob]] <- NULL diff --git a/R/zzz.R b/R/zzz.R index 14f702d5c..fa55ab123 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,4 +1,4 @@ -#' @importFrom methods slot +#' @importFrom methods slot slot<- #' @importFrom rlang abort #' arg_match #' as_name diff --git a/man/DietSeurat.Rd b/man/DietSeurat.Rd index 35b66560e..6dad20f83 100644 --- a/man/DietSeurat.Rd +++ b/man/DietSeurat.Rd @@ -6,39 +6,39 @@ \usage{ DietSeurat( object, - counts = TRUE, - data = TRUE, - scale.data = FALSE, + layers = NULL, features = NULL, assays = NULL, dimreducs = NULL, graphs = NULL, - misc = TRUE + misc = TRUE, + ... ) } \arguments{ -\item{object}{Seurat object} +\item{object}{A \code{\link[SeuratObject]{Seurat}} object} -\item{counts}{Preserve the count matrices for the assays specified} - -\item{data}{Preserve the data slot for the assays specified} - -\item{scale.data}{Preserve the scale.data slot for the assays specified} +\item{layers}{A vector or named list of layers to keep} \item{features}{Only keep a subset of features, defaults to all features} \item{assays}{Only keep a subset of assays specified here} -\item{dimreducs}{Only keep a subset of DimReducs specified here (if NULL, -remove all DimReducs)} +\item{dimreducs}{Only keep a subset of DimReducs specified here (if +\code{NULL}, remove all DimReducs)} -\item{graphs}{Only keep a subset of Graphs specified here (if NULL, remove -all Graphs)} +\item{graphs}{Only keep a subset of Graphs specified here (if \code{NULL}, +remove all Graphs)} \item{misc}{Preserve the \code{misc} slot; default is \code{TRUE}} + +\item{...}{Ignored} +} +\value{ +\code{object} with only the sub-object specified retained } \description{ -Keep only certain aspects of the Seurat object. Can be useful in functions that utilize merge as -it reduces the amount of data in the merge. +Keep only certain aspects of the Seurat object. Can be useful in functions +that utilize merge as it reduces the amount of data in the merge } \concept{objects} From 50693be96d11300c2b86db97ddb8dedce840288d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 25 Oct 2022 20:47:58 -0400 Subject: [PATCH 241/979] CCA inte --- R/integration5.R | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 4500dc642..3d6377192 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -202,32 +202,36 @@ CCAIntegration <- function( orig.reduction = 'pca.rna', new.reduction = 'integrated.dr', reference = NULL, - anchor.features = NULL, + features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, + npcs = 50, + groups = NULL, k.filter = NA, scale.layer = 'scale.data', verbose = TRUE, ...) { - anchor.features <- anchor.features %||% SelectIntegrationFeatures5(object = object) - assay <- assay %||% DefaultAssay(object = object) + features <- features %||% SelectIntegrationFeatures5(object = object) + assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - object <- RunPCA(object = object, - assay = assay, - features = anchor.features, - reduction.name = orig.reduction, - reduction.key = paste0(orig.reduction,"_"), - verbose = verbose + npcs <- max(npcs, dims) + pca <- RunPCA( + object = object, + assay = assay, + features = features, + layer = scale.layer, + npcs = npcs, + verbose = verbose ) - + object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[assay]][[layers[i]]] ) - object.list[[i]][['RNA']][[scale.data.layer]] <- object[[assay]]$scale.data[,Cells(object.list[[i]])] + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]] ) + object.list[[i]][['RNA']][[scale.layer]] <- object$scale.data[,Cells(object.list[[i]])] object.list[[i]][['RNA']]$counts <- NULL } - anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = anchor.features, + anchor <- FindIntegrationAnchors(object.list = object.list, + anchor.features = features, scale = FALSE, reduction = 'cca', normalization.method = normalization.method, @@ -243,11 +247,12 @@ CCAIntegration <- function( ### object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = object[[orig.reduction]], + reductions = pca, new.reduction.name = new.reduction, verbose = verbose) - object[[new.reduction]] <- object_merged[[new.reduction]] - return(object) + output.list <- list(pca, object_merged[[new.reduction]]) + names(output.list) <- c(orig.reduction, new.reduction) + return(output.list) } #' Seurat-RPCA Integration From eff2a2d7da34198b79f34a8a6bb632ba63d35344 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 25 Oct 2022 22:38:43 -0400 Subject: [PATCH 242/979] subset features --- R/integration5.R | 52 +++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 3d6377192..fd2fe53bd 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -223,17 +223,17 @@ CCAIntegration <- function( npcs = npcs, verbose = verbose ) - object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]] ) - object.list[[i]][['RNA']][[scale.layer]] <- object$scale.data[,Cells(object.list[[i]])] + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,] ) + object.list[[i]][['RNA']]$scale.data <- object[[scale.layer]][features, Cells(object.list[[i]])] object.list[[i]][['RNA']]$counts <- NULL } + anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = features, - scale = FALSE, - reduction = 'cca', + anchor.features = features, + scale = FALSE, + reduction = 'cca', normalization.method = normalization.method, dims = dims, k.filter = k.filter, @@ -241,15 +241,11 @@ CCAIntegration <- function( verbose = verbose, ... ) - - ## diet Seurat object - ### - - ### object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = pca, new.reduction.name = new.reduction, - verbose = verbose) + verbose = verbose + ) output.list <- list(pca, object_merged[[new.reduction]]) names(output.list) <- c(orig.reduction, new.reduction) return(output.list) @@ -292,16 +288,15 @@ RPCAIntegration <- function( object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]] ) + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,]) VariableFeatures(object = object.list[[i]]) <- features - object.list[[i]] <- ScaleData( object.list[[i]], verbose = FALSE) - object.list[[i]] <- RunPCA( object.list[[i]], verbose = FALSE) + object.list[[i]] <- ScaleData(object = object.list[[i]], verbose = FALSE) + object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) object.list[[i]][['RNA']]$counts <- NULL } - - anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = features, - scale = FALSE, + anchor <- FindIntegrationAnchors(object.list = object.list, + anchor.features = features, + scale = FALSE, reduction = 'rpca', normalization.method = normalization.method, dims = dims, @@ -310,14 +305,11 @@ RPCAIntegration <- function( verbose = verbose, ... ) - ## diet Seurat object - ### - - ### object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = pca, new.reduction.name = new.reduction, - verbose = verbose) + verbose = verbose + ) output.list <- list(pca, object_merged[[new.reduction]]) names(output.list) <- c(orig.reduction, new.reduction) @@ -340,7 +332,7 @@ JointPCAIntegration <- function( normalization.method = NULL, dims = 1:30, npcs = 50, - k.filter = NA, + k.anchor = 20, scale.layer = 'scale.data', verbose = TRUE, groups = NULL, @@ -360,7 +352,7 @@ JointPCAIntegration <- function( ) object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]] ) + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features[1:2], ] ) object.list[[i]][['RNA']]$counts <- NULL object.list[[i]][['joint.pca']] <- CreateDimReducObject( embeddings = Embeddings(object = pca)[Cells(object.list[[i]]),], @@ -369,18 +361,14 @@ JointPCAIntegration <- function( key = 'J_' ) } - ## diet Seurat object - ### - - ### - anchor <- FindIntegrationAnchors(object.list = object.list, anchor.features = features, scale = FALSE, reduction = 'jpca', normalization.method = normalization.method, dims = dims, - k.filter = k.filter, + k.anchor = k.anchor, + k.filter = NA, reference = reference, verbose = verbose, ... From 7170c1f3a10f7ae8655fa3b5c496c30a1522ead3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 26 Oct 2022 12:09:26 -0400 Subject: [PATCH 243/979] Remove extra newline --- R/sketching.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 26141280e..c6148a4b4 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -445,7 +445,6 @@ CountSketch <- function(nsketch, ncells, seed = NA_integer_, ...) { )) } - #' Gaussian sketching #' #' @inheritParams CountSketch From aa643077e16779340923eac673a652aa0175c040 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 26 Oct 2022 12:09:42 -0400 Subject: [PATCH 244/979] Update docs for IntegrateLayers --- R/integration5.R | 270 +++++++++++++++++-------------------- man/HarmonyIntegration.Rd | 67 +++++++++ man/IntegrateLayers.Rd | 41 +++--- man/writing-integration.Rd | 46 +++++++ 4 files changed, 259 insertions(+), 165 deletions(-) create mode 100644 man/writing-integration.Rd diff --git a/R/integration5.R b/R/integration5.R index d772f7fd1..6df50e176 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -7,115 +7,35 @@ NULL # Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' Integrate Layers +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Functions +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' Harmony Integration #' -#' @param object A \code{\link[SeuratObject]{Seurat}} object -#' @param method Integration method function; can choose from: -#' \Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")} -#' @param ... Arguments passed on to \code{method} +#' @inheritParams harmony::HarmonyMatrix +#' @param object An \code{\link[SeuratObject]{Assay5}} object +#' @param assay Name of \code{object} in the containing \code{Seurat} object +#' @param groups A data frame ... +#' @param features ... +#' @param scale.layer ... +#' @param layers ... +#' @param ... Ignored #' -#' @return \code{object} with integration data added to it +#' @return ... #' -#' @section Integration Methods Functions: -#' Integration method functions can be written by anyone to implement any -#' integration method in Seurat. These methods should expect to take a -#' \link[SeuratObject:Assay5]{v5 assay} as input and return a named list of -#' objects that can be added back to a \code{Seurat} object (eg. a -#' \link[SeuratObject:DimReduc]{dimensional reduction} or cell-level meta data) +#' @note This function requires the +#' \href{https://cran.r-project.org/package=harmony}{\pkg{harmony}} package +#' to be installed #' -#' Every integration method function should expect the following arguments: -#' \itemize{ -#' \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object -#' \item \dQuote{\code{assay}}: name of \code{object} in the original -#' \code{\link[SeuratObject]{Seurat}} object -#' \item \dQuote{\code{layers}}: names of normalized layers in \code{object} -#' \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in -#' \code{object} -#' \item \dQuote{\code{features}}: a vector of features for integration -#' \item \dQuote{\code{groups}}: a one-column data frame with the groups for -#' each cell in \code{object}; the column name will be \dQuote{group} -#' } +# @templateVar pkg harmony +# @template note-reqdpkg #' #' @export #' -IntegrateLayers <- function( - object, - method, - group.by = NULL, - assay = NULL, - features = NULL, - layers = NULL, - scale.layer = 'scale.data', - ... -) { - # Get the integration method - if (is_quosure(x = method)) { - method <- eval( - expr = quo_get_expr(quo = method), - envir = quo_get_env(quo = method) - ) - } - if (is.character(x = method)) { - method <- get(x = method) - } - if (!is.function(x = method)) { - abort(message = "'method' must be a function for integrating layers") - } - # Check our assay - assay <- assay %||% DefaultAssay(object = object) - if (!inherits(x = object[[assay]], what = 'StdAssay')) { - abort(message = "'assay' must be a v5 assay") - } - layers <- Layers(object = object, assay = assay, search = layers) - features <- features %||% SelectIntegrationFeatures5(object = object, assay = assay) - scale.layer <- Layers(object = object, search = scale.layer) - features <- intersect( - x = features, - y = Features(x = object[[assay]], layer = scale.layer) - ) - if (!length(x = features)) { - abort(message = "None of the features provided are found in this assay") - } - # Check our groups - groups <- if (is.null(x = group.by) && length(x = layers) > 1L) { - cmap <- slot(object = object[[assay]], name = 'cells')[, layers] - as.data.frame(x = labels( - object = cmap, - values = Cells(x = object[[assay]], layer = scale.layer) - )) - } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { - FetchData( - object = object, - vars = group.by, - cells = colnames(x = object[[assay]]) - ) - } else { - abort(message = "'group.by' must correspond to a column of cell-level meta data") - } - names(x = groups) <- "group" - # Run the integration method - value <- method( - object = object[[assay]], - assay = assay, - layers = layers, - scale.layer = scale.layer, - features = features, - groups = groups, - ... - ) - for (i in names(x = value)) { - object[[i]] <- value[[i]] - } - return(object) -} - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Functions -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -#' Harmony Integration +#' @concept integration #' -#' @export +#' @seealso \code{\link[harmony:HarmonyMatrix]{harmony::HarmonyMatrix}()} #' HarmonyIntegration <- function( object, @@ -136,7 +56,6 @@ HarmonyIntegration <- function( max.iter.cluster = 20L, epsilon.cluster = 1e-05, epsilon.harmony = 1e-04, - # project.dim = TRUE, verbose = TRUE, ... ) { @@ -189,22 +108,40 @@ HarmonyIntegration <- function( attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Methods for Seurat-defined generics -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -# @method IntegrateLayers StdAssay -# @export +#' Integrate Layers +#' +#' @param object A \code{\link[SeuratObject]{Seurat}} object +#' @param method Integration method function +#' @param group.by ... +#' @param assay Name of assay for integration +#' @param features A vector of features to use for integration +#' @param layers Names of normalized layers in \code{assay} +#' @param scale.layer Name(s) of scaled layer(s) in \code{assay} +#' @param ... Arguments passed on to \code{method} +#' +#' @return \code{object} with integration data added to it +#' +#' @section Integration Method Functions: +#' The following integration method functions are available: +#' \Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")} +#' +#' @export +#' +#' @concept integration #' -IntegrateLayers.StdAssay <- function( +#' @seealso \link[Seurat:writing-integration]{Writing integration method functions} +#' +IntegrateLayers <- function( object, method, - assay, - features, # TODO: allow selectintegrationfeatures to run on v5 assays - layers = NULL, group.by = NULL, + assay = NULL, + features = NULL, + layers = NULL, + scale.layer = 'scale.data', ... ) { + # Get the integration method if (is_quosure(x = method)) { method <- eval( expr = quo_get_expr(quo = method), @@ -217,47 +154,46 @@ IntegrateLayers.StdAssay <- function( if (!is.function(x = method)) { abort(message = "'method' must be a function for integrating layers") } - layers <- Layers(object = object, search = layers) - if (!is.null(x = group.by)) { - object <- split(x = object, f = group.by, drop = TRUE) - } - return(method(object = object, features = features, assay = assay, layers = layers, ...)) -} - -# @method IntegrateLayers Seurat -# @export -#' -IntegrateLayers.Seurat <- function( - object, - method, - group.by = NULL, - assay = NULL, - features = NULL, - layers = NULL, - ... -) { - method <- enquo(arg = method) + # Check our assay assay <- assay %||% DefaultAssay(object = object) if (!inherits(x = object[[assay]], what = 'StdAssay')) { abort(message = "'assay' must be a v5 assay") } + layers <- Layers(object = object, assay = assay, search = layers) features <- features %||% SelectIntegrationFeatures5(object = object, assay = assay) - group.by <- if (is.null(x = group.by)) { - group.by - } else if (rlang::is_na(x = group.by)) { - Idents(object = object) - } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { - object[[group.by, drop = TRUE]] + scale.layer <- Layers(object = object, search = scale.layer) + features <- intersect( + x = features, + y = Features(x = object[[assay]], layer = scale.layer) + ) + if (!length(x = features)) { + abort(message = "None of the features provided are found in this assay") + } + # Check our groups + groups <- if (is.null(x = group.by) && length(x = layers) > 1L) { + cmap <- slot(object = object[[assay]], name = 'cells')[, layers] + as.data.frame(x = labels( + object = cmap, + values = Cells(x = object[[assay]], layer = scale.layer) + )) + } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { + FetchData( + object = object, + vars = group.by, + cells = colnames(x = object[[assay]]) + ) } else { - abort(message = "'group.by' must be the name of a column in cell-level meta data") + abort(message = "'group.by' must correspond to a column of cell-level meta data") } - value <- IntegrateLayers( + names(x = groups) <- "group" + # Run the integration method + value <- method( object = object[[assay]], - method = method, assay = assay, - features = features, layers = layers, - group.by = group.by, + scale.layer = scale.layer, + features = features, + groups = groups, ... ) for (i in names(x = value)) { @@ -266,6 +202,54 @@ IntegrateLayers.Seurat <- function( return(object) } +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Methods for Seurat-defined generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' Writing Integration Method Functions +#' +#' Integration method functions can be written by anyone to implement any +#' integration method in Seurat. These methods should expect to take a +#' \link[SeuratObject:Assay5]{v5 assay} as input and return a named list of +#' objects that can be added back to a \code{Seurat} object (eg. a +#' \link[SeuratObject:DimReduc]{dimensional reduction} or cell-level meta data) +#' +#' @section Provided Parameters: +#' Every integration method function should expect the following arguments: +#' \itemize{ +#' \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object +#' \item \dQuote{\code{assay}}: name of \code{object} in the original +#' \code{\link[SeuratObject]{Seurat}} object +#' \item \dQuote{\code{layers}}: names of normalized layers in \code{object} +#' \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in +#' \code{object} +#' \item \dQuote{\code{features}}: a vector of features for integration +#' \item \dQuote{\code{groups}}: a one-column data frame with the groups for +#' each cell in \code{object}; the column name will be \dQuote{group} +#' } +#' +#' @section Method Discovery: +#' The documentation for \code{\link{IntegrateLayers}()} will automatically +#' link to integration method functions provided by packages in the +#' \code{\link[base]{search}()} space. To make an integration method function +#' discoverable by the documentation, simply add an attribute named +#' \dQuote{\code{Seurat.method}} to the function with a value of +#' \dQuote{\code{integration}} +#' \preformatted{ +#' attr(MyIntegrationFunction, which = "Seurat.method") <- "integration" +#' } +#' +#' @keywords internal +#' +#' @concept integration +#' +#' @name writing-integration +#' @rdname writing-integration +#' +#' @seealso \code{\link{IntegrateLayers}()} +#' +NULL diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 079cfb0a5..6af8242bf 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -27,6 +27,73 @@ HarmonyIntegration( ... ) } +\arguments{ +\item{object}{An \code{\link[SeuratObject]{Assay5}} object} + +\item{assay}{Name of \code{object} in the containing \code{Seurat} object} + +\item{groups}{A data frame ...} + +\item{features}{...} + +\item{scale.layer}{...} + +\item{layers}{...} + +\item{npcs}{If doing PCA on input matrix, number of PCs to compute.} + +\item{theta}{Diversity clustering penalty parameter. Specify for each +variable in vars_use Default theta=2. theta=0 does not encourage any +diversity. Larger values of theta result in more diverse clusters.} + +\item{lambda}{Ridge regression penalty parameter. Specify for each variable + in vars_use. +Default lambda=1. Lambda must be strictly positive. Smaller values result +in more aggressive correction.} + +\item{sigma}{Width of soft kmeans clusters. Default sigma=0.1. Sigma scales +the distance from a cell to cluster centroids. Larger values of sigma +result in cells assigned to more clusters. Smaller values of sigma make +soft kmeans cluster approach hard clustering.} + +\item{nclust}{Number of clusters in model. nclust=1 equivalent to simple +linear regression.} + +\item{tau}{Protection against overclustering small datasets with large ones. +tau is the expected number of cells per cluster.} + +\item{block.size}{What proportion of cells to update during clustering. +Between 0 to 1, default 0.05. Larger values may be faster but less accurate} + +\item{max.iter.harmony}{Maximum number of rounds to run Harmony. One round +of Harmony involves one clustering and one correction step.} + +\item{max.iter.cluster}{Maximum number of rounds to run clustering at each +round of Harmony.} + +\item{epsilon.cluster}{Convergence tolerance for clustering round of +Harmony. Set to -Inf to never stop early.} + +\item{epsilon.harmony}{Convergence tolerance for Harmony. Set to -Inf to +never stop early.} + +\item{verbose}{Whether to print progress messages. TRUE to print, +FALSE to suppress.} + +\item{...}{Ignored} +} +\value{ +... +} \description{ Harmony Integration } +\note{ +This function requires the +\href{https://cran.r-project.org/package=harmony}{\pkg{harmony}} package +to be installed +} +\seealso{ +\code{\link[harmony:HarmonyMatrix]{harmony::HarmonyMatrix}()} +} +\concept{integration} diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd index 55640b057..23b181b4f 100644 --- a/man/IntegrateLayers.Rd +++ b/man/IntegrateLayers.Rd @@ -18,8 +18,17 @@ IntegrateLayers( \arguments{ \item{object}{A \code{\link[SeuratObject]{Seurat}} object} -\item{method}{Integration method function; can choose from: -\Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")}} +\item{method}{Integration method function} + +\item{group.by}{...} + +\item{assay}{Name of assay for integration} + +\item{features}{A vector of features to use for integration} + +\item{layers}{Names of normalized layers in \code{assay}} + +\item{scale.layer}{Name(s) of scaled layer(s) in \code{assay}} \item{...}{Arguments passed on to \code{method}} } @@ -29,25 +38,13 @@ IntegrateLayers( \description{ Integrate Layers } -\section{Integration Methods Functions}{ - -Integration method functions can be written by anyone to implement any -integration method in Seurat. These methods should expect to take a -\link[SeuratObject:Assay5]{v5 assay} as input and return a named list of -objects that can be added back to a \code{Seurat} object (eg. a -\link[SeuratObject:DimReduc]{dimensional reduction} or cell-level meta data) - -Every integration method function should expect the following arguments: -\itemize{ - \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object - \item \dQuote{\code{assay}}: name of \code{object} in the original - \code{\link[SeuratObject]{Seurat}} object - \item \dQuote{\code{layers}}: names of normalized layers in \code{object} - \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in - \code{object} - \item \dQuote{\code{features}}: a vector of features for integration - \item \dQuote{\code{groups}}: a one-column data frame with the groups for - each cell in \code{object}; the column name will be \dQuote{group} -} +\section{Integration Method Functions}{ + +The following integration method functions are available: +\Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")} } +\seealso{ +\link[Seurat:writing-integration]{Writing integration method functions} +} +\concept{integration} diff --git a/man/writing-integration.Rd b/man/writing-integration.Rd new file mode 100644 index 000000000..e7ee7acb0 --- /dev/null +++ b/man/writing-integration.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration5.R +\name{writing-integration} +\alias{writing-integration} +\title{Writing Integration Method Functions} +\description{ +Integration method functions can be written by anyone to implement any +integration method in Seurat. These methods should expect to take a +\link[SeuratObject:Assay5]{v5 assay} as input and return a named list of +objects that can be added back to a \code{Seurat} object (eg. a +\link[SeuratObject:DimReduc]{dimensional reduction} or cell-level meta data) +} +\section{Provided Parameters}{ + +Every integration method function should expect the following arguments: +\itemize{ + \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object + \item \dQuote{\code{assay}}: name of \code{object} in the original + \code{\link[SeuratObject]{Seurat}} object + \item \dQuote{\code{layers}}: names of normalized layers in \code{object} + \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in + \code{object} + \item \dQuote{\code{features}}: a vector of features for integration + \item \dQuote{\code{groups}}: a one-column data frame with the groups for + each cell in \code{object}; the column name will be \dQuote{group} +} +} + +\section{Method Discovery}{ + +The documentation for \code{\link{IntegrateLayers}()} will automatically +link to integration method functions provided by packages in the +\code{\link[base]{search}()} space. To make an integration method function +discoverable by the documentation, simply add an attribute named +\dQuote{\code{Seurat.method}} to the function with a value of +\dQuote{\code{integration}} +\preformatted{ +attr(MyIntegrationFunction, which = "Seurat.method") <- "integration" +} +} + +\seealso{ +\code{\link{IntegrateLayers}()} +} +\concept{integration} +\keyword{internal} From 0723bb087298feb178b1903419312878a232d334 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 26 Oct 2022 17:04:28 -0400 Subject: [PATCH 245/979] Move RunPCA out of IntegrateLayers workflow Pass `orig` (dimreduc) instead of `assay` (character) to method functions --- R/integration5.R | 57 +++++++++++++++++++++++++------------- man/HarmonyIntegration.Rd | 4 +-- man/IntegrateLayers.Rd | 6 +++- man/writing-integration.Rd | 4 +-- 4 files changed, 47 insertions(+), 24 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 6df50e176..2f6d968fc 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -15,7 +15,8 @@ NULL #' #' @inheritParams harmony::HarmonyMatrix #' @param object An \code{\link[SeuratObject]{Assay5}} object -#' @param assay Name of \code{object} in the containing \code{Seurat} object +# @param assay Name of \code{object} in the containing \code{Seurat} object +#' @param orig \link[SeuratObject:DimReduc]{Dimensional reduction} to correct #' @param groups A data frame ... #' @param features ... #' @param scale.layer ... @@ -39,7 +40,7 @@ NULL #' HarmonyIntegration <- function( object, - assay, + orig, groups, features = NULL, scale.layer = 'scale.data', @@ -65,20 +66,22 @@ HarmonyIntegration <- function( ) if (!inherits(x = object, what = 'StdAssay')) { abort(message = "'object' must be a v5 assay object") + } else if (!inherits(x = orig, what = 'DimReduc')) { + abort(message = "'orig' must be a dimensional reduction") } - # Run joint PCA - features <- features %||% Features(x = object, layer = scale.layer) - pca <- RunPCA( - object = object, - assay = assay, - features = features, - layer = scale.layer, - npcs = npcs, - verbose = verbose - ) + # # Run joint PCA + # features <- features %||% Features(x = object, layer = scale.layer) + # pca <- RunPCA( + # object = object, + # assay = assay, + # features = features, + # layer = scale.layer, + # npcs = npcs, + # verbose = verbose + # ) # Run Harmony harmony.embed <- harmony::HarmonyMatrix( - data_mat = Embeddings(object = pca), + data_mat = Embeddings(object = orig), meta_data = groups, vars_use = 'group', do_pca = FALSE, @@ -96,14 +99,15 @@ HarmonyIntegration <- function( return_object = FALSE, verbose = verbose ) - rownames(x = harmony.embed) <- Cells(x = pca) + rownames(x = harmony.embed) <- Cells(x = orig) # TODO add feature loadings from PCA dr <- suppressWarnings(expr = CreateDimReducObject( embeddings = harmony.embed, key = key, - assay = assay + # assay = assay + assay = DefaultAssay(object = orig) )) - return(list(pca = pca, harmony = dr)) + return(list(harmony = dr)) } attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' @@ -112,7 +116,9 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' #' @param object A \code{\link[SeuratObject]{Seurat}} object #' @param method Integration method function -#' @param group.by ... +#' @param orig Name of dimensional reduction for correction +#' @param group.by Name of meta data to group cells by; defaults to splits +#' assay layers #' @param assay Name of assay for integration #' @param features A vector of features to use for integration #' @param layers Names of normalized layers in \code{assay} @@ -134,6 +140,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' IntegrateLayers <- function( object, method, + orig = NULL, group.by = NULL, assay = NULL, features = NULL, @@ -169,6 +176,15 @@ IntegrateLayers <- function( if (!length(x = features)) { abort(message = "None of the features provided are found in this assay") } + # Check our dimensional reduction + orig <- orig %||% DefaultDimReduc(object = object, assay = assay) + if (!orig %in% Reductions(object = object)) { + abort(message = paste(sQuote(x = orig), 'is not a dimensional reduction')) + } + obj.orig <- object[[orig]] + if (is.null(x = DefaultAssay(object = obj.orig))) { + DefaultAssay(object = obj.orig) <- assay + } # Check our groups groups <- if (is.null(x = group.by) && length(x = layers) > 1L) { cmap <- slot(object = object[[assay]], name = 'cells')[, layers] @@ -190,6 +206,7 @@ IntegrateLayers <- function( value <- method( object = object[[assay]], assay = assay, + orig = obj.orig, layers = layers, scale.layer = scale.layer, features = features, @@ -222,8 +239,10 @@ IntegrateLayers <- function( #' Every integration method function should expect the following arguments: #' \itemize{ #' \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object -#' \item \dQuote{\code{assay}}: name of \code{object} in the original -#' \code{\link[SeuratObject]{Seurat}} object +# \item \dQuote{\code{assay}}: name of \code{object} in the original +# \code{\link[SeuratObject]{Seurat}} object +#' \item \dQuote{\code{orig}}: \link[SeuratObject:DimReduc]{dimensional +#' reduction} to correct #' \item \dQuote{\code{layers}}: names of normalized layers in \code{object} #' \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in #' \code{object} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 6af8242bf..a0d85c035 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -6,7 +6,7 @@ \usage{ HarmonyIntegration( object, - assay, + orig, groups, features = NULL, scale.layer = "scale.data", @@ -30,7 +30,7 @@ HarmonyIntegration( \arguments{ \item{object}{An \code{\link[SeuratObject]{Assay5}} object} -\item{assay}{Name of \code{object} in the containing \code{Seurat} object} +\item{orig}{\link[SeuratObject:DimReduc]{Dimensional reduction} to correct} \item{groups}{A data frame ...} diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd index 23b181b4f..85b34ed32 100644 --- a/man/IntegrateLayers.Rd +++ b/man/IntegrateLayers.Rd @@ -7,6 +7,7 @@ IntegrateLayers( object, method, + orig = NULL, group.by = NULL, assay = NULL, features = NULL, @@ -20,7 +21,10 @@ IntegrateLayers( \item{method}{Integration method function} -\item{group.by}{...} +\item{orig}{Name of dimensional reduction for correction} + +\item{group.by}{Name of meta data to group cells by; defaults to splits +assay layers} \item{assay}{Name of assay for integration} diff --git a/man/writing-integration.Rd b/man/writing-integration.Rd index e7ee7acb0..60f151cdf 100644 --- a/man/writing-integration.Rd +++ b/man/writing-integration.Rd @@ -15,8 +15,8 @@ objects that can be added back to a \code{Seurat} object (eg. a Every integration method function should expect the following arguments: \itemize{ \item \dQuote{\code{object}}: an \code{\link[SeuratObject]{Assay5}} object - \item \dQuote{\code{assay}}: name of \code{object} in the original - \code{\link[SeuratObject]{Seurat}} object + \item \dQuote{\code{orig}}: \link[SeuratObject:DimReduc]{dimensional + reduction} to correct \item \dQuote{\code{layers}}: names of normalized layers in \code{object} \item \dQuote{\code{scale.layer}}: name(s) of scaled layer(s) in \code{object} From c0fc67db7819035f843f764fa574193ea1a08466 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 26 Oct 2022 17:35:44 -0400 Subject: [PATCH 246/979] Update docs --- NAMESPACE | 5 ++- R/integration5.R | 1 - man/CCAIntegration.Rd | 50 +++++++++++++++++++++++ man/FastRPCAIntegration.Rd | 69 ++++++++++++++++++++++++++++++++ man/FetchResiduals.Rd | 2 +- man/FindIntegrationAnchors.Rd | 3 +- man/FindTransferAnchors.Rd | 1 + man/GetResidual.Rd | 2 +- man/IntegrateSketchEmbeddings.Rd | 1 + man/JointPCAIntegration.Rd | 50 +++++++++++++++++++++++ man/RPCAIntegration.Rd | 50 +++++++++++++++++++++++ 11 files changed, 228 insertions(+), 6 deletions(-) create mode 100644 man/CCAIntegration.Rd create mode 100644 man/FastRPCAIntegration.Rd create mode 100644 man/JointPCAIntegration.Rd create mode 100644 man/RPCAIntegration.Rd diff --git a/NAMESPACE b/NAMESPACE index 6ff986864..f1f96acad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,8 +48,6 @@ S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) S3method(LeverageScore,Assay) -S3method(IntegrateLayers,Seurat) -S3method(IntegrateLayers,StdAssay) S3method(LeverageScore,DelayedMatrix) S3method(LeverageScore,Seurat) S3method(LeverageScore,StdAssay) @@ -172,6 +170,7 @@ export(BlackAndWhite) export(BlueAndRed) export(BoldTitle) export(BuildClusterTree) +export(CCAIntegration) export(CalcPerturbSig) export(CalculateBarcodeInflections) export(CaseMatch) @@ -261,6 +260,7 @@ export(IsGlobal) export(JS) export(JackStraw) export(JackStrawPlot) +export(JointPCAIntegration) export(Key) export(L2CCA) export(L2Dim) @@ -311,6 +311,7 @@ export(Project) export(ProjectDim) export(ProjectUMAP) export(PurpleAndYellow) +export(RPCAIntegration) export(Radius) export(Read10X) export(Read10X_Image) diff --git a/R/integration5.R b/R/integration5.R index 4028d08d8..9fd0739be 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -178,7 +178,6 @@ CCAIntegration <- function( #' @inheritParams FindIntegrationAnchors #' @export #' - RPCAIntegration <- function( object = NULL, assay = NULL, diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd new file mode 100644 index 000000000..fac09783e --- /dev/null +++ b/man/CCAIntegration.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration5.R +\name{CCAIntegration} +\alias{CCAIntegration} +\title{Seurat-CCA Integration} +\usage{ +CCAIntegration( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = "pca.rna", + new.reduction = "integrated.dr", + reference = NULL, + features = NULL, + normalization.method = c("LogNormalize", "SCT"), + dims = 1:30, + npcs = 50, + groups = NULL, + k.filter = NA, + scale.layer = "scale.data", + verbose = TRUE, + ... +) +} +\arguments{ +\item{assay}{A vector of assay names specifying which assay to use when +constructing anchors. If NULL, the current default assay for each object is +used.} + +\item{reference}{A vector specifying the object/s to be used as a reference +during integration. If NULL (default), all pairwise anchors are found (no +reference/s). If not NULL, the corresponding objects in \code{object.list} +will be used as references. When using a set of specified references, anchors +are first found between each query and each reference. The references are +then integrated through pairwise integration. Each query is then mapped to +the integrated reference.} + +\item{normalization.method}{Name of normalization method used: LogNormalize +or SCT} + +\item{dims}{Which dimensions to use from the CCA to specify the neighbor +search space} + +\item{k.filter}{How many neighbors (k) to use when filtering anchors} + +\item{verbose}{Print progress bars and output} +} +\description{ +Seurat-CCA Integration +} diff --git a/man/FastRPCAIntegration.Rd b/man/FastRPCAIntegration.Rd new file mode 100644 index 000000000..aee14c3f3 --- /dev/null +++ b/man/FastRPCAIntegration.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{FastRPCAIntegration} +\alias{FastRPCAIntegration} +\title{Perform integration on the joint PCA cell embeddings.} +\usage{ +FastRPCAIntegration( + object.list, + reference = NULL, + anchor.features = 2000, + k.anchor = 20, + dims = 1:30, + scale = TRUE, + normalization.method = c("LogNormalize", "SCT"), + new.reduction.name = "integrated_dr", + npcs = 50, + findintegrationanchors.args = list(), + verbose = TRUE +) +} +\arguments{ +\item{object.list}{A list of \code{\link{Seurat}} objects between which to +find anchors for downstream integration.} + +\item{reference}{A vector specifying the object/s to be used as a reference +during integration. If NULL (default), all pairwise anchors are found (no +reference/s). If not NULL, the corresponding objects in \code{object.list} +will be used as references. When using a set of specified references, anchors +are first found between each query and each reference. The references are +then integrated through pairwise integration. Each query is then mapped to +the integrated reference.} + +\item{anchor.features}{Can be either: +\itemize{ + \item{A numeric value. This will call \code{\link{SelectIntegrationFeatures}} + to select the provided number of features to be used in anchor finding} + \item{A vector of features to be used as input to the anchor finding process} +}} + +\item{k.anchor}{How many neighbors (k) to use when picking anchors} + +\item{dims}{Which dimensions to use from the CCA to specify the neighbor +search space} + +\item{scale}{Whether or not to scale the features provided. Only set to FALSE +if you have previously scaled the features you want to use for each object in +the object.list} + +\item{normalization.method}{Name of normalization method used: LogNormalize +or SCT} + +\item{new.reduction.name}{Name of integrated dimensional reduction} + +\item{npcs}{Total Number of PCs to compute and store (50 by default)} + +\item{findintegrationanchors.args}{A named list of additional arguments to +\code{\link{FindIntegrationAnchors}}} + +\item{verbose}{Print messages and progress} +} +\value{ +Returns a Seurat object with integrated dimensional reduction +} +\description{ +This is a convenience wrapper function around the following three functions +that are often run together when perform integration. +#' \code{\link{FindIntegrationAnchors}}, \code{\link{RunPCA}}, +\code{\link{IntegrateEmbeddings}}. +} diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd index 81536048f..cf3a65edd 100644 --- a/man/FetchResiduals.Rd +++ b/man/FetchResiduals.Rd @@ -7,7 +7,7 @@ FetchResiduals( object, features, - assay = NULL, + assay = "SCT", umi.assay = "RNA", layer = "counts", clip.range = NULL, diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index 62495f03c..8c4d0831c 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -12,7 +12,7 @@ FindIntegrationAnchors( scale = TRUE, normalization.method = c("LogNormalize", "SCT"), sct.clip.range = NULL, - reduction = c("cca", "rpca", "rlsi"), + reduction = c("cca", "rpca", "jpca", "rlsi"), l2.norm = TRUE, dims = 1:30, k.anchor = 5, @@ -63,6 +63,7 @@ be one of: \itemize{ \item{cca: Canonical correlation analysis} \item{rpca: Reciprocal PCA} + \item{jpca: Joint PCA} \item{rlsi: Reciprocal LSI} }} diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 398a55a03..49bbc4cf2 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -12,6 +12,7 @@ FindTransferAnchors( reference.assay = NULL, reference.neighbors = NULL, query.assay = NULL, + query.layers = NULL, reduction = "pcaproject", reference.reduction = NULL, project.query = FALSE, diff --git a/man/GetResidual.Rd b/man/GetResidual.Rd index 6a8835796..12044efec 100644 --- a/man/GetResidual.Rd +++ b/man/GetResidual.Rd @@ -8,7 +8,7 @@ GetResidual( object, features, assay = NULL, - umi.assay = NULL, + umi.assay = "RNA", clip.range = NULL, replace.value = FALSE, na.rm = TRUE, diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index f668183b3..8ac33f5d0 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -7,6 +7,7 @@ IntegrateSketchEmbeddings( object, atoms = "sketch", + atoms.layers = NULL, orig = "RNA", features = NULL, reduction = "integrated_dr", diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd new file mode 100644 index 000000000..8156a8d06 --- /dev/null +++ b/man/JointPCAIntegration.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration5.R +\name{JointPCAIntegration} +\alias{JointPCAIntegration} +\title{Seurat-Joint PCA Integration} +\usage{ +JointPCAIntegration( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = "pca.rna", + new.reduction = "integrated.dr", + reference = NULL, + features = NULL, + normalization.method = NULL, + dims = 1:30, + npcs = 50, + k.anchor = 20, + scale.layer = "scale.data", + verbose = TRUE, + groups = NULL, + ... +) +} +\arguments{ +\item{assay}{A vector of assay names specifying which assay to use when +constructing anchors. If NULL, the current default assay for each object is +used.} + +\item{reference}{A vector specifying the object/s to be used as a reference +during integration. If NULL (default), all pairwise anchors are found (no +reference/s). If not NULL, the corresponding objects in \code{object.list} +will be used as references. When using a set of specified references, anchors +are first found between each query and each reference. The references are +then integrated through pairwise integration. Each query is then mapped to +the integrated reference.} + +\item{normalization.method}{Name of normalization method used: LogNormalize +or SCT} + +\item{dims}{Which dimensions to use from the CCA to specify the neighbor +search space} + +\item{k.anchor}{How many neighbors (k) to use when picking anchors} + +\item{verbose}{Print progress bars and output} +} +\description{ +Seurat-Joint PCA Integration +} diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd new file mode 100644 index 000000000..cf18c2c35 --- /dev/null +++ b/man/RPCAIntegration.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration5.R +\name{RPCAIntegration} +\alias{RPCAIntegration} +\title{Seurat-RPCA Integration} +\usage{ +RPCAIntegration( + object = NULL, + assay = NULL, + layers = NULL, + orig.reduction = "pca.rna", + new.reduction = "integrated.dr", + reference = NULL, + features = NULL, + normalization.method = c("LogNormalize", "SCT"), + dims = 1:30, + npcs = 50, + k.filter = NA, + scale.layer = "scale.data", + groups = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{assay}{A vector of assay names specifying which assay to use when +constructing anchors. If NULL, the current default assay for each object is +used.} + +\item{reference}{A vector specifying the object/s to be used as a reference +during integration. If NULL (default), all pairwise anchors are found (no +reference/s). If not NULL, the corresponding objects in \code{object.list} +will be used as references. When using a set of specified references, anchors +are first found between each query and each reference. The references are +then integrated through pairwise integration. Each query is then mapped to +the integrated reference.} + +\item{normalization.method}{Name of normalization method used: LogNormalize +or SCT} + +\item{dims}{Which dimensions to use from the CCA to specify the neighbor +search space} + +\item{k.filter}{How many neighbors (k) to use when filtering anchors} + +\item{verbose}{Print progress bars and output} +} +\description{ +Seurat-RPCA Integration +} From 066cb60b8ec4ef0b4957866ca55c65dacaaf7036 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 27 Oct 2022 09:27:28 -0400 Subject: [PATCH 247/979] reformat inte methods --- R/integration5.R | 60 +++++++++++++----------------------------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 9fd0739be..4a586efeb 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -121,13 +121,12 @@ CCAIntegration <- function( object = NULL, assay = NULL, layers = NULL, - orig.reduction = 'pca.rna', + orig = NULL, new.reduction = 'integrated.dr', reference = NULL, features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, - npcs = 50, groups = NULL, k.filter = NA, scale.layer = 'scale.data', @@ -136,15 +135,7 @@ CCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - npcs <- max(npcs, dims) - pca <- RunPCA( - object = object, - assay = assay, - features = features, - layer = scale.layer, - npcs = npcs, - verbose = verbose - ) + object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,] ) @@ -164,12 +155,12 @@ CCAIntegration <- function( ... ) object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = pca, + reductions = orig, new.reduction.name = new.reduction, verbose = verbose ) - output.list <- list(pca, object_merged[[new.reduction]]) - names(output.list) <- c(orig.reduction, new.reduction) + output.list <- list(object_merged[[new.reduction]]) + names(output.list) <- c(new.reduction) return(output.list) } @@ -182,13 +173,12 @@ RPCAIntegration <- function( object = NULL, assay = NULL, layers = NULL, - orig.reduction = 'pca.rna', + orig = NULL, new.reduction = 'integrated.dr', reference = NULL, features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, - npcs = 50, k.filter = NA, scale.layer = 'scale.data', groups = NULL, @@ -197,15 +187,6 @@ RPCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - npcs <- max(npcs, dims) - pca <- RunPCA( - object = object, - assay = assay, - features = features, - layer = scale.layer, - npcs = npcs, - verbose = verbose - ) object.list <- list() for (i in seq_along(along.with = layers)) { @@ -227,13 +208,13 @@ RPCAIntegration <- function( ... ) object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = pca, + reductions = orig, new.reduction.name = new.reduction, verbose = verbose ) - output.list <- list(pca, object_merged[[new.reduction]]) - names(output.list) <- c(orig.reduction, new.reduction) + output.list <- list(object_merged[[new.reduction]]) + names(output.list) <- c(new.reduction) return(output.list) } @@ -246,13 +227,12 @@ JointPCAIntegration <- function( object = NULL, assay = NULL, layers = NULL, - orig.reduction = 'pca.rna', + orig = NULL, new.reduction = 'integrated.dr', reference = NULL, features = NULL, normalization.method = NULL, dims = 1:30, - npcs = 50, k.anchor = 20, scale.layer = 'scale.data', verbose = TRUE, @@ -262,23 +242,15 @@ JointPCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - npcs <- max(npcs, dims) - pca <- RunPCA( - object = object, - assay = assay, - features = features, - layer = scale.layer, - npcs = npcs, - verbose = verbose - ) + object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features[1:2], ] ) object.list[[i]][['RNA']]$counts <- NULL object.list[[i]][['joint.pca']] <- CreateDimReducObject( - embeddings = Embeddings(object = pca)[Cells(object.list[[i]]),], + embeddings = Embeddings(object = orig)[Cells(object.list[[i]]),], assay = 'RNA', - loadings = Loadings(pca), + loadings = Loadings(orig), key = 'J_' ) } @@ -295,11 +267,11 @@ JointPCAIntegration <- function( ... ) object_merged <- IntegrateEmbeddings(anchorset = anchor, - reductions = pca, + reductions = orig, new.reduction.name = new.reduction, verbose = verbose) - output.list <- list(pca, object_merged[[new.reduction]]) - names(output.list) <- c(orig.reduction, new.reduction) + output.list <- list(object_merged[[new.reduction]]) + names(output.list) <- c(new.reduction) return(output.list) } From 41ec628cfda43a2967b64f2bea76e0e01a22ff85 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 28 Oct 2022 12:58:01 -0400 Subject: [PATCH 248/979] New SCTAssay interaction methods --- DESCRIPTION | 1 + NAMESPACE | 6 ++++++ R/objects.R | 44 ++++++++++++++++++++++++++++++++++++++++++-- R/reexports.R | 6 ++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b881768bc..a518b2a76 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Imports: fitdistrplus, future, future.apply, + generics (>= 0.1.3), ggplot2 (>= 3.3.0), ggrepel, ggridges, diff --git a/NAMESPACE b/NAMESPACE index f1f96acad..4b6e5a1eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,10 +8,13 @@ S3method("levels<-",SCTAssay) S3method(AnnotateAnchors,IntegrationAnchorSet) S3method(AnnotateAnchors,TransferAnchorSet) S3method(AnnotateAnchors,default) +S3method(Cells,SCTAssay) S3method(Cells,SCTModel) S3method(Cells,STARmap) S3method(Cells,SlideSeq) S3method(Cells,VisiumV1) +S3method(Features,SCTAssay) +S3method(Features,SCTModel) S3method(FindClusters,Seurat) S3method(FindClusters,default) S3method(FindMarkers,Assay) @@ -131,6 +134,7 @@ S3method(as.Seurat,SingleCellExperiment) S3method(as.SingleCellExperiment,Seurat) S3method(as.data.frame,Matrix) S3method(as.sparse,H5Group) +S3method(components,SCTAssay) S3method(dim,STARmap) S3method(dim,SlideSeq) S3method(dim,VisiumV1) @@ -395,6 +399,7 @@ export(as.Neighbor) export(as.Seurat) export(as.SingleCellExperiment) export(as.sparse) +export(components) export(scalefactors) exportClasses(AnchorSet) exportClasses(Assay) @@ -531,6 +536,7 @@ importFrom(future,nbrOfWorkers) importFrom(future,plan) importFrom(future.apply,future_lapply) importFrom(future.apply,future_sapply) +importFrom(generics,components) importFrom(ggplot2,Geom) importFrom(ggplot2,GeomPolygon) importFrom(ggplot2,GeomViolin) diff --git a/R/objects.R b/R/objects.R index c34074c98..c0e429805 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1338,6 +1338,17 @@ Cells.SCTModel <- function(x, ...) { return(rownames(x = slot(object = x, name = "cell.attributes"))) } +#' @method Cells SCTAssay +#' @export +#' +Cells.SCTAssay <- function(x, layer = NA) { + layer <- layer %||% levels(x = x)[1L] + if (rlang::is_na(x = layer)) { + return(colnames(x = x)) + } + return(Cells(x = components(object = x, model = layer))) +} + #' @rdname Cells #' @concept objects #' @concept spatial @@ -1369,6 +1380,29 @@ Cells.VisiumV1 <- function(x, ...) { return(rownames(x = GetTissueCoordinates(object = x, scale = NULL))) } +#' @importFrom SeuratObject DefaultLayer Layers +#' +#' @method Features SCTAssay +#' @export +#' +Features.SCTAssay <- function(x, layer = NA) { + layer <- layer %||% DefaultLayer(object = x) + if (rlang::is_na(x = layer)) { + return(rownames(x = x)) + } + layer <- rlang::arg_match(arg = layer, values = c(Layers(object = x), levels(x = x))) + if (layer %in% levels(x = x)) { + return(Features(x = components(object = x, model = layer))) + } + return(NextMethod()) +} + +#' @method Features SCTModel +#' @export +#' +Features.SCTModel <- function(x, ...) { + return(rownames(x = SCTResults(object = x, slot = 'feature.attributes'))) +} #' @param assay Assay to get #' @@ -1395,7 +1429,6 @@ GetAssay.Seurat <- function(object, assay = NULL, ...) { return(slot(object = object, name = 'assays')[[assay]]) } - #' Get Image Data #' #' @inheritParams SeuratObject::GetImage @@ -1838,6 +1871,14 @@ ScaleFactors.VisiumV1 <- function(object, ...) { return(subset(x = x, cells = i)) } +#' @method components SCTAssay +#' @export +#' +components.SCTAssay <- function(object, model) { + model <- rlang::arg_match(arg = model, values = levels(x = object)) + return(slot(object = object, name = 'SCTModel.list')[[model]]) +} + #' @method dim SlideSeq #' @concept objects #' @export @@ -1867,7 +1908,6 @@ dim.VisiumV1 <- function(x) { return(dim(x = GetImage(object = x)$raster)) } - #' @rdname SCTAssay-class #' @name SCTAssay-class #' diff --git a/R/reexports.R b/R/reexports.R index 1f3a6b8ed..9401c7ec0 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -145,6 +145,12 @@ NULL # Functions and Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom generics components +#' @rdname reexports +#' @export +#' +generics::components + #' @importFrom SeuratObject %||% #' @rdname reexports #' @export From 4f7cc70d4bb9ec45a48d6e3807c54d0c6b0d9319 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 28 Oct 2022 12:58:46 -0400 Subject: [PATCH 249/979] Update docs --- man/reexports.Rd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/man/reexports.Rd b/man/reexports.Rd index e94d58954..aa8615d0c 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -3,6 +3,7 @@ \docType{import} \name{reexports} \alias{reexports} +\alias{components} \alias{\%||\%} \alias{\%iff\%} \alias{AddMetaData} @@ -70,6 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ + \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 707b6a3fdc1ae8b41fbc7b1d0b7e9f0b83aad8eb Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 28 Oct 2022 16:44:24 -0400 Subject: [PATCH 250/979] fix inte sketch embedding --- R/integration.R | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/R/integration.R b/R/integration.R index a21c8cc37..960e98175 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1906,6 +1906,7 @@ IntegrateSketchEmbeddings <- function( nrow = ncol(x = object[[orig]]), ncol = length(x = object[[reduction]]) ) + rownames(emb.all) <- colnames(object[[orig]]) ncells <- c( 0, sapply( @@ -1915,12 +1916,6 @@ IntegrateSketchEmbeddings <- function( } ) ) - blocks <- lapply( - X = seq_along(along.with = layers), - FUN = function(x) { - return((sum(ncells[1:x]) + 1):sum(ncells[1:(x + 1)])) - } - ) if (length(atoms.layers) == 1) { atoms.layers <- rep(atoms.layers, length(layers)) } @@ -1988,15 +1983,17 @@ IntegrateSketchEmbeddings <- function( emb } ) - emb.all[ blocks[[i]],] <- as.matrix(x = emb) + + emb.all[rownames(emb), ] <- as.matrix(x = emb) + } - rownames(x = emb.all) <- colnames(x = object[[orig]]) - object[[reduction.name]] <- suppressWarnings(expr = CreateDimReducObject( + + object[[reduction.name]] <- CreateDimReducObject( embeddings = emb.all, loadings = Loadings(object = object[[reduction]]), key = reduction.key, assay = orig - )) + ) CheckGC() return(object) } From 5fa3ba9dfdce946b4f5866def1feef9fb7159615 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 28 Oct 2022 17:25:15 -0400 Subject: [PATCH 251/979] Add VariableFeatures.SCTAssay --- R/objects.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/R/objects.R b/R/objects.R index c0e429805..c48fe1927 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1834,6 +1834,37 @@ SCTResults.Seurat <- function(object, assay = "SCT", slot, model = NULL, ...) { return(SCTResults(object = object[[assay]], slot = slot, model = model, ...)) } +#' @method VariableFeatures SCTAssay +#' @export +#' +VariableFeatures.SCTAssay <- function(object, layer = NULL, n = 2000, simplify = TRUE, ...) { + layer <- layer %||% levels(object)[1L] + layer <- match.arg(arg = layer, choices = levels(x = object), several.ok = TRUE) + # fetch vf from every model) + model.list <- slot(object = object, name = "SCTModel.list") + variable.features <- list() + for (i in seq_along(layer)){ + model <- model.list[[layer[[i]]]] + feature.attr <- SCTResults(object = model, slot = "feature.attributes") + feature.variance <- feature.attr[,"residual_variance"] + names(x = feature.variance) <- rownames(x = feature.attr) + feature.variance <- sort(x = feature.variance, decreasing = TRUE) + if (!is.null(x = n)) { + top.features <- names(x = feature.variance)[1:min(n, length(x = feature.variance))] + } else { + top.features <- names(x = feature.variance) + } + variable.features[[i]] <- top.features + } + names(variable.features) <- names(layer) + + if (isTRUE(x = simplify)) { + variable.features <- Reduce(f = union, x = variable.features) + } + return(variable.features) +} + + #' @rdname ScaleFactors #' @method ScaleFactors VisiumV1 #' @export From c3f0b615a3097f97ebb7260af3a4ab9202faa03f Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 28 Oct 2022 18:10:41 -0400 Subject: [PATCH 252/979] Improvements to VF.SCT --- NAMESPACE | 4 ++++ R/objects.R | 65 ++++++++++++++++++++++++++++++++++++----------------- R/zzz.R | 2 ++ 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4b6e5a1eb..24c41a969 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -128,6 +128,8 @@ S3method(VST,DelayedMatrix) S3method(VST,default) S3method(VST,dgCMatrix) S3method(VST,matrix) +S3method(VariableFeatures,SCTAssay) +S3method(VariableFeatures,SCTModel) S3method(as.CellDataSet,Seurat) S3method(as.Seurat,CellDataSet) S3method(as.Seurat,SingleCellExperiment) @@ -716,8 +718,10 @@ importFrom(rlang,check_installed) importFrom(rlang,enquo) importFrom(rlang,inform) importFrom(rlang,invoke) +importFrom(rlang,is_na) importFrom(rlang,is_quosure) importFrom(rlang,is_scalar_character) +importFrom(rlang,is_scalar_integerish) importFrom(rlang,quo_get_env) importFrom(rlang,quo_get_expr) importFrom(rlang,warn) diff --git a/R/objects.R b/R/objects.R index c48fe1927..9f8e2e8cf 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1834,37 +1834,62 @@ SCTResults.Seurat <- function(object, assay = "SCT", slot, model = NULL, ...) { return(SCTResults(object = object[[assay]], slot = slot, model = model, ...)) } +#' @method VariableFeatures SCTModel +#' @export +#' +VariableFeatures.SCTModel <- function(object, n = 2000, ...) { + if (!is_scalar_integerish(x = n) || (!is_na(x = n < 1L) && n < 1L)) { + abort(message = "'n' must be a single positive integer") + } + feature.attr <- SCTResults(object = object, slot = 'feature.attributes') + feature.variance <- feature.attr[, 'residual_variance'] + names(x = feature.variance) <- row.names(x = feature.attr) + feature.variance <- sort(x = feature.variance, decreasing = TRUE) + if (is_na(x = n)) { + return(names(x = feature.variance)) + } + return(head(x = names(x = feature.variance), n = n)) +} + #' @method VariableFeatures SCTAssay #' @export #' -VariableFeatures.SCTAssay <- function(object, layer = NULL, n = 2000, simplify = TRUE, ...) { - layer <- layer %||% levels(object)[1L] +VariableFeatures.SCTAssay <- function( + object, + layer = NULL, + n = 2000, + simplify = TRUE, + ... +) { + layer <- layer %||% levels(x = object)[1L] + if (is_na(x = layer)) { + layer <- levels(x = object) + } layer <- match.arg(arg = layer, choices = levels(x = object), several.ok = TRUE) - # fetch vf from every model) - model.list <- slot(object = object, name = "SCTModel.list") - variable.features <- list() - for (i in seq_along(layer)){ - model <- model.list[[layer[[i]]]] - feature.attr <- SCTResults(object = model, slot = "feature.attributes") - feature.variance <- feature.attr[,"residual_variance"] - names(x = feature.variance) <- rownames(x = feature.attr) - feature.variance <- sort(x = feature.variance, decreasing = TRUE) - if (!is.null(x = n)) { - top.features <- names(x = feature.variance)[1:min(n, length(x = feature.variance))] - } else { - top.features <- names(x = feature.variance) + variable.features <- sapply( + X = layer, + FUN = function(lyr) { + return(VariableFeatures( + object = components(object = object, model = lyr), + n = n, + ... + )) + }, + simplify = FALSE, + USE.NAMES = TRUE + ) + if (length(x = variable.features) == 1L) { + if (isFALSE(x = simplify)) { + return(variable.features) } - variable.features[[i]] <- top.features + return(variable.features[[1L]]) } - names(variable.features) <- names(layer) - if (isTRUE(x = simplify)) { - variable.features <- Reduce(f = union, x = variable.features) + return(Reduce(f = union, x = variable.features)) } return(variable.features) } - #' @rdname ScaleFactors #' @method ScaleFactors VisiumV1 #' @export diff --git a/R/zzz.R b/R/zzz.R index fa55ab123..a1e0702ce 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -6,7 +6,9 @@ #' check_installed #' enquo #' inform +#' is_na #' is_quosure +#' is_scalar_integerish #' quo_get_env #' quo_get_expr #' warn From 101f83fa51f8877038587aee1792841baf493eb9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 30 Oct 2022 09:11:33 -0400 Subject: [PATCH 253/979] optimize integrateSketch --- R/integration.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/integration.R b/R/integration.R index 960e98175..02c3f16a3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1901,12 +1901,7 @@ IntegrateSketchEmbeddings <- function( ) ) features <- intersect(x = features, y = features.atom) - emb.all <- matrix( - data = NA_real_, - nrow = ncol(x = object[[orig]]), - ncol = length(x = object[[reduction]]) - ) - rownames(emb.all) <- colnames(object[[orig]]) + ncells <- c( 0, sapply( @@ -1919,6 +1914,8 @@ IntegrateSketchEmbeddings <- function( if (length(atoms.layers) == 1) { atoms.layers <- rep(atoms.layers, length(layers)) } + emb.list <- list() + for (i in seq_along(along.with = layers)) { if (length(unique(atoms.layers)) == length(layers)) { cells.sketch <- Cells(x = object[[atoms]], layer = atoms.layers[i]) @@ -1973,20 +1970,23 @@ IntegrateSketchEmbeddings <- function( )[,cells.sketch]) %*% R) sketch.transform <- ginv(X = exp.mat) %*% Embeddings(object = object[[reduction]])[cells.sketch ,] - emb <- matrix.prod.function(x = R %*% sketch.transform, + emb <- matrix.prod.function(x = R %*% sketch.transform, y = LayerData( object = object[[orig]], layer = layers[i], features = features )) - emb <- t(emb) emb } ) - - emb.all[rownames(emb), ] <- as.matrix(x = emb) - - } + emb.list[[i]] <- as.matrix(x = emb) + } + emb.all <- t(matrix(data = unlist(emb.list), + nrow = ncol(x = object[[orig]]), + ncol = length(x = object[[reduction]]) + )) + rownames(x = emb.all) <- colnames(object[[orig]]) + emb.all <- emb.all[colnames(object[[orig]]), ] object[[reduction.name]] <- CreateDimReducObject( embeddings = emb.all, @@ -5918,7 +5918,7 @@ FastRPCAIntegration <- function( return(object_merged) } -crossprod_DelayedAssay <- function(x, y, block.size = 1e9) { +crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { # perform t(x) %*% y in blocks for y if (!inherits(x = y, 'DelayedMatrix')) { stop('y should a DelayedMatrix') From fa9849632478f7452313b069b9c8d3c09d1d3cf2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 30 Oct 2022 09:41:57 -0400 Subject: [PATCH 254/979] delayed array operation --- R/utilities.R | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index 3a4835404..259717035 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2435,3 +2435,131 @@ ToNumeric <- function(x){ } return(x) } + + + +# cross product from delayed array +# +crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { + # perform t(x) %*% y in blocks for y + if (!inherits(x = y, 'DelayedMatrix')) { + stop('y should a DelayedMatrix') + } + if (nrow(x) != nrow(y)) { + stop('row of x and y should be the same') + } + sparse <- DelayedArray::is_sparse(x = y) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = y) + product.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + product.list[[i]] <- as.matrix(t(x) %*% block) + } + product.mat <- matrix(data = unlist(product.list), nrow = ncol(x) , ncol = ncol(y)) + colnames(product.mat) <- colnames(y) + rownames(product.mat) <- rownames(x) + return(product.mat) +} + +# cross product row norm from delayed array +# +crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e8) { + # perform t(x) %*% y in blocks for y + if (!inherits(x = y, 'DelayedMatrix')) { + stop('y should a DelayedMatrix') + } + if (nrow(x) != nrow(y)) { + stop('row of x and y should be the same') + } + sparse <- DelayedArray::is_sparse(x = y) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = y) + norm.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + norm.list[[i]] <- colSums(x = as.matrix(t(x) %*% block) ^ 2) + } + norm.vector <- unlist(norm.list) + return(norm.vector) + +} + +# row mean from delayed array +# +RowMeanDelayedAssay <- function(x, block.size = 1e8) { + if (!inherits(x = x, 'DelayedMatrix')) { + stop('input x should a DelayedMatrix') + } + sparse <- DelayedArray::is_sparse(x = x) + if (sparse ) { + row.sum.function <- RowSumSparse + } else { + row.sum.function <- rowSums2 + } + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = x) + sum.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + sum.list[[i]] <- row.sum.function(mat = block) + } + mean.mat <- Reduce('+', sum.list) + mean.mat <- mean.mat/ncol(x) + return(mean.mat) +} + +# row variance from delayed array +# +RowVarDelayedAssay <- function(x, block.size = 1e8) { + if (!inherits(x = x, 'DelayedMatrix')) { + stop('input x should a DelayedMatrix') + } + sparse <- DelayedArray::is_sparse(x = x) + if (sparse ) { + row.sum.function <- RowSumSparse + } else { + row.sum.function <- rowSums2 + } + + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = x) + sum2.list <- list() + sum.list <- list() + + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + sum2.list[[i]] <- row.sum.function(mat = block**2) + sum.list[[i]] <- row.sum.function(mat = block) + } + sum.mat <- Reduce('+', sum.list) + sum2.mat <- Reduce('+', sum2.list) + var.mat <- sum2.mat/ncol(x) - (sum.mat/ncol(x))**2 + var.mat <- var.mat * ncol(counts) / (ncol(counts) - 1) + return(var.mat) +} + From f69fdb8aa910898982fe393f9e484cb7a2d787a5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 30 Oct 2022 09:42:25 -0400 Subject: [PATCH 255/979] move function --- R/integration.R | 54 ------------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/R/integration.R b/R/integration.R index 02c3f16a3..9902918e4 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5918,58 +5918,4 @@ FastRPCAIntegration <- function( return(object_merged) } -crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { - # perform t(x) %*% y in blocks for y - if (!inherits(x = y, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (nrow(x) != nrow(y)) { - stop('row of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = y) - product.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - product.list[[i]] <- as.matrix(t(x) %*% block) - } - product.mat <- matrix(data = unlist(product.list), nrow = ncol(x) , ncol = ncol(y)) - colnames(product.mat) <- colnames(y) - rownames(product.mat) <- rownames(x) - return(product.mat) -} - -crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e9) { - # perform t(x) %*% y in blocks for y - if (!inherits(x = y, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (nrow(x) != nrow(y)) { - stop('row of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = y) - norm.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - norm.list[[i]] <- colSums(x = as.matrix(t(x) %*% block) ^ 2) - } - norm.vector <- unlist(norm.list) - return(norm.vector) - -} From b76c1e038d654cc738118e60eab96c48464e79a0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 30 Oct 2022 10:08:42 -0400 Subject: [PATCH 256/979] add sparse sweep --- R/utilities.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index 259717035..aed82e709 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2563,3 +2563,27 @@ RowVarDelayedAssay <- function(x, block.size = 1e8) { return(var.mat) } + + +# sparse version of sweep +SweepSparse <- function( + x, + MARGIN, + STATS, + FUN = "/" +) { + if (!inherits(x = x, what = 'dgCMatrix')) { + stop('input should be dgCMatrix. eg: x <- as(x, "CsparseMatrix")') + } + fun <- match.fun(FUN) + if (MARGIN == 1) { + idx <- x@i + 1 + x@x <- fun(x@x, STATS[idx]) + } else if (MARGIN == 2) { + x <- as(x, "RsparseMatrix") + idx <- x@j + 1 + x@x <- fun(x@x, STATS[idx]) + x <- as(x, "CsparseMatrix") + } + return(x) +} From 604e569f3ea84e573616c218b7e222ffd4751a64 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 30 Oct 2022 14:51:25 -0400 Subject: [PATCH 257/979] fix sketch inte emb --- R/integration.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 02c3f16a3..a56d2331a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1982,10 +1982,9 @@ IntegrateSketchEmbeddings <- function( emb.list[[i]] <- as.matrix(x = emb) } emb.all <- t(matrix(data = unlist(emb.list), - nrow = ncol(x = object[[orig]]), - ncol = length(x = object[[reduction]]) + nrow = length(x = object[[reduction]]), + ncol = ncol(x = object[[orig]]) )) - rownames(x = emb.all) <- colnames(object[[orig]]) emb.all <- emb.all[colnames(object[[orig]]), ] object[[reduction.name]] <- CreateDimReducObject( From 46d669d5d32dedf8d2aab0395876e1c55ad1714f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 30 Oct 2022 17:07:22 -0400 Subject: [PATCH 258/979] vst for delayarray --- R/preprocessing5.R | 170 +++++++++++---------------------------------- R/utilities.R | 3 + 2 files changed, 45 insertions(+), 128 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index fb2738ef3..d07c8a648 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -794,11 +794,12 @@ VST.default <- function( #' VST.DelayedMatrix <- function( data, - margin = 1L, + margin = 2L, nselect = 2000L, span = 0.3, clip = NULL, verbose = TRUE, + block.size = 1e8, ... ) { check_installed( @@ -808,148 +809,61 @@ VST.DelayedMatrix <- function( if (!margin %in% c(1L, 2L)) { abort(message = "'margin' must be 1 or 2") } + nfeatures <- dim(x = data)[-margin] + ncells <- dim(x = data)[margin] + hvf.info <- SeuratObject::EmptyDF(n = nfeatures) + hvf.info$mean <- RowMeanDelayedAssay(x = data, block.size = block.size) + # Calculate feature variance + hvf.info$variance <- RowVarDelayedAssay(x = data, block.size = block.size) + hvf.info$variance.expected <- 0L + not.const <- hvf.info$variance > 0 + fit <- loess( + formula = log10(x = variance) ~ log10(x = mean), + data = hvf.info[not.const, , drop = TRUE], + span = span + ) + hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + + suppressMessages(setAutoBlockSize(size = block.size)) grid <- if (margin == 1L) { DelayedArray::rowAutoGrid(x = data) } else { DelayedArray::colAutoGrid(x = data) } - nfeatures <- dim(x = data)[margin] - ncells <- dim(x = data)[-margin] - # hvf.info <- SeuratObject::EmptyDF(n = nfeatures) - hvf.info <- vector(mode = 'list', length = length(x = grid)) sparse <- DelayedArray::is_sparse(x = data) - # Calculate feature means - # if (isTRUE(x = verbose)) { - # inform(message = "Calculating feature means") - # } - # hvf.info$mean <- if (margin == 1L) { - # DelayedArray::rowMeans(x = data) - # } else { - # DelayedArray::colMeans(x = data) - # } - # Calculate variance - # hvf.info$variance <- NA_real_ - if (isTRUE(x = verbose)) { - # inform(message = "Calculating feature variances") - inform(message = "Identifying variable features") - pb <- txtProgressBar(style = 3L, file = stderr()) + if (sparse) { + sweep.func <- SweepSparse + rowsum.func <- RowSumSparse + } else { + sweep.func <- sweep + rowsum.func <- rowSums2 } + var_stand.list <- list() for (i in seq_len(length.out = length(x = grid))) { vp <- grid[[i]] - idx <- seq.int( - from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), - to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) - ) - x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) - if (isTRUE(x = sparse)) { - x <- as(object = x, Class = "CsparseMatrix") - } - hvf.info[[i]] <- VST( - data = x, - margin = margin, - nselect = floor(x = nselect / length(x = grid)), - span = span, - clip = clip, - verbose = FALSE, - ... - ) - # if (margin == 2L) { - # x <- t(x = x) - # } - # mu <- hvf.info$mean[idx] - # hvf.info$variance[idx] <- rowSums(x = ((x - mu) ^ 2) / (ncells - 1L)) - # # hvf.info$variance[idx] <- vapply( - # # X = seq_along(along.with = mu), - # # FUN = function(j) { - # # y <- if (margin == 1L) { - # # x[j, ] - # # } else { - # # x[, j] - # # } - # # y <- y - mu[j] - # # return(sum(y ^ 2) / (ncells - 1L)) - # # }, - # # FUN.VALUE = numeric(length = 1L) - # # ) - # if (isTRUE(x = verbose)) { - # setTxtProgressBar(pb = pb, value = i / length(x = grid)) - # } - # } - # if (isTRUE(x = verbose)) { - # close(con = pb) - # } - # hvf.info$variance.expected <- 0 - # not.const <- hvf.info$variance > 0 - # fit <- loess( - # formula = log10(x = variance) ~ log10(x = mean), - # data = hvf.info[not.const, , drop = FALSE], - # span = span - # ) - # hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted - # # Calculate standardized variance - # hvf.info$variance.standardized <- NA_real_ - # if (isTRUE(x = verbose)) { - # inform( - # message = "Calculating feature variances of standardized and clipped values" - # ) - # pb <- txtProgressBar(style = 3L, file = stderr()) - # } - # clip <- clip %||% sqrt(x = ncells) - # for (i in seq_len(length.out = length(x = grid))) { - # vp <- grid[[i]] - # idx <- seq.int( - # from = IRanges::start(x = slot(object = vp, name = 'ranges')[margin]), - # to = IRanges::end(x = slot(object = vp, name = 'ranges')[margin]) - # ) - # x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) - # if (isTRUE(x = sparse)) { - # x <- as(object = x, Class = "CsparseMatrix") - # } - # if (margin == 2L) { - # x <- t(x = x) - # } - # mu <- hvf.info$mean[idx] - # sd <- sqrt(x = hvf.info$variance.expected[idx]) - # hvf.info$variance.standardized[idx] <- 0 - # sdn <- which(x = sd != 0) - # hvf.info$variance.standardized[idx[sdn]] <- rowSums(x = (((x[sdn, ] - mu[sdn]) / sd[sdn]) ^ 2) / (ncells - 1L)) - # # hvf.info$variance.standardized[idx] <- vapply( - # # X = seq_along(along.with = mu), - # # FUN = function(j) { - # # if (sd[j] == 0) { - # # return(0) - # # } - # # y <- if (margin == 1L) { - # # x[j, ] - # # } else { - # # x[, j] - # # } - # # y <- y - mu[j] - # # y <- y / sd[j] - # # y[y > clip] <- clip - # # return(sum(y ^ 2) / (ncells - 1L)) - # # }, - # # FUN.VALUE = numeric(length = 1L) - # # ) - if (isTRUE(x = verbose)) { - setTxtProgressBar(pb = pb, value = i / length(x = grid)) - } - } - if (isTRUE(x = verbose)) { - close(con = pb) - } - hvf.info <- do.call(what = 'rbind', args = hvf.info) - # Set variable status + block <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) + block <- as(object = block, Class = 'dgCMatrix') + block.stat <- SparseRowVarStd(mat = block, + mu = hvf.info$mean, + sd = sqrt(hvf.info$variance.expected), + vmax = clip %||% sqrt(x = ncol(x = data)), + display_progress = FALSE) + + var_stand.list[[i]] <- block.stat * (ncol(block) - 1) + } + hvf.info$variance.standardized <- Reduce(f = '+', x = var_stand.list)/ + (ncol(data) - 1) + # Set variable features hvf.info$variable <- FALSE - hvf.info$rank <- NA_integer_ - vs <- hvf.info$variance.standardized - vs[vs == 0] <- NA + hvf.info$rank <- NA vf <- head( - x = order(vs, decreasing = TRUE), + x = order(hvf.info$variance.standardized, decreasing = TRUE), n = nselect ) hvf.info$variable[vf] <- TRUE hvf.info$rank[vf] <- seq_along(along.with = vf) + rownames(hvf.info) <- rownames(data) + return(hvf.info) } diff --git a/R/utilities.R b/R/utilities.R index aed82e709..ed240e5a9 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2575,6 +2575,9 @@ SweepSparse <- function( if (!inherits(x = x, what = 'dgCMatrix')) { stop('input should be dgCMatrix. eg: x <- as(x, "CsparseMatrix")') } + if (dim(x = x)[MARGIN] != length(STATS)){ + warning("Length of STATS is not equal to dim(x)[MARGIN]") + } fun <- match.fun(FUN) if (MARGIN == 1) { idx <- x@i + 1 From 9526cc7a38dc28d68a6f17dd56c02dbf49dda9c2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 31 Oct 2022 00:48:55 -0400 Subject: [PATCH 259/979] fix sketch emn --- R/integration.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index e5496d507..bc761b180 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1839,6 +1839,7 @@ IntegrateSketchEmbeddings <- function( reduction.name = NULL, reduction.key = NULL, layers = NULL, + seed = 123, verbose = TRUE ) { # Check input and output dimensional reductions @@ -1901,7 +1902,6 @@ IntegrateSketchEmbeddings <- function( ) ) features <- intersect(x = features, y = features.atom) - ncells <- c( 0, sapply( @@ -1915,7 +1915,7 @@ IntegrateSketchEmbeddings <- function( atoms.layers <- rep(atoms.layers, length(layers)) } emb.list <- list() - + cells.list <- list() for (i in seq_along(along.with = layers)) { if (length(unique(atoms.layers)) == length(layers)) { cells.sketch <- Cells(x = object[[atoms]], layer = atoms.layers[i]) @@ -1961,7 +1961,8 @@ IntegrateSketchEmbeddings <- function( 'sketch' = { R <- t(x = CountSketch( nsketch = round(x = ratio * length(x = features)), - ncells = length(x = features) + ncells = length(x = features), + seed = seed )) exp.mat <- as.matrix(x = t(x = LayerData( object = object[[atoms]], @@ -1980,13 +1981,14 @@ IntegrateSketchEmbeddings <- function( } ) emb.list[[i]] <- as.matrix(x = emb) + cells.list[[i]] <- colnames(x = emb) } emb.all <- t(matrix(data = unlist(emb.list), nrow = length(x = object[[reduction]]), ncol = ncol(x = object[[orig]]) )) + rownames(emb.all) <- unlist(cells.list) emb.all <- emb.all[colnames(object[[orig]]), ] - object[[reduction.name]] <- CreateDimReducObject( embeddings = emb.all, loadings = Loadings(object = object[[reduction]]), From 6bd366c7026855752c9a1c25314569a21de00337 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 1 Nov 2022 08:46:44 -0400 Subject: [PATCH 260/979] Add support for SCTAssays in IntegrateLayers() --- NAMESPACE | 1 + R/integration.R | 60 +++++++++++++++++++++++++++++++++---------- R/integration5.R | 67 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 96 insertions(+), 32 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 24c41a969..35b801e58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -357,6 +357,7 @@ export(ScaleFactors) export(ScoreJackStraw) export(SelectIntegrationFeatures) export(SelectIntegrationFeatures5) +export(SelectSCTIntegrationFeatures) export(SetAssayData) export(SetIdent) export(SetIntegrationData) diff --git a/R/integration.R b/R/integration.R index 960e98175..cf73f8b38 100644 --- a/R/integration.R +++ b/R/integration.R @@ -413,7 +413,7 @@ FindIntegrationAnchors <- function( embeddings = rbind(Embeddings(object.1[['joint.pca']]), Embeddings(object.2[['joint.pca']])), loadings = Loadings(object.1[['joint.pca']]), - key = 'Joint_', + key = 'Joint_', assay = 'ToIntegrate') if (l2.norm) { object.pair <- L2Dim(object = object.pair, @@ -1621,7 +1621,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( slot(object = anchorset, name = "object.list") <- object.list new.reduction.name.safe <- gsub(pattern = "_", replacement = "", x = new.reduction.name) new.reduction.name.safe <- gsub(pattern = "[.]", replacement = "", x = new.reduction.name.safe) - + reference.integrated <- PairwiseIntegrateReference( anchorset = anchorset, new.assay.name = new.reduction.name.safe, @@ -1983,11 +1983,11 @@ IntegrateSketchEmbeddings <- function( emb } ) - + emb.all[rownames(emb), ] <- as.matrix(x = emb) } - + object[[reduction.name]] <- CreateDimReducObject( embeddings = emb.all, loadings = Loadings(object = object[[reduction]]), @@ -2986,6 +2986,38 @@ SelectIntegrationFeatures5 <- function( return(head(x = c(features, features.tie), n = nfeatures)) } +#' @export +#' +SelectSCTIntegrationFeatures <- function(object, nfeatures = 2000, assay = NULL, verbose = TRUE, ...) { + assay <- assay %||% DefaultAssay(object = object) + if (!inherits(x = object[[assay]], what = 'SCTAssay')) { + abort(message = "'assay' must be an SCTAssay") + } + models <- levels(x = object[[assay]]) + vf.list <- VariableFeatures( + object = object[[assay]], + layer = models, + n = nfeatures, + simplify = FALSE + ) + var.features <- sort( + x = table(unlist(x = vf.list, use.names = FALSE)), + decreasing = TRUE + ) + idx <- which(x = var.features == length(x = models)) + var.features <- var.features[idx] + tie.val <- var.features[min(nfeatures, length(x = var.features))] + features <- names(x = var.features[which(x = var.features > tie.val)]) + if (length(x = features)) { + features <- .FeatureRank(features = features, flist = vf.list) + } + features.tie <- .FeatureRank( + features = names(x = var.features[which(x = var.features == tie.val)]), + flist = vf.list + ) + return(head(x = c(features, features.tie), n = nfeatures)) +} + #' Transfer data #' #' Transfer categorical or continuous data across single-cell datasets. For @@ -5770,7 +5802,7 @@ ProjectCellEmbeddings_DelayedAssay <- function( feature.mean = NULL, feature.sd = NULL ) { - + dims <- dims %||% 1:ncol(reference[[reduction]]) assay <- assay %||% DefaultAssay(reference) features <- intersect(rownames(query.data), @@ -5778,15 +5810,15 @@ ProjectCellEmbeddings_DelayedAssay <- function( query.data <- query.data[features,] if (IsSCT(object[[assay]])) { # TODO: SCT reiduals projection - + } else { - feature.mean <- feature.mean[features] %||% + feature.mean <- feature.mean[features] %||% RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) - - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) + + feature.sd <- feature.sd[features] %||% + sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - + suppressMessages(setAutoBlockSize(size = block.size)) cells.grid <- DelayedArray::colAutoGrid(x = query.data) emb.list <- list() @@ -5798,7 +5830,7 @@ ProjectCellEmbeddings_DelayedAssay <- function( data.block <- apply(data.block, MARGIN = 2, function(x) { x <- (x - feature.mean)/feature.sd return(x) - }) + }) emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block emb.list[[i]] <- emb.block } @@ -5807,7 +5839,7 @@ ProjectCellEmbeddings_DelayedAssay <- function( rownames(emb.mat) <- colnames(query.data) colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] } - + return(emb.mat) } @@ -5970,6 +6002,6 @@ crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e9) { } norm.vector <- unlist(norm.list) return(norm.vector) - + } diff --git a/R/integration5.R b/R/integration5.R index 4a586efeb..c8664619c 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -16,11 +16,12 @@ NULL #' @inheritParams harmony::HarmonyMatrix #' @param object An \code{\link[SeuratObject]{Assay5}} object # @param assay Name of \code{object} in the containing \code{Seurat} object -#' @param orig \link[SeuratObject:DimReduc]{Dimensional reduction} to correct -#' @param groups A data frame ... -#' @param features ... -#' @param scale.layer ... -#' @param layers ... +#' @param orig A \link[SeuratObject:DimReduc]{dimensional reduction} to correct +#' @param groups A one-column data frame with grouping information; column +#' should be called \code{group} +#' @param features Ignored +#' @param scale.layer Ignored +#' @param layers Ignored #' @param ... Ignored #' #' @return ... @@ -64,8 +65,8 @@ HarmonyIntegration <- function( pkg = "harmony", reason = "for running integration with Harmony" ) - if (!inherits(x = object, what = 'StdAssay')) { - abort(message = "'object' must be a v5 assay object") + if (!inherits(x = object, what = c('StdAssay', 'SCTAssay'))) { + abort(message = "'object' must be a v5 or SCT assay") } else if (!inherits(x = orig, what = 'DimReduc')) { abort(message = "'orig' must be a dimensional reduction") } @@ -135,7 +136,7 @@ CCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - + object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,] ) @@ -164,6 +165,8 @@ CCAIntegration <- function( return(output.list) } +attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' + #' Seurat-RPCA Integration #' #' @inheritParams FindIntegrationAnchors @@ -218,6 +221,8 @@ RPCAIntegration <- function( return(output.list) } +attr(x = RPCAIntegration, which = 'Seurat.method') <- 'integration' + #' Seurat-Joint PCA Integration #' #' @inheritParams FindIntegrationAnchors @@ -242,7 +247,7 @@ JointPCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - + object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features[1:2], ] ) @@ -275,6 +280,8 @@ JointPCAIntegration <- function( return(output.list) } +attr(x = JointPCAIntegration, which = 'Seurat.method') <- 'integration' + #' Integrate Layers #' #' @param object A \code{\link[SeuratObject]{Seurat}} object @@ -326,15 +333,26 @@ IntegrateLayers <- function( } # Check our assay assay <- assay %||% DefaultAssay(object = object) - if (!inherits(x = object[[assay]], what = 'StdAssay')) { - abort(message = "'assay' must be a v5 assay") + if (inherits(x = object[[assay]], what = 'SCTAssay')) { + layers <- 'data' + scale.layer <- 'scale.data' + features <- features %||% SelectSCTIntegrationFeatures( + object = object, + assay = assay + ) + } else if (inherits(x = object[[assay]], what = 'StdAssay')) { + layers <- Layers(object = object, assay = assay, search = layers) + scale.layer <- Layers(object = object, search = scale.layer) + features <- features %||% SelectIntegrationFeatures5( + object = object, + assay = assay + ) + } else { + abort(message = "'assay' must be a v5 or SCT assay") } - layers <- Layers(object = object, assay = assay, search = layers) - features <- features %||% SelectIntegrationFeatures5(object = object, assay = assay) - scale.layer <- Layers(object = object, search = scale.layer) features <- intersect( x = features, - y = Features(x = object[[assay]], layer = scale.layer) + y = Features(x = object, assay = assay, layer = scale.layer) ) if (!length(x = features)) { abort(message = "None of the features provided are found in this assay") @@ -349,13 +367,26 @@ IntegrateLayers <- function( DefaultAssay(object = obj.orig) <- assay } # Check our groups - groups <- if (is.null(x = group.by) && length(x = layers) > 1L) { + groups <- if (inherits(x = object[[assay]], what = 'SCTAssay')) { + if (!is.null(x = group.by)) { + warn( + message = "Groups are set automatically by model when integrating SCT assays" + ) + } + df <- SeuratObject::EmptyDF(n = ncol(x = object[[assay]])) + row.names(x = df) <- colnames(x = object[[assay]]) + for (model in levels(x = object[[assay]])) { + cc <- Cells(x = object[[assay]], layer = model) + df[cc, "group"] <- model + } + df + } else if (is.null(x = group.by) && length(x = layers) > 1L) { cmap <- slot(object = object[[assay]], name = 'cells')[, layers] as.data.frame(x = labels( object = cmap, values = Cells(x = object[[assay]], layer = scale.layer) )) - } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { + } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { FetchData( object = object, vars = group.by, @@ -364,7 +395,7 @@ IntegrateLayers <- function( } else { abort(message = "'group.by' must correspond to a column of cell-level meta data") } - names(x = groups) <- "group" + names(x = groups) <- 'group' # Run the integration method value <- method( object = object[[assay]], From 748ef00ff4b2925eacc3c4cee6017139856b8b0b Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 1 Nov 2022 08:47:03 -0400 Subject: [PATCH 261/979] update docs --- man/CCAIntegration.Rd | 3 +-- man/HarmonyIntegration.Rd | 11 ++++++----- man/JointPCAIntegration.Rd | 3 +-- man/RPCAIntegration.Rd | 3 +-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index fac09783e..797b1147a 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -8,13 +8,12 @@ CCAIntegration( object = NULL, assay = NULL, layers = NULL, - orig.reduction = "pca.rna", + orig = NULL, new.reduction = "integrated.dr", reference = NULL, features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, - npcs = 50, groups = NULL, k.filter = NA, scale.layer = "scale.data", diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index a0d85c035..839ebdc7e 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -30,15 +30,16 @@ HarmonyIntegration( \arguments{ \item{object}{An \code{\link[SeuratObject]{Assay5}} object} -\item{orig}{\link[SeuratObject:DimReduc]{Dimensional reduction} to correct} +\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct} -\item{groups}{A data frame ...} +\item{groups}{A one-column data frame with grouping information; column +should be called \code{group}} -\item{features}{...} +\item{features}{Ignored} -\item{scale.layer}{...} +\item{scale.layer}{Ignored} -\item{layers}{...} +\item{layers}{Ignored} \item{npcs}{If doing PCA on input matrix, number of PCs to compute.} diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd index 8156a8d06..5a3b11beb 100644 --- a/man/JointPCAIntegration.Rd +++ b/man/JointPCAIntegration.Rd @@ -8,13 +8,12 @@ JointPCAIntegration( object = NULL, assay = NULL, layers = NULL, - orig.reduction = "pca.rna", + orig = NULL, new.reduction = "integrated.dr", reference = NULL, features = NULL, normalization.method = NULL, dims = 1:30, - npcs = 50, k.anchor = 20, scale.layer = "scale.data", verbose = TRUE, diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index cf18c2c35..b97c91d82 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -8,13 +8,12 @@ RPCAIntegration( object = NULL, assay = NULL, layers = NULL, - orig.reduction = "pca.rna", + orig = NULL, new.reduction = "integrated.dr", reference = NULL, features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, - npcs = 50, k.filter = NA, scale.layer = "scale.data", groups = NULL, From 212abb19c637e55f9e87e41c5d23e8600ef29934 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 1 Nov 2022 08:55:42 -0400 Subject: [PATCH 262/979] remove duplicate functions --- R/integration.R | 55 ------------------------------------------------- 1 file changed, 55 deletions(-) diff --git a/R/integration.R b/R/integration.R index efdccbc5b..240c83106 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5950,58 +5950,3 @@ FastRPCAIntegration <- function( VariableFeatures(object = object_merged) <- anchor.feature return(object_merged) } - -crossprod_DelayedAssay <- function(x, y, block.size = 1e9) { - # perform t(x) %*% y in blocks for y - if (!inherits(x = y, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (nrow(x) != nrow(y)) { - stop('row of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = y) - product.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - product.list[[i]] <- as.matrix(t(x) %*% block) - } - product.mat <- matrix(data = unlist(product.list), nrow = ncol(x) , ncol = ncol(y)) - colnames(product.mat) <- colnames(y) - rownames(product.mat) <- rownames(x) - return(product.mat) -} - -crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e9) { - # perform t(x) %*% y in blocks for y - if (!inherits(x = y, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (nrow(x) != nrow(y)) { - stop('row of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = y) - norm.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - norm.list[[i]] <- colSums(x = as.matrix(t(x) %*% block) ^ 2) - } - norm.vector <- unlist(norm.list) - return(norm.vector) - -} From adac964f2f8f76e7ef1e4e6c71c3ee162b0e0557 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Nov 2022 21:13:26 -0400 Subject: [PATCH 263/979] Fix SCT v5 integration for multiple layers --- R/integration.R | 8 ++++-- R/objects.R | 6 ++-- R/preprocessing5.R | 71 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/R/integration.R b/R/integration.R index 240c83106..88c1ef11e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2989,7 +2989,7 @@ SelectIntegrationFeatures5 <- function( #' @export #' -SelectSCTIntegrationFeatures <- function(object, nfeatures = 2000, assay = NULL, verbose = TRUE, ...) { +SelectSCTIntegrationFeatures <- function(object, nfeatures = 3000, assay = NULL, verbose = TRUE, ...) { assay <- assay %||% DefaultAssay(object = object) if (!inherits(x = object[[assay]], what = 'SCTAssay')) { abort(message = "'assay' must be an SCTAssay") @@ -3005,8 +3005,10 @@ SelectSCTIntegrationFeatures <- function(object, nfeatures = 2000, assay = NULL, x = table(unlist(x = vf.list, use.names = FALSE)), decreasing = TRUE ) - idx <- which(x = var.features == length(x = models)) - var.features <- var.features[idx] + for (i in 1:length(x = models)) { + vst_out <- SCTModel_to_vst(SCTModel = slot(object = object[[assay]], name = "SCTModel.list")[[models[[i]]]]) + var.features <- var.features[names(x = var.features) %in% rownames(x = vst_out$gene_attr)] + } tie.val <- var.features[min(nfeatures, length(x = var.features))] features <- names(x = var.features[which(x = var.features > tie.val)]) if (length(x = features)) { diff --git a/R/objects.R b/R/objects.R index 9f8e2e8cf..7075991cb 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1837,7 +1837,7 @@ SCTResults.Seurat <- function(object, assay = "SCT", slot, model = NULL, ...) { #' @method VariableFeatures SCTModel #' @export #' -VariableFeatures.SCTModel <- function(object, n = 2000, ...) { +VariableFeatures.SCTModel <- function(object, n = 3000, ...) { if (!is_scalar_integerish(x = n) || (!is_na(x = n < 1L) && n < 1L)) { abort(message = "'n' must be a single positive integer") } @@ -1857,7 +1857,7 @@ VariableFeatures.SCTModel <- function(object, n = 2000, ...) { VariableFeatures.SCTAssay <- function( object, layer = NULL, - n = 2000, + n = 3000, simplify = TRUE, ... ) { @@ -2146,6 +2146,8 @@ merge.SCTAssay <- function( combined.assay, SCTModel.list = model.list ) + features <- VariableFeatures(object = combined.assay) + VariableFeatures(object = combined.assay) <- features return(combined.assay) } diff --git a/R/preprocessing5.R b/R/preprocessing5.R index d07c8a648..86d8d3dbc 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -823,7 +823,7 @@ VST.DelayedMatrix <- function( span = span ) hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted - + suppressMessages(setAutoBlockSize(size = block.size)) grid <- if (margin == 1L) { DelayedArray::rowAutoGrid(x = data) @@ -848,7 +848,7 @@ VST.DelayedMatrix <- function( sd = sqrt(hvf.info$variance.expected), vmax = clip %||% sqrt(x = ncol(x = data)), display_progress = FALSE) - + var_stand.list[[i]] <- block.stat * (ncol(block) - 1) } hvf.info$variance.standardized <- Reduce(f = '+', x = var_stand.list)/ @@ -863,7 +863,7 @@ VST.DelayedMatrix <- function( hvf.info$variable[vf] <- TRUE hvf.info$rank[vf] <- seq_along(along.with = vf) rownames(hvf.info) <- rownames(data) - + return(hvf.info) } @@ -1302,10 +1302,6 @@ SCTransform.StdAssay <- function( cell.attr.object <- cell.attr[colnames(x = counts.chunk),, drop=FALSE] if (!identical(rownames(cell.attr.object), colnames(counts.chunk))) { - print(length(setdiff(rownames(cell.attr.object), colnames(counts.chunk)))) - print(length(setdiff(colnames(counts.chunk),rownames(cell.attr.object)))) - print(rownames(cell.attr.object)[1:5]) - print(colnames(counts.chunk)[1:5]) stop("cell attribute row names must match column names of count matrix") } vst.out <- SCTransform(object = counts.chunk, @@ -1360,7 +1356,6 @@ SCTransform.StdAssay <- function( vst.out$arguments$sct.method <- sct.method Misc(object = assay.out, slot = 'vst.out') <- vst.out assay.out <- as(object = assay.out, Class = "SCTAssay") - #TODO: Add a key to prevent hitting a bug in merge.StdAssay which # does not like character(0) keys being merged return (assay.out) } @@ -1370,7 +1365,7 @@ SCTransform.StdAssay <- function( set.seed(seed = seed.use) selected.block <- sample(x = seq.int(from = 1, to = length(cells.grid)), size = 1) if (verbose){ - message("Using block", selected.block, " from ", dataset.names[[dataset.index]], " to learn model.") + message("Using block ", selected.block, " from ", dataset.names[[dataset.index]], " to learn model.") } vp <- cells.grid[[selected.block]] assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = FALSE) @@ -1448,6 +1443,7 @@ SCTransform.StdAssay <- function( cell_attrs[[i]] <- cell_attr } new.residuals <- Reduce(cbind, residuals) + corrected_counts <- Reduce(cbind, corrected_counts) cell_attrs <- Reduce(rbind, cell_attrs) @@ -1533,10 +1529,60 @@ SCTransform.StdAssay <- function( } # Return array by merging everythin if (length(x = sct.assay.list) > 1){ + vf.list <- lapply(X = sct.assay.list, FUN = function(object) VariableFeatures(object = object)) + variable.features.union <- Reduce(f = union, x = vf.list) + var.features <- sort( + x = table(unlist(x = vf.list, use.names = FALSE)), + decreasing = TRUE + ) + # idx <- which(x = var.features == length(x = sct.assay.list)) + var.features <- names(x = var.features[1:variable.features.n]) + + #browser() + for (layer.name in names(sct.assay.list)){ + #object.sct <- CreateSeurat5Object(counts = ) + vst_out <- SCTModel_to_vst(SCTModel = slot(object = sct.assay.list[[layer.name]], name = "SCTModel.list")[[1]]) + + all_cells <- Cells(x = object, layer = paste0(layer, ".", layer.name)) + all_features <- Features(x = object, layer = paste0(layer, ".", layer.name)) + variable.features.target <- intersect(x = rownames(x = vst_out$model_pars_fit), y = var.features) + variable.features.target <- setdiff(x = variable.features.target, y = VariableFeatures(sct.assay.list[[layer.name]])) + if (length(variable.features.target )<1){ + next + } + counts <- LayerData( + object = object, + layer = paste0(layer, ".", layer.name), + cells = all_cells + ) + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = ncol(counts)) + vp <- cells.grid[[1L]] + block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) + counts.vp <- as(object = block, Class = 'dgCMatrix') + + if (vst_out$arguments$min_var == "umi_median"){ + nz_median <- median(counts.vp@x) + min_var_custom <- (nz_median / 5)^2 + } else { + min_var_custom <- vst_out$arguments$min_var + } + vst_out$cell_attr <- vst_out$cell_attr[, c("log_umi"), drop=FALSE] + vst_out$model_pars_fit <- vst_out$model_pars_fit[variable.features.target,,drop=FALSE] + + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[variable.features.target,], + residual_type = "pearson", + min_variance = min_var_custom, + verbosity = FALSE + ) + sct.assay.list[[layer.name]]@scale.data <- rbind(sct.assay.list[[layer.name]]@scale.data, new_residual) + } + sct.assay.list[[dataset.names[dataset.index]]] <- assay.out + variable.feature.list[[dataset.names[dataset.index]]] <- VariableFeatures(assay.out) merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) - # set variable features as the union of the features - variable.features <- Reduce(f = union, x = variable.feature.list) - VariableFeatures(object = merged.assay) <- variable.features + + VariableFeatures(object = merged.assay) <- intersect(x = var.features, y = rownames(x = GetAssayData(object = merged.assay, slot='scale.data'))) # set the names of SCTmodels to be layer names models <- slot(object = merged.assay, name="SCTModel.list") names(models) <- names(x = sct.assay.list) @@ -1851,7 +1897,6 @@ FetchResidualSCTModel <- function(object, if (i==1){ nz_median <- median(umi.all@x) min_var_custom <- (nz_median / 5)^2 - # print(paste("min_var_custom", min_var_custom)) } umi <- umi.all[features_to_compute, , drop = FALSE] From f308de98bfa14a39f8d6de980e88e557762c162a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Nov 2022 21:55:50 -0400 Subject: [PATCH 264/979] Fix residual features --- R/preprocessing5.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 86d8d3dbc..8b5cac223 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1531,16 +1531,16 @@ SCTransform.StdAssay <- function( if (length(x = sct.assay.list) > 1){ vf.list <- lapply(X = sct.assay.list, FUN = function(object) VariableFeatures(object = object)) variable.features.union <- Reduce(f = union, x = vf.list) - var.features <- sort( + var.features.sorted <- sort( x = table(unlist(x = vf.list, use.names = FALSE)), decreasing = TRUE ) # idx <- which(x = var.features == length(x = sct.assay.list)) - var.features <- names(x = var.features[1:variable.features.n]) - - #browser() + # select top ranking features + #var.features <- names(x = var.features.sorted[1:variable.features.n]) + # calculate residuals for union of features + var.features <- variable.features.union for (layer.name in names(sct.assay.list)){ - #object.sct <- CreateSeurat5Object(counts = ) vst_out <- SCTModel_to_vst(SCTModel = slot(object = sct.assay.list[[layer.name]], name = "SCTModel.list")[[1]]) all_cells <- Cells(x = object, layer = paste0(layer, ".", layer.name)) @@ -1576,10 +1576,11 @@ SCTransform.StdAssay <- function( min_variance = min_var_custom, verbosity = FALSE ) - sct.assay.list[[layer.name]]@scale.data <- rbind(sct.assay.list[[layer.name]]@scale.data, new_residual) + old_residual <- GetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data') + merged_residual <- rbind(old_residual, new_residual) + sct.assay.list[[layer.name]] <- SetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data', new.data = merged_residual) + VariableFeatures(sct.assay.list[[layer.name]]) <- rownames(x = merged_residual) } - sct.assay.list[[dataset.names[dataset.index]]] <- assay.out - variable.feature.list[[dataset.names[dataset.index]]] <- VariableFeatures(assay.out) merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) VariableFeatures(object = merged.assay) <- intersect(x = var.features, y = rownames(x = GetAssayData(object = merged.assay, slot='scale.data'))) From d2b5f5da51ab8a54915349c81c346a8c835f125e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 1 Nov 2022 23:17:53 -0400 Subject: [PATCH 265/979] Fix Fetchresiduals --- R/preprocessing5.R | 56 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 8b5cac223..c98274bf8 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1656,9 +1656,9 @@ FetchResiduals <- function(object, warning("SCT model not present in assay", call. = FALSE, immediate. = TRUE) return(object) } - possible.features <- unique(x = unlist(x = lapply(X = sct.models, FUN = function(x) { + possible.features <- Reduce(f = union, x = lapply(X = sct.models, FUN = function(x) { rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = x)) - }))) + })) bad.features <- setdiff(x = features, y = possible.features) if (length(x = bad.features) > 0) { warning("The following requested features are not present in any models: ", @@ -1680,10 +1680,17 @@ FetchResiduals <- function(object, return(object) } } + features <- intersect(x = features.orig, y = features) - if (length(x = sct.models) > 1 & verbose) { - message("This SCTAssay contains multiple SCT models. Computing residuals for cells using") - } + if (length(features) < 1){ + warning("The following requested features are not present in all the models: ", + paste(features.orig, collapse = ", "), + call. = FALSE + ) + return (NULL) + } #if (length(x = sct.models) > 1 & verbose) { + # message("This SCTAssay contains multiple SCT models. Computing residuals for cells using") + #} # Get all (count) layers layers <- Layers(object = object[[umi.assay]], search = layer) @@ -1811,13 +1818,15 @@ FetchResidualSCTModel <- function(object, scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) - full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) - full.scale.data[rownames(x = existing.scale.data), colnames(x = existing.scale.data)] <- existing.scale.data - existing_features <- names(x = which(x = !apply( - X = full.scale.data, - MARGIN = 1, - FUN = anyNA - ))) + #full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), + # ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) + #full.scale.data[rownames(x = existing.scale.data), colnames(x = existing.scale.data)] <- existing.scale.data + #existing_features <- names(x = which(x = !apply( + # X = full.scale.data, + # MARGIN = 1, + # FUN = anyNA + #))) + existing_features <- rownames(x = existing.scale.data) } else { existing_features <- character() } @@ -1827,6 +1836,10 @@ FetchResidualSCTModel <- function(object, features_to_compute <- setdiff(x = new_features, y = existing_features) } scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) + if (length(features_to_compute)<1){ + return (scale.data.cells[intersect(x = rownames(x = scale.data.cells), y = new_features),,drop=FALSE]) + } + if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { existing_features <- names(x = which(x = ! apply( X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], @@ -1882,9 +1895,9 @@ FetchResidualSCTModel <- function(object, ) # iterate over 2k cells at once - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) + #cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = length(x = layer.cells)) new_residuals <- list() - # cat(dim(counts)) for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] @@ -1899,7 +1912,6 @@ FetchResidualSCTModel <- function(object, nz_median <- median(umi.all@x) min_var_custom <- (nz_median / 5)^2 } - umi <- umi.all[features_to_compute, , drop = FALSE] ## Add cell_attr for missing cells @@ -1913,8 +1925,18 @@ FetchResidualSCTModel <- function(object, } else { cell_attr_existing <- vst_out$cell_attr cells_missing <- setdiff(rownames(cell_attr), rownames(cell_attr_existing)) - vst_out$cell_attr <- rbind(cell_attr_existing, cell_attr[cells_missing, , drop=FALSE]) - vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), , drop=FALSE] + if (length(cells_missing)>0){ + cell_attr_missing <- cell_attr[cells_missing, ,drop=FALSE] + missing_cols <- setdiff(x = colnames(x = cell_attr_existing), + y = colnames(x = cell_attr_missing)) + + if (length(x = missing_cols) > 0) { + cell_attr_missing[, missing_cols] <- NA + } + vst_out$cell_attr <- rbind(cell_attr_existing, + cell_attr_missing) + vst_out$cell_attr <- vst_out$cell_attr[colnames(umi), , drop=FALSE] + } } if (verbose) { if (sct.method == "reference.model") { From 373242970b6594407c6baadf10e99a60c0ba786a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 2 Nov 2022 16:27:20 -0400 Subject: [PATCH 266/979] Update SeuratObject version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a518b2a76..ffe393c00 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9035) + SeuratObject (>= 4.9.9.9036) Imports: cluster, cowplot, From 2d0efbd8ddec4cb56aa9100a93d3a9bc44394e7f Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 2 Nov 2022 17:29:00 -0400 Subject: [PATCH 267/979] Update DietSeurat --- NAMESPACE | 5 +++++ R/objects.R | 26 ++++++++++++++++++++++++++ R/zzz.R | 2 ++ 3 files changed, 33 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 35b801e58..6b73226f4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -673,6 +673,11 @@ importFrom(igraph,plot.igraph) importFrom(irlba,irlba) importFrom(jsonlite,fromJSON) importFrom(leiden,leiden) +importFrom(lifecycle,deprecate_soft) +importFrom(lifecycle,deprecate_stop) +importFrom(lifecycle,deprecate_warn) +importFrom(lifecycle,deprecated) +importFrom(lifecycle,is_present) importFrom(lmtest,lrtest) importFrom(matrixStats,rowAnyNAs) importFrom(matrixStats,rowMeans2) diff --git a/R/objects.R b/R/objects.R index 7075991cb..41e8931ed 100644 --- a/R/objects.R +++ b/R/objects.R @@ -472,9 +472,35 @@ DietSeurat <- function( dimreducs = NULL, graphs = NULL, misc = TRUE, + counts = deprecated(), + data = deprecated(), + scale.data = deprecated(), ... ) { CheckDots(...) + dep.args <- c(counts = counts, data = data, scale.data = scale.data) + for (lyr in names(x = dep.args)) { + if (is_present(arg = dep.args[[lyr]])) { + if (is.null(x = layers)) { + layers <- unique(x = unlist(x = lapply( + X = Assays(object = object), + FUN = function(x) { + return(Layers(object = object[[x]])) + } + ))) + } + deprecate_soft( + when = '5.0.0', + what = paste0('DietSeurat(', lyr, ' = )'), + with = 'DietSeurat(layers = )' + ) + layers <- if (isTRUE(x = dep.args[[lyr]])) { + c(layers, lyr) + } else { + Filter(f = \(x) x != lyr, x = layers) + } + } + } object <- UpdateSlots(object = object) assays <- assays %||% Assays(object = object) assays <- intersect(x = assays, y = Assays(object = object)) diff --git a/R/zzz.R b/R/zzz.R index a1e0702ce..7c5dff062 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,4 +1,6 @@ #' @importFrom methods slot slot<- +#' @importFrom lifecycle deprecated deprecate_soft deprecate_stop +#' deprecate_warn is_present #' @importFrom rlang abort #' arg_match #' as_name From a04960d10f3c13546eebf9d77b0b946f5bbaf14c Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 3 Nov 2022 13:34:44 -0400 Subject: [PATCH 268/979] Fix SCTransform.default --- R/preprocessing.R | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 292bc01c1..9487fc307 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1732,24 +1732,7 @@ SCTransform.default <- function( } vst.out }) - # create output assay and put (corrected) umi counts in count slot - if (do.correct.umi & residual.type == 'pearson') { - if (verbose) { - message('Place corrected count matrix in counts slot') - } - assay.out <- CreateAssayObject(counts = vst.out$umi_corrected, check.matrix = FALSE) - vst.out$umi_corrected <- NULL - } else { - assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) - } - # set the variable genes - VariableFeatures(object = assay.out) <- residual.features %||% top.features - # put log1p transformed counts in data - assay.out <- SetAssayData( - object = assay.out, - slot = 'data', - new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) - ) + scale.data <- vst.out$y # clip the residuals scale.data[scale.data < clip.range[1]] <- clip.range[1] From 8cf1b5d2fea261281107a7d2df6990a7312275c7 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 3 Nov 2022 14:37:16 -0400 Subject: [PATCH 269/979] Fix for SelectSCTIntegrationFeatures --- R/integration.R | 31 +++++++++++++++++++++++++------ R/preprocessing5.R | 11 ++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/R/integration.R b/R/integration.R index 88c1ef11e..15f5757ca 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3011,14 +3011,33 @@ SelectSCTIntegrationFeatures <- function(object, nfeatures = 3000, assay = NULL, } tie.val <- var.features[min(nfeatures, length(x = var.features))] features <- names(x = var.features[which(x = var.features > tie.val)]) - if (length(x = features)) { - features <- .FeatureRank(features = features, flist = vf.list) + if (length(x = features) > 0) { + feature.ranks <- sapply(X = features, FUN = function(x) { + ranks <- sapply(X = vf.list, FUN = function(vf) { + if (x %in% vf) { + return(which(x = x == vf)) + } + return(NULL) + }) + median(x = unlist(x = ranks)) + }) + features <- names(x = sort(x = feature.ranks)) } - features.tie <- .FeatureRank( - features = names(x = var.features[which(x = var.features == tie.val)]), - flist = vf.list + features.tie <- var.features[which(x = var.features == tie.val)] + tie.ranks <- sapply(X = names(x = features.tie), FUN = function(x) { + ranks <- sapply(X = vf.list, FUN = function(vf) { + if (x %in% vf) { + return(which(x = x == vf)) + } + return(NULL) + }) + median(x = unlist(x = ranks)) + }) + features <- c( + features, + names(x = head(x = sort(x = tie.ranks), nfeatures - length(x = features))) ) - return(head(x = c(features, features.tie), n = nfeatures)) + return(features) } #' Transfer data diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c98274bf8..c94472c6e 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1804,20 +1804,18 @@ FetchResidualSCTModel <- function(object, model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) - sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" - - layer.cells <- layer.cells %||% Cells(x = object[[umi.assay]], layer = layer) if (!is.null(reference.SCT.model)) { # use reference SCT model sct.method <- "reference" } - scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) + existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) + scale.data.cells <- colnames(x = existing.scale.data) scale.data.cells.common <- intersect(scale.data.cells, layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { - existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) + # existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) #full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), # ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) #full.scale.data[rownames(x = existing.scale.data), colnames(x = existing.scale.data)] <- existing.scale.data @@ -1835,9 +1833,8 @@ FetchResidualSCTModel <- function(object, } else { features_to_compute <- setdiff(x = new_features, y = existing_features) } - scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) if (length(features_to_compute)<1){ - return (scale.data.cells[intersect(x = rownames(x = scale.data.cells), y = new_features),,drop=FALSE]) + return (existing.scale.data[intersect(x = rownames(x = scale.data.cells), y = new_features),,drop=FALSE]) } if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { From 5bc09a9b1ce80501233d4cc270eec7bba05f4753 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 4 Nov 2022 14:57:14 -0400 Subject: [PATCH 270/979] update docs --- man/DietSeurat.Rd | 3 +++ man/IntegrateSketchEmbeddings.Rd | 1 + 2 files changed, 4 insertions(+) diff --git a/man/DietSeurat.Rd b/man/DietSeurat.Rd index 6dad20f83..e9360c0aa 100644 --- a/man/DietSeurat.Rd +++ b/man/DietSeurat.Rd @@ -12,6 +12,9 @@ DietSeurat( dimreducs = NULL, graphs = NULL, misc = TRUE, + counts = deprecated(), + data = deprecated(), + scale.data = deprecated(), ... ) } diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd index 8ac33f5d0..3fc2e034a 100644 --- a/man/IntegrateSketchEmbeddings.Rd +++ b/man/IntegrateSketchEmbeddings.Rd @@ -16,6 +16,7 @@ IntegrateSketchEmbeddings( reduction.name = NULL, reduction.key = NULL, layers = NULL, + seed = 123, verbose = TRUE ) } From 4cc17e937290b283cd1ae441c03de64f4d63a7a0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Nov 2022 15:51:52 -0500 Subject: [PATCH 271/979] SCT joint pca --- R/integration.R | 1 + R/integration5.R | 36 +++++++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index 15f5757ca..295c1def3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1408,6 +1408,7 @@ IntegrateData <- function( verbose = verbose ) } + print(i) model.list[[i]] <- slot(object = object.list[[i]][[assay]], name = "SCTModel.list") object.list[[i]][[assay]] <- suppressWarnings(expr = CreateSCTAssayObject( data = GetAssayData( diff --git a/R/integration5.R b/R/integration5.R index c8664619c..2e1e3a59a 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -236,7 +236,7 @@ JointPCAIntegration <- function( new.reduction = 'integrated.dr', reference = NULL, features = NULL, - normalization.method = NULL, + normalization.method = c("LogNormalize", "SCT"), dims = 1:30, k.anchor = 20, scale.layer = 'scale.data', @@ -244,23 +244,41 @@ JointPCAIntegration <- function( groups = NULL, ... ) { + normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) - assay <- assay %||% 'RNA' + features.diet <- features[1:2] + assay <- assay %||% DefaultAssay(object) layers <- layers %||% Layers(object, search = 'data') object.list <- list() - for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features[1:2], ] ) - object.list[[i]][['RNA']]$counts <- NULL - object.list[[i]][['joint.pca']] <- CreateDimReducObject( - embeddings = Embeddings(object = orig)[Cells(object.list[[i]]),], - assay = 'RNA', + if (normalization.method == 'SCT') { + object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') + object.sct <- DietSeurat(object = object.sct, features = features.diet) + object.sct[['joint.pca']] <- CreateDimReducObject( + embeddings = Embeddings(object = orig), + assay = 'SCT', loadings = Loadings(orig), key = 'J_' ) + object.sct$split <- groups + object.list <- SplitObject(object = object.sct,split.by = 'split') + object.list <- PrepSCTIntegration(object.list, anchor.features = features.diet) + } else { + + for (i in seq_along(along.with = layers)) { + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features.diet, ] ) + object.list[[i]][['RNA']]$counts <- NULL + object.list[[i]][['joint.pca']] <- CreateDimReducObject( + embeddings = Embeddings(object = orig)[Cells(object.list[[i]]),], + assay = 'RNA', + loadings = Loadings(orig), + key = 'J_' + ) + } } + anchor <- FindIntegrationAnchors(object.list = object.list, - anchor.features = features, + anchor.features = features.diet, scale = FALSE, reduction = 'jpca', normalization.method = normalization.method, From 07559cabe2e6898f2af41d523c0a9841ba0038f0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Nov 2022 17:17:28 -0500 Subject: [PATCH 272/979] remove SCT model with no cells --- R/integration5.R | 4 ++++ R/objects.R | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index 2e1e3a59a..acc525e25 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -263,6 +263,10 @@ JointPCAIntegration <- function( object.sct$split <- groups object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features.diet) + object.list <- lapply(object.list, function(x) { + x[['SCT']]@SCTModel.list <- list() + return(x) + }) } else { for (i in seq_along(along.with = layers)) { diff --git a/R/objects.R b/R/objects.R index 41e8931ed..6d495a4ad 100644 --- a/R/objects.R +++ b/R/objects.R @@ -2313,7 +2313,10 @@ subset.SCTAssay <- function(x, cells = NULL, features = NULL, ...) { attr <- SCTResults(object = x, slot = "cell.attributes", model = m) attr <- attr[intersect(x = rownames(x = attr), y = Cells(x = x)), , drop = FALSE] SCTResults(object = x, slot = "cell.attributes", model = m) <- attr - } + if (nrow(x = attr) == 0) { + slot(object = x, name = 'SCTModel.list')[[m]] <- NULL + } + } return(x) } From c8457ee12f39ed558c93b766e53099c0907c88cf Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Nov 2022 17:30:42 -0500 Subject: [PATCH 273/979] rpca sct --- R/integration5.R | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index acc525e25..8ef21b428 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -187,18 +187,33 @@ RPCAIntegration <- function( groups = NULL, verbose = TRUE, ...) { + normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - object.list <- list() - for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,]) - VariableFeatures(object = object.list[[i]]) <- features - object.list[[i]] <- ScaleData(object = object.list[[i]], verbose = FALSE) - object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) - object.list[[i]][['RNA']]$counts <- NULL - } + if (normalization.method == 'SCT') { + object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') + object.sct$split <- groups + + object.list <- SplitObject(object = object.sct,split.by = 'split') + object.list <- PrepSCTIntegration(object.list, anchor.features = features) + object.list <- lapply(object.list, function(x) { + x <- RunPCA(object = x, features = features, verbose = FALSE) + return(x) + } + ) + } else { + object.list <- list() + for (i in seq_along(along.with = layers)) { + object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,]) + VariableFeatures(object = object.list[[i]]) <- features + object.list[[i]] <- ScaleData(object = object.list[[i]], verbose = FALSE) + object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) + object.list[[i]][['RNA']]$counts <- NULL + } + } + anchor <- FindIntegrationAnchors(object.list = object.list, anchor.features = features, scale = FALSE, @@ -250,7 +265,7 @@ JointPCAIntegration <- function( assay <- assay %||% DefaultAssay(object) layers <- layers %||% Layers(object, search = 'data') - object.list <- list() + if (normalization.method == 'SCT') { object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') object.sct <- DietSeurat(object = object.sct, features = features.diet) @@ -268,7 +283,7 @@ JointPCAIntegration <- function( return(x) }) } else { - + object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features.diet, ] ) object.list[[i]][['RNA']]$counts <- NULL From cbafb900678e9e2b39c79a5a27992f15da081c19 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 8 Nov 2022 17:56:07 -0500 Subject: [PATCH 274/979] sct cca --- R/integration5.R | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 8ef21b428..1754d5bfc 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -133,16 +133,24 @@ CCAIntegration <- function( scale.layer = 'scale.data', verbose = TRUE, ...) { + normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') - + if (normalization.method == 'SCT') { + object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') + object.sct$split <- groups + object.list <- SplitObject(object = object.sct,split.by = 'split') + object.list <- PrepSCTIntegration(object.list, anchor.features = features) + + } else { object.list <- list() for (i in seq_along(along.with = layers)) { object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,] ) object.list[[i]][['RNA']]$scale.data <- object[[scale.layer]][features, Cells(object.list[[i]])] object.list[[i]][['RNA']]$counts <- NULL } + } anchor <- FindIntegrationAnchors(object.list = object.list, anchor.features = features, @@ -155,6 +163,10 @@ CCAIntegration <- function( verbose = verbose, ... ) + anchor@object.list <- lapply(anchor@object.list, function(x) { + x <- DietSeurat(x, features = features[1:2]) + return(x) + }) object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, @@ -212,7 +224,7 @@ RPCAIntegration <- function( object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) object.list[[i]][['RNA']]$counts <- NULL } - } + } anchor <- FindIntegrationAnchors(object.list = object.list, anchor.features = features, @@ -225,6 +237,10 @@ RPCAIntegration <- function( verbose = verbose, ... ) + anchor@object.list <- lapply(anchor@object.list, function(x) { + x <- DietSeurat(x, features = features[1:2]) + return(x) + }) object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, From 8429c3b3bec5b58fd8c0171fde4c1a56a8435c6f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 15 Nov 2022 16:22:00 -0500 Subject: [PATCH 275/979] fix version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e2e3bd3bf..313d9e684 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.0.4.9012 +Version: 4.1.1.9001 Date: 2022-10-25 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From f7002f274fe1f99a30d1b2cf1d4f814c6ed28753 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 15 Nov 2022 16:24:32 -0500 Subject: [PATCH 276/979] fix merge bug --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 90d100426..f04f5aa61 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7281,7 +7281,7 @@ FindBridgeIntegrationAnchors <- function( verbose = verbose ) return(bridge_anchor) - +} ## project delayed array to reference PCA @@ -7446,5 +7446,5 @@ FastRPCAIntegration <- function( object_merged[['pca']] <- temp VariableFeatures(object = object_merged) <- anchor.feature return(object_merged) -} +} From 71f1ca58b3a998f1d4d12c9ff6a78c43fd817e4b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 15 Nov 2022 23:00:19 -0500 Subject: [PATCH 277/979] bridge rewrite --- R/integration.R | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/R/integration.R b/R/integration.R index f04f5aa61..30b832e0c 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7102,24 +7102,13 @@ PrepareBridgeReference <- function ( laplacian.reduction = laplacian.reduction.name, laplacian.dims = laplacian.reduction.dims ) - bridge_reference.set <- new( - Class = "BridgeReferenceSet", - bridge = bridge, - reference = reference.bridge, - params = list( - reference.reduction = reference.reduction, - reference.dims = reference.dims, - reference.assay = reference.assay, - bridge.ref.assay = bridge.ref.assay, - bridge.query.assay = bridge.query.assay, - supervised.reduction = supervised.reduction, - bridge.ref.reduction = bridge.ref.reduction, - bridge.query.reduction = bridge.query.reduction, - laplacian.reduction.name = laplacian.reduction.name, - laplacian.reduction.dims = laplacian.reduction.dims - ) - ) - return(bridge_reference.set) + reference[['Bridge']] <- reference.bridge[['Bridge']] + reference <- merge(x = reference, y = bridge) + command <- LogSeuratCommand(object = reference, return.command = TRUE) + slot(object = command, name = "params")$bridge.query.features <- NULL + command.name <- slot(object = command, name = "name") + reference[[command.name]] <- command + return(reference) } @@ -7173,21 +7162,20 @@ FindBridgeTransferAnchors <- function( reduction <- match.arg(arg = reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - params <- slot(object = extended.reference, name = "params") + params <- Command(object = extended.reference, command = 'PrepareBridgeReference') bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction bridge.ref.reduction <- params$bridge.ref.reduction - DefaultAssay(extended.reference@bridge) <- bridge.query.assay - + DefaultAssay(extended.reference) <- bridge.query.assay query.anchor <- FindTransferAnchors( - reference = extended.reference@bridge, + reference = extended.reference, reference.reduction = bridge.query.reduction, dims = dims, query = query, reduction = reduction, scale = scale, - features = rownames(extended.reference@bridge[[bridge.query.reduction]]@feature.loadings), + features = rownames(Loadings(extended.reference[[bridge.query.reduction]])), k.filter = NA, verbose = verbose ) From 14b7e7067405181d0bb515b01227f05d9b0a47a7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 16 Nov 2022 21:23:29 -0500 Subject: [PATCH 278/979] fix sct inte --- R/integration5.R | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 1754d5bfc..8e0618d44 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -138,8 +138,8 @@ CCAIntegration <- function( assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') if (normalization.method == 'SCT') { - object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') - object.sct$split <- groups + object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') + object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features) @@ -205,8 +205,8 @@ RPCAIntegration <- function( layers <- layers %||% Layers(object, search = 'data') if (normalization.method == 'SCT') { - object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') - object.sct$split <- groups + object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') + object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features) @@ -280,10 +280,8 @@ JointPCAIntegration <- function( features.diet <- features[1:2] assay <- assay %||% DefaultAssay(object) layers <- layers %||% Layers(object, search = 'data') - - if (normalization.method == 'SCT') { - object.sct <- CreateSeuratObject(counts = object[['SCT']], assay = 'SCT') + object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct <- DietSeurat(object = object.sct, features = features.diet) object.sct[['joint.pca']] <- CreateDimReducObject( embeddings = Embeddings(object = orig), @@ -291,7 +289,7 @@ JointPCAIntegration <- function( loadings = Loadings(orig), key = 'J_' ) - object.sct$split <- groups + object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features.diet) object.list <- lapply(object.list, function(x) { From 1c24544a2b3aead9a43a3466a5f4f389962d7ff3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 17 Nov 2022 00:46:16 -0500 Subject: [PATCH 279/979] fix bridge transfer --- R/integration.R | 23 +++++++++++++++-------- R/objects.R | 8 ++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index 30b832e0c..5b0ea1c8d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6413,10 +6413,10 @@ FindBridgeAnchor <- function(object.list, )@misc$bridge.sets <- list( bridge.weights = slot(object = bridge.object, name = "tools" - )$MapQuery$weights.matrix, + )$MapQuery_PrepareBridgeReference$weights.matrix, bridge.ref_anchor = slot(object = bridge.object, name = "tools" - )$MapQuery$anchor[,1], + )$MapQuery_PrepareBridgeReference$anchor[,1], query.weights = slot(object = object.list[[query]], name = "tools" )$MapQuery$weights.matrix, @@ -7103,7 +7103,8 @@ PrepareBridgeReference <- function ( laplacian.dims = laplacian.reduction.dims ) reference[['Bridge']] <- reference.bridge[['Bridge']] - reference <- merge(x = reference, y = bridge) + reference <- merge(x = reference, y = bridge, merge.dr = NA) + reference@tools$MapQuery_PrepareBridgeReference <- bridge@tools$MapQuery command <- LogSeuratCommand(object = reference, return.command = TRUE) slot(object = command, name = "params")$bridge.query.features <- NULL command.name <- slot(object = command, name = "name") @@ -7166,10 +7167,15 @@ FindBridgeTransferAnchors <- function( bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction - bridge.ref.reduction <- params$bridge.ref.reduction + bridge.ref.reduction <- paste0('ref.', reference.reduction) DefaultAssay(extended.reference) <- bridge.query.assay + extended.reference.bridge <- DietSeurat( + object = extended.reference, + assays = bridge.query.assay, + dimreducs = c(bridge.ref.reduction, bridge.query.reduction, params$laplacian.reduction.name) + ) query.anchor <- FindTransferAnchors( - reference = extended.reference, + reference = extended.reference.bridge, reference.reduction = bridge.query.reduction, dims = dims, query = query, @@ -7181,13 +7187,14 @@ FindBridgeTransferAnchors <- function( ) query <- MapQuery(anchorset = query.anchor, - reference = extended.reference@bridge, + reference = extended.reference.bridge, query = query, store.weights = TRUE ) + DefaultAssay(extended.reference) <- 'Bridge' bridge_anchor <- FindBridgeAnchor( - object.list = list(extended.reference@reference, query), - bridge.object = extended.reference@bridge, + object.list = list(DietSeurat(object = extended.reference, assays = 'Bridge'), query), + bridge.object = extended.reference.bridge, object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), anchor.type = "Transfer", diff --git a/R/objects.R b/R/objects.R index 565a73865..a39ac46f9 100644 --- a/R/objects.R +++ b/R/objects.R @@ -600,6 +600,14 @@ DietSeurat <- function( for (ob in objects.to.remove) { object[[ob]] <- NULL } + cells.keep <- list() + for (assay in Assays(object = object)) { + cells.keep[[assay]] <- colnames(x = object[[assay]] ) + } + cells.keep <- intersect(colnames(x = object), unlist(cells.keep)) + if (length(cells.keep) <- ncol(x = object)) { + object <- subset(object, cells = cells.keep) + } return(object) } From 17f8a5178224b969db879fc3f97af01a16bb57c4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 17 Nov 2022 07:45:44 -0500 Subject: [PATCH 280/979] bridge inte anchor --- R/integration.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5b0ea1c8d..765d07cdb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7238,36 +7238,44 @@ FindBridgeIntegrationAnchors <- function( integration.reduction = c('direct', 'cca'), verbose = TRUE ) { + browser() reduction <- match.arg(arg = reduction) integration.reduction <- match.arg(arg = integration.reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - params <- slot(object = extended.reference, name = "params") + + params <- Command(object = extended.reference, command = 'PrepareBridgeReference') bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction - bridge.ref.reduction <- params$bridge.ref.reduction - DefaultAssay(extended.reference@bridge) <- bridge.query.assay + bridge.ref.reduction <- paste0( 'ref.', params$bridge.ref.reduction) + DefaultAssay(extended.reference) <- bridge.query.assay + extended.reference.bridge <- DietSeurat( + object = extended.reference, + assays = bridge.query.assay, + dimreducs = c(bridge.query.reduction, bridge.ref.reduction, params$laplacian.reduction.name) + ) query.anchor <- FindTransferAnchors( - reference = extended.reference@bridge, + reference = extended.reference.bridge, reference.reduction = bridge.query.reduction, dims = dims, query = query, reduction = reduction, scale = scale, - features = rownames(extended.reference@bridge[[bridge.query.reduction]]@feature.loadings), + features = rownames(Loadings(extended.reference.bridge[[bridge.query.reduction]])), k.filter = NA, verbose = verbose ) query <- MapQuery(anchorset = query.anchor, - reference = extended.reference@bridge, + reference = extended.reference.bridge, query = query, store.weights = TRUE ) + DefaultAssay(extended.reference) <- 'Bridge' bridge_anchor <- FindBridgeAnchor( - object.list = list(extended.reference@reference, query), - bridge.object = extended.reference@bridge, + object.list = list(DietSeurat(object = extended.reference, assays = 'Bridge'), query), + bridge.object = extended.reference.bridge, reduction = integration.reduction, object.reduction = c(reference.reduction, paste0('ref.', bridge.query.reduction)), bridge.reduction = c(bridge.ref.reduction, bridge.query.reduction), From 8d2b58e683b9747af1ab0c9d9a279411d387f489 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 17 Nov 2022 14:07:29 -0500 Subject: [PATCH 281/979] fix bridge inte anchor --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 765d07cdb..8fd54ce5d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7238,7 +7238,6 @@ FindBridgeIntegrationAnchors <- function( integration.reduction = c('direct', 'cca'), verbose = TRUE ) { - browser() reduction <- match.arg(arg = reduction) integration.reduction <- match.arg(arg = integration.reduction) query.assay <- query.assay %||% DefaultAssay(query) @@ -7250,6 +7249,7 @@ FindBridgeIntegrationAnchors <- function( reference.reduction <- params$reference.reduction bridge.ref.reduction <- paste0( 'ref.', params$bridge.ref.reduction) DefaultAssay(extended.reference) <- bridge.query.assay + extended.reference.bridge <- DietSeurat( object = extended.reference, assays = bridge.query.assay, From ee627fb21721cacb2b0930aaa16de3d2674e7f8c Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 17 Nov 2022 14:35:20 -0500 Subject: [PATCH 282/979] Merge upstream --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/integration.R | 25 ++++----- R/integration5.R | 4 +- R/objects.R | 12 ++--- man/JointPCAIntegration.Rd | 2 +- man/LeverageScore.Rd | 99 ------------------------------------ man/LeverageScoreSampling.Rd | 40 --------------- 8 files changed, 24 insertions(+), 161 deletions(-) delete mode 100644 man/LeverageScore.Rd delete mode 100644 man/LeverageScoreSampling.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 313d9e684..18ee1644c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -103,7 +103,7 @@ Collate: 'sketching.R' 'tree.R' 'utilities.R' -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.2 Encoding: UTF-8 Suggests: ape, diff --git a/NAMESPACE b/NAMESPACE index 74ee640a2..410efaa60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -737,6 +737,7 @@ importFrom(rlang,as_name) importFrom(rlang,caller_env) importFrom(rlang,check_installed) importFrom(rlang,enquo) +importFrom(rlang,exec) importFrom(rlang,inform) importFrom(rlang,invoke) importFrom(rlang,is_na) diff --git a/R/integration.R b/R/integration.R index 8fd54ce5d..d1897fc22 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1605,10 +1605,11 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( intdr.assay <- DefaultAssay(object = reductions) int.assay <- DefaultAssay(object = object.list[[1]]) dims.names <- paste0("drtointegrate-", dims.to.integrate) - cell.names.map <- Cells(x = unintegrated) + # cell.names.map <- Cells(x = unintegrated) + cell.names.map <- colnames(x = unintegrated) names(x = cell.names.map) <- make.unique(names = unname(obj = do.call( what = c, - args = lapply(X = object.list, FUN = Cells))) + args = lapply(X = object.list, FUN = colnames))) ) for (i in 1:length(x = object.list)) { embeddings <- t(x = Embeddings(object = reductions)[cell.names.map[Cells(x = object.list[[i]])], dims.to.integrate]) @@ -5938,8 +5939,8 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( #' Convert Neighbor class to an asymmetrical Graph class -#' -#' +#' +#' #' @param nn.object A neighbor class object #' @param col.cells Cells names of the neighbors, cell names in nn.object is used by default #' @param weighted Determine if use distance in the Graph @@ -7208,7 +7209,7 @@ FindBridgeTransferAnchors <- function( #' Find integration bridge anchors between query and extended bridge-reference -#' +#' #' Find a set of anchors between unimodal query and the other unimodal reference #' using a pre-computed \code{\link{BridgeReferenceSet}}. #' These integration anchors can later be used to integrate query and reference @@ -7223,7 +7224,7 @@ FindBridgeTransferAnchors <- function( #' \item{cca: perform cca on the on the bridge representation space and then find anchors #' } #' } -#' +#' #' @export #' @return Returns an \code{AnchorSet} object that can be used as input to #' \code{\link{IntegrateEmbeddings}}. @@ -7242,20 +7243,20 @@ FindBridgeIntegrationAnchors <- function( integration.reduction <- match.arg(arg = integration.reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - + params <- Command(object = extended.reference, command = 'PrepareBridgeReference') bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction bridge.ref.reduction <- paste0( 'ref.', params$bridge.ref.reduction) DefaultAssay(extended.reference) <- bridge.query.assay - + extended.reference.bridge <- DietSeurat( - object = extended.reference, - assays = bridge.query.assay, + object = extended.reference, + assays = bridge.query.assay, dimreducs = c(bridge.query.reduction, bridge.ref.reduction, params$laplacian.reduction.name) ) - + query.anchor <- FindTransferAnchors( reference = extended.reference.bridge, reference.reduction = bridge.query.reduction, @@ -7424,7 +7425,7 @@ FastRPCAIntegration <- function( y = object.list[2:length(object.list)] ) - + anchor.feature <- slot(object = anchor, name = 'anchor.features') if (normalization.method != 'SCT') { object_merged <- ScaleData(object = object_merged, diff --git a/R/integration5.R b/R/integration5.R index 8e0618d44..1de120031 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -142,7 +142,7 @@ CCAIntegration <- function( object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features) - + } else { object.list <- list() for (i in seq_along(along.with = layers)) { @@ -207,7 +207,7 @@ RPCAIntegration <- function( if (normalization.method == 'SCT') { object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] - + object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features) object.list <- lapply(object.list, function(x) { diff --git a/R/objects.R b/R/objects.R index a39ac46f9..b287cf8b3 100644 --- a/R/objects.R +++ b/R/objects.R @@ -122,21 +122,21 @@ ModalityWeights <- setClass( #' The BridgeReferenceSet Class #' The BridgeReferenceSet is an output from PrepareBridgeReference #' @slot bridge The multi-omic object -#' @slot reference The Reference object only containing bridge representation assay +#' @slot reference The Reference object only containing bridge representation assay #' @slot params A list of parameters used in the PrepareBridgeReference #' @slot command Store log of parameters that were used -#' +#' #' @name BridgeReferenceSet-class #' @rdname BridgeReferenceSet-class #' @concept objects #' @exportClass BridgeReferenceSet -#' +#' BridgeReferenceSet <- setClass( Class = "BridgeReferenceSet", slots = list( bridge = "ANY", reference = "ANY", - params = "list", + params = "list", command = "ANY" ) ) @@ -2505,9 +2505,9 @@ setMethod( ncol(slot(object = object, name = 'bridge')), 'cells and a reference object with ', ncol(slot(object = object, name = 'reference')), - 'cells. \n','The bridge query reduction is ', + 'cells. \n','The bridge query reduction is ', slot(object = object, name = 'params')$bridge.query.reduction %||% - slot(object = object, name = 'params')$supervised.reduction, + slot(object = object, name = 'params')$supervised.reduction, "\n This can be used as input to FindBridgeTransferAnchors and FindBridgeIntegrationAnchors") } ) diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd index 5a3b11beb..1880df070 100644 --- a/man/JointPCAIntegration.Rd +++ b/man/JointPCAIntegration.Rd @@ -12,7 +12,7 @@ JointPCAIntegration( new.reduction = "integrated.dr", reference = NULL, features = NULL, - normalization.method = NULL, + normalization.method = c("LogNormalize", "SCT"), dims = 1:30, k.anchor = 20, scale.layer = "scale.data", diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd deleted file mode 100644 index b93babe99..000000000 --- a/man/LeverageScore.Rd +++ /dev/null @@ -1,99 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generics.R, R/integration.R -\name{LeverageScore} -\alias{LeverageScore} -\alias{LeverageScore.default} -\alias{LeverageScore.Assay} -\alias{LeverageScore.Seurat} -\title{Calculate Leverage score for all cells} -\usage{ -LeverageScore(object, ...) - -\method{LeverageScore}{default}( - object, - features = NULL, - nsketch = 5000L, - ndims = NULL, - sampling.method = c("CountSketch", "Gaussian"), - eps = 0.5, - seed = 123, - verbose = TRUE, - ... -) - -\method{LeverageScore}{Assay}( - object, - features = NULL, - nsketch = 5000L, - ndims = NULL, - sampling.method = c("CountSketch", "Gaussian")[1], - slot = "data", - seed = 123, - eps = 0.5, - verbose = TRUE, - ... -) - -\method{LeverageScore}{Seurat}( - object, - features = NULL, - assay = NULL, - nsketch = 5000L, - ndims = NULL, - var.name = "leverage.score", - sampling.method = c("CountSketch", "Gaussian")[1], - slot = "data", - eps = 0.5, - seed = 123, - over.write = FALSE, - verbose = TRUE, - ... -) -} -\arguments{ -\item{object}{A Seurat object} - -\item{...}{Arguments passed to other methods} - -\item{features}{Features used to calculate leverage score} - -\item{nsketch}{Number of rows in the random sketch matrix (default is 5000)} - -\item{ndims}{Number of dimensions in the Johnson–Lindenstrauss (JL) embeddings (default is all dimensions)} - -\item{sampling.method}{Sampling method for generating random matrix -\itemize{ - \item{CountSketch: generate a sparsed \code{CountSketch} random matrix} - \item{Gaussian: generate a gaussian random matrix with mean = 0 and sd = 1 / (ncells ^ 2)} -}} - -\item{eps}{error tolerance for JL embeddings (default is 0.5)} - -\item{seed}{Set a random seed (default is 123)} - -\item{verbose}{Print message and process (default is TRUE)} - -\item{slot}{The slot used for leverage score calculation. data slot is used by default} - -\item{assay}{Assay used to calculate leverage score} - -\item{var.name}{Variable name stored leverage score in the meta.data (default is 'leverage.score')} - -\item{over.write}{Whether to over write the variable with leverage score (default is FALSE)} -} -\value{ -Returns a seurat object with additional column storing leverage score -} -\description{ -Leverage score can be used to sample representative cells from scRNA data. -The more abundant population will be assigned less leverage score. -Leverage-score can guarantee that both abundant and rare populations will -be sampled. We used variable features in the data slot to calculate leverage -score for all cells. -} -\references{ -Clarkson KL, Woodruff DP. -Low Rank Approximation and Regression in Input Sparsity Time. -Journal of the ACM (JACM). 2017 Jan 30;63(6):1-45. -\url{https://https://arxiv.org/abs/1207.6365}; -} diff --git a/man/LeverageScoreSampling.Rd b/man/LeverageScoreSampling.Rd deleted file mode 100644 index 36e482344..000000000 --- a/man/LeverageScoreSampling.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/integration.R -\name{LeverageScoreSampling} -\alias{LeverageScoreSampling} -\title{Sampling cells from objects based on Leverage score} -\usage{ -LeverageScoreSampling( - object, - num.cells = 5000, - assay = NULL, - features = NULL, - var.name = "leverage.score", - over.write = FALSE, - seed = 123, - ... -) -} -\arguments{ -\item{object}{A Seurat object} - -\item{num.cells}{Number of sampled cells (default is 5000)} - -\item{assay}{Assay used to calculate leverage score} - -\item{features}{Features used to calculate leverage score} - -\item{var.name}{Variable name stored leverage score in the meta.data (default is 'leverage.score')} - -\item{over.write}{Whether to over write the variable with leverage score (default is FALSE)} - -\item{seed}{Set a random seed (default is 123)} - -\item{...}{Arguments passed to LeverageScore} -} -\value{ -Returns a subset Seurat object with sampled cells -} -\description{ -Sampling cells from objects based on Leverage score -} From db20ba2e57619fc504aef8b848be9e512996f904 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 17 Nov 2022 20:55:08 -0500 Subject: [PATCH 283/979] Update minimum version of SeuratObject --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 18ee1644c..20a9fd24b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.1.1.9001 -Date: 2022-10-25 +Version: 4.9.9.9000 +Date: 2022-11-17 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9036) + SeuratObject (>= 4.9.9.9041) Imports: cluster, cowplot, From bc9d53999c83d1a0eb225aaadf6bbdf700261e13 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Mon, 28 Nov 2022 14:22:11 -0500 Subject: [PATCH 284/979] Set key --- R/integration.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 05c7b4bf8..ab9174ca2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4484,7 +4484,8 @@ PairwiseIntegrateReference <- function( scale.data = matrix(), var.features = vector(), meta.features = data.frame(row.names = rownames(x = integrated.data)), - misc = NULL + misc = NULL, + key = paste0(new.assay.name, "_") ) unintegrated[[new.assay.name]] <- new.assay # "unintegrated" now contains the integrated assay @@ -5091,7 +5092,8 @@ TransformDataMatrix <- function( scale.data = matrix(), var.features = vector(), meta.features = data.frame(row.names = rownames(x = new.expression)), - misc = NULL + misc = NULL, + key = paste0(new.assay.name, "_") ) object[[new.assay.name]] <- new.assay return(object) From c1d2fb91770d5d6c15a48aeae07cc090fdacc210 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 29 Nov 2022 16:43:44 -0500 Subject: [PATCH 285/979] Add v5 methods for FindMarkers --- NAMESPACE | 2 ++ R/differential_expression.R | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 410efaa60..7b4fbafa1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ S3method(FindMarkers,Assay) S3method(FindMarkers,DimReduc) S3method(FindMarkers,SCTAssay) S3method(FindMarkers,Seurat) +S3method(FindMarkers,StdAssay) S3method(FindMarkers,default) S3method(FindNeighbors,Assay) S3method(FindNeighbors,Seurat) @@ -39,6 +40,7 @@ S3method(FindVariableFeatures,default) S3method(FoldChange,Assay) S3method(FoldChange,DimReduc) S3method(FoldChange,Seurat) +S3method(FoldChange,StdAssay) S3method(FoldChange,default) S3method(GetAssay,Seurat) S3method(GetImage,STARmap) diff --git a/R/differential_expression.R b/R/differential_expression.R index e08ff249e..e04141159 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -685,6 +685,11 @@ FindMarkers.Assay <- function( return(de.results) } +#' @method FindMarkers StdAssay +#' @export +#' +FindMarkers.StdAssay <- FindMarkers.Assay + #' @param recorrect_umi Recalculate corrected UMI counts using minimum of the median UMIs when performing DE using multiple SCT objects; default is TRUE #' #' @rdname FindMarkers @@ -1112,6 +1117,11 @@ FoldChange.Assay <- function( ) } +#' @method FoldChange StdAssay +#' @export +#' +FoldChange.StdAssay <- FoldChange.Assay + #' @importFrom Matrix rowMeans #' @rdname FoldChange #' @concept differential_expression From 911afbb9b2bed8ed8bc272eed37fe31a7b4239a9 Mon Sep 17 00:00:00 2001 From: timoast <4591688+timoast@users.noreply.github.com> Date: Wed, 30 Nov 2022 12:38:14 -0500 Subject: [PATCH 286/979] Update meta feature assignment --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 71ddb051f..221ceaa52 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2008,7 +2008,7 @@ FindVariableFeatures.Assay <- function( verbose = verbose, ... ) - object[[names(x = hvf.info)]] <- hvf.info + object[names(x = hvf.info)] <- hvf.info hvf.info <- hvf.info[which(x = hvf.info[, 1, drop = TRUE] != 0), ] if (selection.method == "vst") { hvf.info <- hvf.info[order(hvf.info$vst.variance.standardized, decreasing = TRUE), , drop = FALSE] @@ -2039,7 +2039,7 @@ FindVariableFeatures.Assay <- function( no = 'mvp' ) vf.name <- paste0(vf.name, '.variable') - object[[vf.name]] <- rownames(x = object[[]]) %in% top.features + object[vf.name] <- rownames(x = object[]) %in% top.features return(object) } From 9d62f48f093fd053b06aa37211961bd4f9dcbb7c Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 30 Nov 2022 21:59:07 -0500 Subject: [PATCH 287/979] style updates --- R/integration.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index d1897fc22..e0feb2198 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3048,7 +3048,13 @@ SelectIntegrationFeatures5 <- function( #' @export #' -SelectSCTIntegrationFeatures <- function(object, nfeatures = 3000, assay = NULL, verbose = TRUE, ...) { +SelectSCTIntegrationFeatures <- function( + object, + nfeatures = 3000, + assay = NULL, + verbose = TRUE, + ... +) { assay <- assay %||% DefaultAssay(object = object) if (!inherits(x = object[[assay]], what = 'SCTAssay')) { abort(message = "'assay' must be an SCTAssay") From 24fc098e477702d2f7186c17aa0783874a2d9f87 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 30 Nov 2022 21:59:43 -0500 Subject: [PATCH 288/979] Use VariableFeatures in ScaleData for v5 assays --- R/preprocessing5.R | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c94472c6e..62e7794c8 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -638,17 +638,9 @@ ScaleData.StdAssay <- function( message = "'use.umi' is TRUE, please make sure 'layer' specifies raw counts" ) } - features <- features %||% Reduce( - f = union, - x = lapply( - X = layer, - FUN = function(x) { - return(VariableFeatures(object = object, layer = x)) - } - ) - ) + features <- features %||% VariableFeatures(object = object) if (!length(x = features)) { - features <- Reduce(f = union, x = lapply(X = layer, FUN = Features, x = object)) + features <- Features(x = object, layer = layer) } if (isTRUE(x = by.layer)) { if (length(x = save) != length(x = layer)) { From c0effff7d65e92bd3030b705ea0950918ec1fe65 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 30 Nov 2022 22:00:10 -0500 Subject: [PATCH 289/979] Improvements for sketching --- R/sketching.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 756a53681..4e2aa7544 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -115,9 +115,13 @@ LeverageScoreSampling <- function( if (!is.na(x = seed)) { set.seed(seed = seed) } + lcells <- Cells(x = object[[assay]], layer = vars[i]) + if (length(x = lcells) < ncells) { + return(lcells) + } return(sample( - x = Cells(x = object[[assay]], layer = vars[i]), - size = ncells, + x = lcells, + size = min(ncells), prob = object[[names(x = vars)[i], drop = TRUE, na.rm = TRUE]] )) }, @@ -167,10 +171,14 @@ LeverageScore.default <- function( ) { # Check the dimensions of the object, nsketch, and ndims ncells <- ncol(x = object) + if (ncells < nsketch) { + warn(message = "Too few cells to sketch, returning score of 1") + return(rep_len(x = 1L, length.out = ncells)) + } if (nrow(x = object) > 5000L) { - stop("too slow", call. = FALSE) + abort(message = "too slow") } else if (nrow(x = object) > (ncells / 1.1)) { - stop("too square", call. = FALSE) + abort(message = "too square") } ndims <- ndims %||% ncells if (nsketch < (1.1 * nrow(x = object))) { @@ -341,7 +349,7 @@ LeverageScore.StdAssay <- function( #' @method LeverageScore Assay #' @export -#' +#' LeverageScore.Assay <- LeverageScore.StdAssay @@ -502,9 +510,9 @@ SketchMatrixProd <- function( method = CountSketch, block.size = 1e9, nsketch = 5000L, - seed = 123L, + seed = 123L, ...) { - + if (is_quosure(x = method)) { method <- eval( expr = quo_get_expr(quo = method), @@ -522,7 +530,7 @@ SketchMatrixProd <- function( for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) - + if (sparse) { block <- as(object = block, Class = 'dgCMatrix') } else { From 592c264708fc25a60d8d7bfe00ff19c059cc7d1f Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 30 Nov 2022 22:00:24 -0500 Subject: [PATCH 290/979] Improvements to IntegrateLayers --- R/integration5.R | 12 +++++++++--- man/IntegrateLayers.Rd | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 1de120031..65dec750c 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -361,7 +361,7 @@ attr(x = JointPCAIntegration, which = 'Seurat.method') <- 'integration' IntegrateLayers <- function( object, method, - orig = NULL, + orig = 'pca', group.by = NULL, assay = NULL, features = NULL, @@ -394,10 +394,16 @@ IntegrateLayers <- function( } else if (inherits(x = object[[assay]], what = 'StdAssay')) { layers <- Layers(object = object, assay = assay, search = layers) scale.layer <- Layers(object = object, search = scale.layer) - features <- features %||% SelectIntegrationFeatures5( + features <- features %||% VariableFeatures( object = object, - assay = assay + assay = assay, + layer = layers, + nfeatures = 2000L ) + # features <- features %||% SelectIntegrationFeatures5( + # object = object, + # assay = assay + # ) } else { abort(message = "'assay' must be a v5 or SCT assay") } diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd index 85b34ed32..895045fb8 100644 --- a/man/IntegrateLayers.Rd +++ b/man/IntegrateLayers.Rd @@ -7,7 +7,7 @@ IntegrateLayers( object, method, - orig = NULL, + orig = "pca", group.by = NULL, assay = NULL, features = NULL, From d7cd8deb0c11fffe9885206f44500c8cfdb1e073 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Wed, 30 Nov 2022 22:01:01 -0500 Subject: [PATCH 291/979] bump v5 version --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 20a9fd24b..a5dd16f9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9000 -Date: 2022-11-17 +Version: 4.9.9.9010 +Date: 2022-11-30 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9041) + SeuratObject (>= 4.9.9.9043) Imports: cluster, cowplot, From 0f06a3eaa2206ec95ea8220e7150aa79381dbad0 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 1 Dec 2022 13:17:58 -0500 Subject: [PATCH 292/979] FindMarkers should only use cells present --- R/differential_expression.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/differential_expression.R b/R/differential_expression.R index e04141159..ee2bc13cd 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -972,6 +972,18 @@ FindMarkers.Seurat <- function( ident.2 = ident.2, cellnames.use = cellnames.use ) + cells <- sapply( + X = cells, + FUN = intersect, + y = cellnames.use, + simplify = FALSE, + USE.NAMES = TRUE + ) + if (!all(vapply(X = cells, FUN = length, FUN.VALUE = integer(length = 1L)))) { + abort( + message = "Cells in one or both identity groups are not present in the data requested" + ) + } # fetch latent.vars if (!is.null(x = latent.vars)) { latent.vars <- FetchData( From f085cbb9caa18e9a0642b9aeaddb85ef751af671 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 1 Dec 2022 14:40:05 -0500 Subject: [PATCH 293/979] leverage score for small data --- R/sketching.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 4e2aa7544..3c5bf9bd1 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -155,6 +155,7 @@ LeverageScoreSampling <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #' @importFrom Matrix qrR t +#' @importFrom irlba irlba #' #' @method LeverageScore default #' @export @@ -172,8 +173,8 @@ LeverageScore.default <- function( # Check the dimensions of the object, nsketch, and ndims ncells <- ncol(x = object) if (ncells < nsketch) { - warn(message = "Too few cells to sketch, returning score of 1") - return(rep_len(x = 1L, length.out = ncells)) + Z <- irlba(A = object, nv = 50, nu = 0, verbose = FALSE)$v + return(rowSums(x = Z ^ 2)) } if (nrow(x = object) > 5000L) { abort(message = "too slow") From 29fe5a49fff0a6a7025dfcc43c8045508336e66d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 1 Dec 2022 15:53:08 -0500 Subject: [PATCH 294/979] pseudo bulk for delayed array --- R/utilities.R | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index ec4867408..9dab957a6 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1219,7 +1219,7 @@ PseudobulkExpression <- function( if (!(pb.method %in% c('average', 'aggregate'))) { stop("'pb.method' must be either 'average' or 'aggregate'") } - object.assays <- FilterObjects(object = object, classes.keep = 'Assay') + object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) assays <- assays %||% object.assays if (!all(assays %in% object.assays)) { assays <- assays[assays %in% object.assays] @@ -1295,6 +1295,8 @@ PseudobulkExpression <- function( }) } data.return <- list() + + for (i in 1:length(x = assays)) { data.use <- GetAssayData( object = object, @@ -1332,7 +1334,11 @@ PseudobulkExpression <- function( warning("Exponentiation yielded infinite values. `data` may not be log-normed.") } } - data.return[[i]] <- data.use %*% category.matrix + if (inherits(x = data.use, what = 'DelayedArray')) { + data.return[[i]] <- tcrossprod_DelayedAssay(x = data.use, y = t(category.matrix)) + } else { + data.return[[i]] <- data.use %*% category.matrix + } names(x = data.return)[i] <- assays[[i]] } if (return.seurat) { @@ -2468,6 +2474,37 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { rownames(product.mat) <- rownames(x) return(product.mat) } + +# transpose cross product from delayed array +# +tcrossprod_DelayedAssay <- function(x, y, block.size = 1e8) { + # perform x %*% t(y) in blocks for x + if (!inherits(x = x, 'DelayedMatrix')) { + stop('y should a DelayedMatrix') + } + if (ncol(x) != ncol(y)) { + stop('column of x and y should be the same') + } + sparse <- DelayedArray::is_sparse(x = x) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = x) + product.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + vp.range <- vp@ranges[2]@start : (vp@ranges[2]@start + vp@ranges[2]@width - 1) + block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) + if (sparse) { + block <- as(object = block, Class = 'dgCMatrix') + } else { + block <- as(object = block, Class = 'Matrix') + } + product.list[[i]] <- as.matrix( block %*% t(y[,vp.range])) + } + product.mat <- Reduce(f = '+', product.list) + colnames(product.mat) <- rownames(y) + rownames(product.mat) <- rownames(x) + return(product.mat) +} # cross product row norm from delayed array # From 35f38a659d2984af4ca49bcb225391fe90f05fba Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 1 Dec 2022 17:38:58 -0500 Subject: [PATCH 295/979] Suppress warnings for FeaturePlot --- DESCRIPTION | 6 +++--- R/visualization.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a5dd16f9f..cf3b14ccd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9010 -Date: 2022-11-30 +Version: 4.9.9.9011 +Date: 2022-12-01 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9043) + SeuratObject (>= 4.9.9.9044) Imports: cluster, cowplot, diff --git a/R/visualization.R b/R/visualization.R index a449e158a..3e7e403cc 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -830,7 +830,7 @@ DimPlot <- function( } reduction <- reduction %||% DefaultDimReduc(object = object) # cells <- cells %||% colnames(x = object) - + ##### Cells for all cells in the assay. #### Cells function should not only get default layer cells <- cells %||% Cells( @@ -1117,7 +1117,7 @@ FeaturePlot <- function( } # Name the reductions dims <- paste0(Key(object = object[[reduction]]), dims) - cells <- cells %||% colnames(x = object) + cells <- cells %||% Cells(x = object[[reduction]]) # Get plotting data data <- FetchData( object = object, From 5cd8465a1cfcf3d930e0d1f034f80ae4222254d4 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Fri, 2 Dec 2022 16:17:50 -0500 Subject: [PATCH 296/979] Always use variable features in LeverageScore, allow users to change VF method for selecting variable featurs --- R/sketching.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 3c5bf9bd1..b8ca4474e 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -298,16 +298,16 @@ LeverageScore.DelayedMatrix <- function( } - #' @method LeverageScore StdAssay #' @export #' LeverageScore.StdAssay <- function( object, - features = NULL, + # features = NULL, nsketch = 5000L, ndims = NULL, method = CountSketch, + vf.method = NULL, layer = 'data', eps = 0.5, seed = 123L, @@ -331,7 +331,11 @@ LeverageScore.StdAssay <- function( object = LayerData( object = object, layer = l, - features = features %||% VariableFeatures(object = object, layer = l), + features = VariableFeatures( + object = object, + method = vf.method, + layer = l + ), fast = TRUE ), nsketch = nsketch, @@ -347,24 +351,22 @@ LeverageScore.StdAssay <- function( return(scores) } - #' @method LeverageScore Assay #' @export #' LeverageScore.Assay <- LeverageScore.StdAssay - - #' @method LeverageScore Seurat #' @export #' LeverageScore.Seurat <- function( object, assay = NULL, - features = NULL, + # features = NULL, nsketch = 5000L, ndims = NULL, method = CountSketch, + vf.method = NULL, layer = 'data', eps = 0.5, seed = 123L, @@ -376,10 +378,11 @@ LeverageScore.Seurat <- function( method <- enquo(arg = method) scores <- LeverageScore( object = object[[assay]], - features = features, + # features = features, nsketch = nsketch, ndims = ndims, method = method, + vf.method = vf.method, layer = layer, eps = eps, seed = seed, From 29aa4a9aaf6f7b1f8dde67231d4ba1198f560095 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 4 Dec 2022 14:30:28 -0500 Subject: [PATCH 297/979] set options --- R/integration5.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/integration5.R b/R/integration5.R index 65dec750c..39ffcaf5b 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -133,6 +133,7 @@ CCAIntegration <- function( scale.layer = 'scale.data', verbose = TRUE, ...) { + op <- options(Seurat.object.assay.version = "v3") normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' @@ -174,6 +175,7 @@ CCAIntegration <- function( ) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) + on.exit(expr = options(op), add = TRUE) return(output.list) } @@ -199,6 +201,7 @@ RPCAIntegration <- function( groups = NULL, verbose = TRUE, ...) { + op <- options(Seurat.object.assay.version = "v3") normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' @@ -249,6 +252,7 @@ RPCAIntegration <- function( output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) + on.exit(expr = options(op), add = TRUE) return(output.list) } @@ -275,6 +279,7 @@ JointPCAIntegration <- function( groups = NULL, ... ) { + op <- options(Seurat.object.assay.version = "v3") normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) features.diet <- features[1:2] @@ -328,6 +333,7 @@ JointPCAIntegration <- function( verbose = verbose) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) + on.exit(expr = options(op), add = TRUE) return(output.list) } From 330ac262f9f2af62a566843f74b4c85fe95e574c Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 5 Dec 2022 16:37:44 -0500 Subject: [PATCH 298/979] RunUMAP reduction.key defaults to the keyed version of reduction.name Addresses #680 --- R/dimensional_reduction.R | 6 +++--- man/RunUMAP.Rd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index cf997f445..bf9bd88e9 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1469,7 +1469,7 @@ RunUMAP.default <- function( } if (is.list(x = object)) { if (ncol(object$idx) != model$n_neighbors) { - warning("Number of neighbors between query and reference ", + warning("Number of neighbors between query and reference ", "is not equal to the number of neighbors within reference") model$n_neighbors <- ncol(object$idx) } @@ -1773,7 +1773,7 @@ RunUMAP.Seurat <- function( dens.var.shift = 0.1, verbose = TRUE, reduction.name = 'umap', - reduction.key = 'UMAP_', + reduction.key = NULL, ... ) { CheckDots(...) @@ -1857,7 +1857,7 @@ RunUMAP.Seurat <- function( dens.lambda = dens.lambda, dens.frac = dens.frac, dens.var.shift = dens.var.shift, - reduction.key = reduction.key, + reduction.key = reduction.key %||% Key(object = reduction.name, quiet = TRUE), verbose = verbose ) object <- LogSeuratCommand(object = object) diff --git a/man/RunUMAP.Rd b/man/RunUMAP.Rd index 54b2319ae..fba7ec869 100644 --- a/man/RunUMAP.Rd +++ b/man/RunUMAP.Rd @@ -103,7 +103,7 @@ RunUMAP(object, ...) dens.var.shift = 0.1, verbose = TRUE, reduction.name = "umap", - reduction.key = "UMAP_", + reduction.key = NULL, ... ) } From 52a20cad386bdfe58a2aa5a16112d5f5d8df9dde Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 5 Dec 2022 17:45:47 -0500 Subject: [PATCH 299/979] Fixes to DimPlot Addresses #679 --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/visualization.R | 22 +++++++++++++++------- R/zzz.R | 1 + 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cf3b14ccd..cb5f1d3ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9044) + SeuratObject (>= 4.9.9.9045) Imports: cluster, cowplot, diff --git a/NAMESPACE b/NAMESPACE index 7b4fbafa1..3f384898f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -742,6 +742,7 @@ importFrom(rlang,enquo) importFrom(rlang,exec) importFrom(rlang,inform) importFrom(rlang,invoke) +importFrom(rlang,is_integerish) importFrom(rlang,is_na) importFrom(rlang,is_quosure) importFrom(rlang,is_scalar_character) diff --git a/R/visualization.R b/R/visualization.R index 3e7e403cc..e47d8a1b1 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -825,25 +825,33 @@ DimPlot <- function( raster = NULL, raster.dpi = c(512, 512) ) { - if (length(x = dims) != 2) { - stop("'dims' must be a two-length vector") + if (!is_bare_integerish(x = dims, n = 2L, finite = TRUE) || !all(dims > 0L)) { + abort(message = "'dims' must be a two-length integer vector") } reduction <- reduction %||% DefaultDimReduc(object = object) # cells <- cells %||% colnames(x = object) - ##### Cells for all cells in the assay. #### Cells function should not only get default layer cells <- cells %||% Cells( x = object, assay = DefaultAssay(object = object[[reduction]]) ) - data <- Embeddings(object = object[[reduction]])[cells, dims] - data <- as.data.frame(x = data) + # data <- Embeddings(object = object[[reduction]])[cells, dims] + # data <- as.data.frame(x = data) dims <- paste0(Key(object = object[[reduction]]), dims) - object[['ident']] <- Idents(object = object) orig.groups <- group.by group.by <- group.by %||% 'ident' - data <- cbind(data, object[[group.by]][cells, , drop = FALSE]) + data <- FetchData( + object = object, + vars = c(dims, group.by), + cells = cells, + clean = 'project' + ) + # cells <- rownames(x = object) + # object[['ident']] <- Idents(object = object) + # orig.groups <- group.by + # group.by <- group.by %||% 'ident' + # data <- cbind(data, object[[group.by]][cells, , drop = FALSE]) group.by <- colnames(x = data)[3:ncol(x = data)] for (group in group.by) { if (!is.factor(x = data[, group])) { diff --git a/R/zzz.R b/R/zzz.R index 7c5dff062..1d42b8ee7 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -8,6 +8,7 @@ #' check_installed #' enquo #' inform +#' is_integerish #' is_na #' is_quosure #' is_scalar_integerish From 0968b300800cc41652dfe314a7a09920ba90c688 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 5 Dec 2022 19:07:55 -0500 Subject: [PATCH 300/979] Fixes for FeaturePlot for v5 Improve argument checking Use FetchData for more things --- NAMESPACE | 1 + R/visualization.R | 124 +++++++++++++++++++++------------------------ R/zzz.R | 1 + man/FeaturePlot.Rd | 2 +- 4 files changed, 62 insertions(+), 66 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3f384898f..0ba816320 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -734,6 +734,7 @@ importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") importFrom(rlang,abort) importFrom(rlang,arg_match) +importFrom(rlang,arg_match0) importFrom(rlang,as_label) importFrom(rlang,as_name) importFrom(rlang,caller_env) diff --git a/R/visualization.R b/R/visualization.R index e47d8a1b1..c7eb4932f 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -825,7 +825,7 @@ DimPlot <- function( raster = NULL, raster.dpi = c(512, 512) ) { - if (!is_bare_integerish(x = dims, n = 2L, finite = TRUE) || !all(dims > 0L)) { + if (!is_integerish(x = dims, n = 2L, finite = TRUE) || !all(dims > 0L)) { abort(message = "'dims' must be a two-length integer vector") } reduction <- reduction %||% DefaultDimReduc(object = object) @@ -1026,25 +1026,21 @@ FeaturePlot <- function( ncol = NULL, coord.fixed = FALSE, by.col = TRUE, - sort.cell = NULL, + sort.cell = deprecated(), interactive = FALSE, combine = TRUE, raster = NULL, raster.dpi = c(512, 512) ) { # TODO: deprecate fully on 3.2.0 - if (!is.null(x = sort.cell)) { - warning( - "The sort.cell parameter is being deprecated. Please use the order ", - "parameter instead for equivalent functionality.", - call. = FALSE, - immediate. = TRUE + if (is_present(arg = sort.cell)) { + deprecate_stop( + when = '4.9.0', + what = 'FeaturePlot(sort.cell = )', + with = 'FeaturePlot(order = )' ) - if (isTRUE(x = sort.cell)) { - order <- sort.cell - } } - if (interactive) { + if (isTRUE(x = interactive)) { return(IFeaturePlot( object = object, feature = features[1], @@ -1054,8 +1050,8 @@ FeaturePlot <- function( )) } # Check keep.scale param for valid entries - if (!(is.null(x = keep.scale)) && !(keep.scale %in% c("feature", "all"))) { - stop("`keep.scale` must be set to either `feature`, `all`, or NULL") + if (!is.null(x = keep.scale)) { + keep.scale <- arg_match0(arg = keep.scale, values = c('feature', 'all')) } # Set a theme to remove right-hand Y axis lines # Also sets right-hand Y axis text label formatting @@ -1071,57 +1067,47 @@ FeaturePlot <- function( ) # Get the DimReduc to use reduction <- reduction %||% DefaultDimReduc(object = object) - if (length(x = dims) != 2 || !is.numeric(x = dims)) { - stop("'dims' must be a two-length integer vector") + if (!is_integerish(x = dims, n = 2L, finite = TRUE) && !all(dims > 0L)) { + abort(message = "'dims' must be a two-length integer vector") } # Figure out blending stuff - if (blend && length(x = features) != 2) { - stop("Blending feature plots only works with two features") + if (isTRUE(x = blend) && length(x = features) != 2) { + abort(message = "Blending feature plots only works with two features") } # Set color scheme for blended FeaturePlots - if (blend) { + if (isTRUE(x = blend)) { default.colors <- eval(expr = formals(fun = FeaturePlot)$cols) cols <- switch( EXPR = as.character(x = length(x = cols)), '0' = { - warning( - "No colors provided, using default colors", - call. = FALSE, - immediate. = TRUE - ) + warn(message = "No colors provided, using default colors") default.colors }, '1' = { - warning( - "Only one color provided, assuming specified is double-negative and augmenting with default colors", - call. = FALSE, - immediate. = TRUE - ) + warn(message = paste( + "Only one color provided, assuming", + sQuote(x = cols), + "is double-negative and augmenting with default colors" + )) c(cols, default.colors[2:3]) }, '2' = { - warning( - "Only two colors provided, assuming specified are for features and agumenting with '", - default.colors[1], - "' for double-negatives", - call. = FALSE, - immediate. = TRUE - ) + warn(message = paste( + "Only two colors provided, assuming specified are for features and agumenting with", + sQuote(default.colors[1]), + "for double-negatives", + )) c(default.colors[1], cols) }, '3' = cols, { - warning( - "More than three colors provided, using only first three", - call. = FALSE, - immediate. = TRUE - ) + warn(message = "More than three colors provided, using only first three") cols[1:3] } ) } - if (blend && length(x = cols) != 3) { - stop("Blending feature plots only works with three colors; first one for negative cells") + if (isTRUE(x = blend) && length(x = cols) != 3) { + abort("Blending feature plots only works with three colors; first one for negative cells") } # Name the reductions dims <- paste0(Key(object = object[[reduction]]), dims) @@ -1135,15 +1121,14 @@ FeaturePlot <- function( ) # Check presence of features/dimensions if (ncol(x = data) < 4) { - stop( - "None of the requested features were found: ", + abort(message = paste( + "None of the requested features were found:", paste(features, collapse = ', '), - " in slot ", - slot, - call. = FALSE - ) + "in slot ", + slot + )) } else if (!all(dims %in% colnames(x = data))) { - stop("The dimensions requested were not found", call. = FALSE) + abort(message = "The dimensions requested were not found") } features <- setdiff(x = names(x = data), y = c(dims, 'ident')) # Determine cutoffs @@ -1175,7 +1160,9 @@ FeaturePlot <- function( FUN.VALUE = numeric(length = 1) )) if (length(x = check.lengths) != 1) { - stop("There must be the same number of minimum and maximum cuttoffs as there are features") + abort( + message = "There must be the same number of minimum and maximum cuttoffs as there are features" + ) } names(x = min.cutoff) <- names(x = max.cutoff) <- features brewer.gran <- ifelse( @@ -1253,15 +1240,14 @@ FeaturePlot <- function( ident <- levels(x = data$split)[i] data.plot <- data[as.character(x = data$split) == ident, , drop = FALSE] # Blend expression values - if (blend) { + if (isTRUE(x = blend)) { features <- features[1:2] no.expression <- features[colMeans(x = data.plot[, features]) == 0] if (length(x = no.expression) != 0) { - stop( - "The following features have no value: ", - paste(no.expression, collapse = ', '), - call. = FALSE - ) + abort(message = paste( + "The following features have no value:", + paste(no.expression, collapse = ', ') + )) } data.plot <- cbind(data.plot[, c(dims, 'ident')], BlendExpression(data = data.plot[, features[1:2]])) features <- colnames(x = data.plot)[4:ncol(x = data.plot)] @@ -1270,7 +1256,7 @@ FeaturePlot <- function( for (j in 1:length(x = features)) { feature <- features[j] # Get blended colors - if (blend) { + if (isTRUE(x = blend)) { cols.use <- as.numeric(x = as.character(x = data.plot[, feature])) + 1 cols.use <- colors[[j]][sort(x = unique(x = cols.use))] } else { @@ -1297,7 +1283,7 @@ FeaturePlot <- function( CenterTitle() # theme(plot.title = element_text(hjust = 0.5)) # Add labels - if (label) { + if (isTRUE(x = label)) { plot <- LabelClusters( plot = plot, id = 'ident', @@ -1356,7 +1342,12 @@ FeaturePlot <- function( } else if (length(x = cols) > 1) { unique.feature.exp <- unique(data.plot[, feature]) if (length(unique.feature.exp) == 1) { - warning("All cells have the same value (", unique.feature.exp, ") of ", feature, ".") + warn(message = paste0( + "All cells have the same value (", + unique.feature.exp, + ") of ", + dQuote(x = feature) + )) if (unique.feature.exp == 0) { cols.grad <- cols[1] } else{ @@ -1388,7 +1379,7 @@ FeaturePlot <- function( } } # Add blended color key - if (blend) { + if (isTRUE(x = blend)) { blend.legend <- BlendMap(color.matrix = color.matrix) for (ii in 1:length(x = levels(x = data$split))) { suppressMessages(expr = plots <- append( @@ -1434,17 +1425,17 @@ FeaturePlot <- function( } } ncol <- ifelse( - test = is.null(x = split.by) || blend, + test = is.null(x = split.by) || isTRUE(x = blend), yes = ncol, no = length(x = features) ) - legend <- if (blend) { + legend <- if (isTRUE(x = blend)) { 'none' } else { split.by %iff% 'none' } # Transpose the FeatureHeatmap matrix (not applicable for blended FeaturePlots) - if (combine) { + if (isTRUE(x = combine)) { if (by.col && !is.null(x = split.by) && !blend) { plots <- lapply( X = plots, @@ -1493,7 +1484,10 @@ FeaturePlot <- function( } plots <- plots[c(do.call( what = rbind, - args = split(x = 1:length(x = plots), f = ceiling(x = seq_along(along.with = 1:length(x = plots)) / length(x = features))) + args = split( + x = 1:length(x = plots), + f = ceiling(x = seq_along(along.with = 1:length(x = plots)) / length(x = features)) + ) ))] # Set ncol to number of splits (nrow) and nrow to number of features (ncol) plots <- wrap_plots(plots, ncol = nrow, nrow = ncol) diff --git a/R/zzz.R b/R/zzz.R index 1d42b8ee7..7c11ea7bd 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,6 +3,7 @@ #' deprecate_warn is_present #' @importFrom rlang abort #' arg_match +#' arg_match0 #' as_name #' caller_env #' check_installed diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 0c05355ff..6778ee488 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -35,7 +35,7 @@ FeaturePlot( ncol = NULL, coord.fixed = FALSE, by.col = TRUE, - sort.cell = NULL, + sort.cell = deprecated(), interactive = FALSE, combine = TRUE, raster = NULL, From ee092e13dc0d1961f6a219dcd8750bf787bf1ca6 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 5 Dec 2022 19:08:41 -0500 Subject: [PATCH 301/979] Bump v5 version Addresses #679 --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cb5f1d3ad..e296a7ed5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9011 -Date: 2022-12-01 +Version: 4.9.9.9012 +Date: 2022-12-05 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9045) + SeuratObject (>= 4.9.9.9046) Imports: cluster, cowplot, From b380c519692a0dbe4316c0dc61d010d2cf7be9dc Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 6 Dec 2022 16:07:56 -0500 Subject: [PATCH 302/979] Remove unused code --- R/objects.R | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/R/objects.R b/R/objects.R index b287cf8b3..5bdf0fac7 100644 --- a/R/objects.R +++ b/R/objects.R @@ -2609,24 +2609,6 @@ Collections <- function(object) { return(names(x = collections)) } -# Calculate nCount and nFeature -# -# @param object An Assay object -# -# @return A named list with nCount and nFeature -# -#' @importFrom Matrix colSums -# -CalcN <- function(object) { - if (IsMatrixEmpty(x = GetAssayData(object = object, slot = "counts"))) { - return(NULL) - } - return(list( - nCount = colSums(x = object, slot = 'counts'), - nFeature = colSums(x = GetAssayData(object = object, slot = 'counts') > 0) - )) -} - # Get the default image of an object # # Attempts to find all images associated with the default assay of the object. From 6841649f193d5d2ad9d873c3f8d8d1aa61bb14c5 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 6 Dec 2022 16:08:53 -0500 Subject: [PATCH 303/979] Minor improvements to PercentageFeatureSet Addresses #673 --- R/utilities.R | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 9dab957a6..0399e04a4 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1159,10 +1159,14 @@ PercentageFeatureSet <- function( ) { assay <- assay %||% DefaultAssay(object = object) if (!is.null(x = features) && !is.null(x = pattern)) { - warning("Both pattern and features provided. Pattern is being ignored.") + warn(message = "Both pattern and features provided. Pattern is being ignored.") } - features <- features %||% grep(pattern = pattern, x = rownames(x = object[[assay]]), value = TRUE) - percent.featureset <- colSums(x = GetAssayData(object = object, assay = assay, slot = "counts")[features, , drop = FALSE])/ + features <- features %||% grep( + pattern = pattern, + x = rownames(x = object[[assay]]), + value = TRUE + ) + percent.featureset <- colSums(x = GetAssayData(object = object, assay = assay, slot = "counts")[features, , drop = FALSE]) / object[[paste0("nCount_", assay)]] * 100 if (!is.null(x = col.name)) { object <- AddMetaData(object = object, metadata = percent.featureset, col.name = col.name) @@ -1295,7 +1299,7 @@ PseudobulkExpression <- function( }) } data.return <- list() - + for (i in 1:length(x = assays)) { data.use <- GetAssayData( @@ -2474,7 +2478,7 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { rownames(product.mat) <- rownames(x) return(product.mat) } - + # transpose cross product from delayed array # tcrossprod_DelayedAssay <- function(x, y, block.size = 1e8) { @@ -2532,7 +2536,7 @@ crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e8) { } norm.vector <- unlist(norm.list) return(norm.vector) - + } # row mean from delayed array @@ -2577,12 +2581,12 @@ RowVarDelayedAssay <- function(x, block.size = 1e8) { } else { row.sum.function <- rowSums2 } - + suppressMessages(setAutoBlockSize(size = block.size)) cells.grid <- DelayedArray::colAutoGrid(x = x) sum2.list <- list() sum.list <- list() - + for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) @@ -2596,7 +2600,7 @@ RowVarDelayedAssay <- function(x, block.size = 1e8) { } sum.mat <- Reduce('+', sum.list) sum2.mat <- Reduce('+', sum2.list) - var.mat <- sum2.mat/ncol(x) - (sum.mat/ncol(x))**2 + var.mat <- sum2.mat/ncol(x) - (sum.mat/ncol(x))**2 var.mat <- var.mat * ncol(counts) / (ncol(counts) - 1) return(var.mat) } From e22cbd60b806a71b7982d44919b4b2833abab443 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 6 Dec 2022 16:09:34 -0500 Subject: [PATCH 304/979] Minor improvements to FeatureScatter Addresses #673 --- R/visualization.R | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index c7eb4932f..23db42f0c 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1951,7 +1951,6 @@ FeatureScatter <- function( set.seed(seed = seed) cells <- sample(x = cells) } - object[['ident']] <- Idents(object = object) group.by <- group.by %||% 'ident' data <- FetchData( object = object, @@ -1959,15 +1958,15 @@ FeatureScatter <- function( cells = cells, slot = slot ) - if (!grepl(pattern = feature1, x = colnames(x = data)[1])) { - stop("Feature 1 (", feature1, ") not found.", call. = FALSE) + if (!grepl(pattern = feature1, x = names(x = data)[1])) { + abort(message = paste("Feature 1", sQuote(x = feature1), "not found")) } - if (!grepl(pattern = feature2, x = colnames(x = data)[2])) { - stop("Feature 2 (", feature2, ") not found.", call. = FALSE) + if (!grepl(pattern = feature2, x = names(x = data)[2])) { + abort(message = paste("Feature 2", sQuote(x = feature2), "not found")) } - data <- as.data.frame(x = data) - feature1 <- colnames(x = data)[1] - feature2 <- colnames(x = data)[2] + feature1 <- names(x = data)[1] + feature2 <- names(x = data)[2] + group.by <- intersect(x = group.by, y = names(x = data)[3:ncol(x = data)]) for (group in group.by) { if (!is.factor(x = data[, group])) { data[, group] <- factor(x = data[, group]) From 1da06b3637e781cafeb53ff4b8777fa9154dea3e Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 6 Dec 2022 17:23:37 -0500 Subject: [PATCH 305/979] bump v5 version Addresses #673 --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e296a7ed5..5db617eba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9012 -Date: 2022-12-05 +Version: 4.9.9.9013 +Date: 2022-12-06 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -27,7 +27,7 @@ BugReports: https://github.com/satijalab/seurat/issues Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9046) + SeuratObject (>= 4.9.9.9048) Imports: cluster, cowplot, From 3f7f0c326768ebed431b2c948768bac7a5d19be3 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 6 Dec 2022 17:30:38 -0500 Subject: [PATCH 306/979] Use [ for feature-level meta data instead of [[ Addresses #674 --- R/utilities.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 0399e04a4..987077d06 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -972,9 +972,9 @@ GroupCorrelation <- function( grp.cors <- as.data.frame(x = grp.cors[which(x = !is.na(x = grp.cors))]) grp.cors$gene_grp <- gene.grp[rownames(x = grp.cors)] colnames(x = grp.cors) <- c("cor", "feature_grp") - object[[assay]][["feature.grp"]] <- grp.cors[, "feature_grp", drop = FALSE] - object[[assay]][[paste0(var, "_cor")]] <- grp.cors[, "cor", drop = FALSE] - if (do.plot) { + object[[assay]]["feature.grp"] <- grp.cors[, "feature_grp", drop = FALSE] + object[[assay]][paste0(var, "_cor")] <- grp.cors[, "cor", drop = FALSE] + if (isTRUE(x = do.plot)) { print(GroupCorrelationPlot( object = object, assay = assay, From e5fca8894c55cdc04a64667e9f94da09c831bd57 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 6 Dec 2022 17:36:21 -0500 Subject: [PATCH 307/979] fix VariableFeaturePlot with v5 assay --- DESCRIPTION | 2 +- R/visualization.R | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e296a7ed5..08a7ed05a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9012 +Version: 4.9.9.9013 Date: 2022-12-05 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/R/visualization.R b/R/visualization.R index c7eb4932f..a623d099d 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -2044,9 +2044,12 @@ VariableFeaturePlot <- function( selection.method = selection.method, status = TRUE ) - var.status <- c('no', 'yes')[unlist(x = hvf.info[, ncol(x = hvf.info)]) + 1] + status.col <- colnames(hvf.info)[grepl("variable", colnames(hvf.info))][[1]] + var.status <- c('no', 'yes')[unlist(hvf.info[[status.col]]) + 1] if (colnames(x = hvf.info)[3] == 'dispersion.scaled') { hvf.info <- hvf.info[, c(1, 2)] + } else if (colnames(x = hvf.info)[3] == 'variance.expected') { + hvf.info <- hvf.info[, c(1, 4)] } else { hvf.info <- hvf.info[, c(1, 3)] } From 678141f74c3bada0fc2b599378430d1ba46b441d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 6 Dec 2022 18:43:22 -0500 Subject: [PATCH 308/979] fix option settings --- R/integration5.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 39ffcaf5b..5a3c7859a 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -133,7 +133,8 @@ CCAIntegration <- function( scale.layer = 'scale.data', verbose = TRUE, ...) { - op <- options(Seurat.object.assay.version = "v3") + op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' @@ -175,7 +176,6 @@ CCAIntegration <- function( ) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) - on.exit(expr = options(op), add = TRUE) return(output.list) } @@ -201,7 +201,8 @@ RPCAIntegration <- function( groups = NULL, verbose = TRUE, ...) { - op <- options(Seurat.object.assay.version = "v3") + op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' @@ -252,7 +253,6 @@ RPCAIntegration <- function( output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) - on.exit(expr = options(op), add = TRUE) return(output.list) } @@ -279,7 +279,8 @@ JointPCAIntegration <- function( groups = NULL, ... ) { - op <- options(Seurat.object.assay.version = "v3") + op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) features.diet <- features[1:2] @@ -333,7 +334,6 @@ JointPCAIntegration <- function( verbose = verbose) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) - on.exit(expr = options(op), add = TRUE) return(output.list) } From 835c66c8aa4d440b4507913c8e7bfc71fab40cdf Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 7 Dec 2022 14:40:56 -0500 Subject: [PATCH 309/979] coerce to valid key name for predicted* assays --- DESCRIPTION | 2 +- R/integration.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6d2c1bccf..43860a6b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9013 +Version: 4.9.9.9014 Date: 2022-12-06 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/R/integration.R b/R/integration.R index e7d9b262d..bd951b54c 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3467,7 +3467,7 @@ TransferData <- function( predictions <- CreateAssayObject( data = t(x = as.matrix(x = prediction.scores)), check.matrix = FALSE ) - Key(object = predictions) <- paste0("predictionscore", rd.name, "_") + Key(object = predictions) <- paste0("predictionscore", gsub("[._-]", "", rd.name), "_") } if (is.null(x = query)) { transfer.results[[rd]] <- predictions @@ -3494,7 +3494,7 @@ TransferData <- function( } else if (slot == "data") { new.assay <- CreateAssayObject(data = new.data, check.matrix = FALSE) } - Key(object = new.assay) <- paste0(rd.name, "_") + Key(object = new.assay) <- paste0(gsub("[._-]", "", rd.name), "_") if (is.null(x = query)) { transfer.results[[rd]] <- new.assay } else { From 6947fc1ba1ba71917981b2ace39bc925fed2df9d Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 7 Dec 2022 14:58:39 -0500 Subject: [PATCH 310/979] use Key function --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index bd951b54c..e842e62f8 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3467,7 +3467,7 @@ TransferData <- function( predictions <- CreateAssayObject( data = t(x = as.matrix(x = prediction.scores)), check.matrix = FALSE ) - Key(object = predictions) <- paste0("predictionscore", gsub("[._-]", "", rd.name), "_") + Key(object = predictions) <- Key(paste0("predictionscore", rd.name), quiet = TRUE) } if (is.null(x = query)) { transfer.results[[rd]] <- predictions @@ -3494,7 +3494,7 @@ TransferData <- function( } else if (slot == "data") { new.assay <- CreateAssayObject(data = new.data, check.matrix = FALSE) } - Key(object = new.assay) <- paste0(gsub("[._-]", "", rd.name), "_") + Key(object = new.assay) <- Key(rd.name, quiet = TRUE) if (is.null(x = query)) { transfer.results[[rd]] <- new.assay } else { From 64b06d26ef1df0f2fc38ec3f74c8f1eac700ee27 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 8 Dec 2022 16:22:04 -0500 Subject: [PATCH 311/979] Fix duplicate key error in TransformMatrix() --- R/integration.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index e842e62f8..7bb75c50f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5291,14 +5291,15 @@ TransformDataMatrix <- function( new.expression <- new.expression[, colnames(object)] new.assay <- new( Class = 'Assay', - key = paste0(new.assay.name,"_"), + # key = paste0(new.assay.name,"_"), counts = new(Class = "dgCMatrix"), data = new.expression, scale.data = matrix(), var.features = vector(), meta.features = data.frame(row.names = rownames(x = new.expression)), misc = NULL, - key = paste0(new.assay.name, "_") + # key = paste0(new.assay.name, "_") + key = Key(object = new.assay.name, quiet = TRUE) ) object[[new.assay.name]] <- new.assay return(object) From 4fb99f3b2748f65f114d3c6b0e57088aa34520c0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 10 Dec 2022 00:10:09 -0500 Subject: [PATCH 312/979] leverage score for BPCells --- R/sketching.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index b8ca4474e..5f9cde7e3 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -213,6 +213,9 @@ LeverageScore.default <- function( message("Performing QR decomposition") } sa <- S %*% object + if (!inherits(x = sa, what = 'dgCMatrix')) { + sa <- as(object = sa, Class = 'dgCMatrix') + } qr.sa <- base::qr(x = sa) R <- if (inherits(x = qr.sa, what = 'sparseQR')) { qrR(qr = qr.sa) @@ -230,7 +233,12 @@ LeverageScore.default <- function( seed = seed )) Z <- object %*% (R.inv %*% JL) - return(rowSums(x = Z ^ 2)) + if (inherits(x = Z, what = 'MatrixMultiply')) { + Z.score <- matrix_stats(matrix = Z, row_stats = 'variance')$row_stats['variance',]*ncol(Z) + } else { + Z.score <- rowSums(x = Z ^ 2) + } + return(Z.score) } #' @importFrom Matrix qrR t From 2e41494d4d5ff96701e47c2c67763cc07f673e74 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 10 Dec 2022 00:55:28 -0500 Subject: [PATCH 313/979] BP cells VST --- NAMESPACE | 1 + R/preprocessing5.R | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 0ba816320..3dd056e53 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -128,6 +128,7 @@ S3method(ScaleFactors,VisiumV1) S3method(ScoreJackStraw,DimReduc) S3method(ScoreJackStraw,JackStrawData) S3method(ScoreJackStraw,Seurat) +S3method(VST,BPCells) S3method(VST,DelayedMatrix) S3method(VST,default) S3method(VST,dgCMatrix) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 62e7794c8..6184e606c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -781,6 +781,55 @@ VST.default <- function( .NotYetImplemented() } +#' @rdname VST +#' @method VST BPcells +#' @export +#' +VST.BPCells <-function( + data, + nselect = 2000L, + span = 0.3, + clip = NULL, + verbose = TRUE, + ... +) { + nfeatures <- nrow(x = data) + hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) + # Calculate feature means + hvf.info$mean <- matrix_stats( + matrix = data, + row_stats = 'mean')$row_stats['mean',] + # Calculate feature variance + hvf.info$variance <- matrix_stats( + matrix = data, + row_stats = 'variance')$row_stats['variance',] + hvf.info$variance.expected <- 0L + not.const <- hvf.info$variance > 0 + fit <- loess( + formula = log10(x = variance) ~ log10(x = mean), + data = hvf.info[not.const, , drop = TRUE], + span = span + ) + hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted + data.standard <- (data - hvf.info$mean) / sqrt(x = hvf.info$variance.expected) + data.standard <- min_scalar( + mat = data.standard, + val = clip %||% sqrt(x = ncol(x = data)) + ) + hvf.info$variance.standardized <- matrix_stats(matrix = data.standard, row_stats = 'variance')$row_stats['variance',] + # Set variable features + hvf.info$variable <- FALSE + hvf.info$rank <- NA + vf <- head( + x = order(hvf.info$variance.standardized, decreasing = TRUE), + n = nselect + ) + hvf.info$variable[vf] <- TRUE + hvf.info$rank[vf] <- seq_along(along.with = vf) + rownames(x = hvf.info) <- rownames(x = data) + return(hvf.info) +} + #' @method VST DelayedMatrix #' @export #' From bcf5e54bbb5f494fa8845db45abd5f1ae8c9f188 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 10 Dec 2022 19:10:02 -0500 Subject: [PATCH 314/979] update unsketch embedding --- NAMESPACE | 1 + R/integration.R | 128 ++++++++++++++++++++++++++++-------------------- 2 files changed, 77 insertions(+), 52 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3dd056e53..10d3513b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -401,6 +401,7 @@ export(UMAPPlot) export(UpdateSCTAssays) export(UpdateSeuratObject) export(UpdateSymbolList) +export(UnSketchEmbeddings) export(VST) export(VariableFeaturePlot) export(VariableFeatures) diff --git a/R/integration.R b/R/integration.R index 7bb75c50f..9b82927d6 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1936,6 +1936,26 @@ IntegrateSketchEmbeddings <- function( if (length(atoms.layers) == 1) { atoms.layers <- rep(atoms.layers, length(layers)) } + sketch.matrix <- switch( + EXPR = method, + data = { + R = as.sparse( + x = diag( + x = length( + x = features) + ) + ) + R + }, + sketch = { + R <- t(x = CountSketch( + nsketch = round(x = ratio * length(x = features)), + ncells = length(x = features), + seed = seed) + ) + R + } + ) emb.list <- list() cells.list <- list() for (i in seq_along(along.with = layers)) { @@ -1951,63 +1971,28 @@ IntegrateSketchEmbeddings <- function( ' atomic cells identified in the atoms' ) message("Correcting embeddings") - } - if (inherits(x = object[[orig]][[layers[i]]], what = 'DelayedMatrix') ) { - matrix.prod.function <- crossprod_DelayedAssay - } else { - matrix.prod.function <- crossprod - } - - emb <- switch( - EXPR = method, - 'data' = { - exp.mat <- t( - x = as.matrix( - LayerData( - object = object[[atoms]], - layer = layers[i], - features = features - ) - ) - ) - sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = object[[reduction]])[cells.sketch ,] - emb <- matrix.prod.function(x = sketch.transform, - y = LayerData( - object = object[[orig]], - layer = layers[i], - features = features - )) - emb - }, - 'sketch' = { - R <- t(x = CountSketch( - nsketch = round(x = ratio * length(x = features)), - ncells = length(x = features), - seed = seed - )) - exp.mat <- as.matrix(x = t(x = LayerData( - object = object[[atoms]], - layer = atoms.layers[i], - features = features - )[,cells.sketch]) %*% R) - sketch.transform <- ginv(X = exp.mat) %*% - Embeddings(object = object[[reduction]])[cells.sketch ,] - emb <- matrix.prod.function(x = R %*% sketch.transform, - y = LayerData( - object = object[[orig]], - layer = layers[i], - features = features - )) - emb - } + } + emb <- UnSketchEmbeddings( + atom.data = LayerData( + object = object[[atoms]], + layer = layers[i], + features = features + ), + atom.cells = cells.sketch, + orig.data = LayerData( + object = object[[orig]], + layer = layers[i], + features = features + ), + embeddings = Embeddings(object = object[[reduction]]), + sketch.matrix = sketch.matrix ) - emb.list[[i]] <- as.matrix(x = emb) + emb.list[[i]] <- emb cells.list[[i]] <- colnames(x = emb) } emb.all <- t(matrix(data = unlist(emb.list), nrow = length(x = object[[reduction]]), - ncol = ncol(x = object[[orig]]) + ncol = length(unlist(cells.list)) )) rownames(emb.all) <- unlist(cells.list) emb.all <- emb.all[colnames(object[[orig]]), ] @@ -7461,3 +7446,42 @@ FastRPCAIntegration <- function( return(object_merged) } + + +#' Transfer embeddings from sketched cells to the full data +#' +#' @importFrom MASS ginv +#' @importFrom Matrix t +#' +#' @export +#' +UnSketchEmbeddings <- function(atom.data, + atom.cells, + orig.data, + embeddings, + sketch.matrix = NULL +) { + if(!all(rownames(atom.data) == rownames(orig.data))) { + stop('fetures in atom.data and orig.data are not identical') + } else { + features = rownames(atom.data) + } + if (inherits(x = orig.data, what = 'DelayedMatrix') ) { + matrix.prod.function <- crossprod_DelayedAssay + } else { + matrix.prod.function <- crossprod + } + sketch.matrix <- sketch.matrix %||% as.sparse(diag(length(features))) + atom.data <- atom.data[, atom.cells] + orig.data <- orig.data[features,] + embeddings <- embeddings[,atom.cells] + exp.mat <- as.matrix(x = t(x = atom.data) %*% sketch.matrix) + sketch.transform <- ginv(X = exp.mat) %*% embeddings + emb <- matrix.prod.function(x = sketch.matrix %*% sketch.transform, + y = orig.data + ) + emb <- as.matrix(x = emb) + return(emb) +} + + From 1edb073a6584a0130578b515c93a78397babb044 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 10 Dec 2022 19:16:05 -0500 Subject: [PATCH 315/979] features sketch --- R/integration.R | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 9b82927d6..c13e0a95b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1948,11 +1948,10 @@ IntegrateSketchEmbeddings <- function( R }, sketch = { - R <- t(x = CountSketch( - nsketch = round(x = ratio * length(x = features)), - ncells = length(x = features), - seed = seed) - ) + R <- FeatureSketch(features = features, + ratio = ratio, + seed = seed + ) R } ) @@ -7485,3 +7484,11 @@ UnSketchEmbeddings <- function(atom.data, } +FeatureSketch <- function(features, ratio = 0.8, seed = 123) { + sketch.R <- t(x = CountSketch( + nsketch = round(x = ratio * length(x = features)), + ncells = length(x = features), + seed = seed) + ) + return(sketch.R) +} \ No newline at end of file From a6cb6636af181edfe398706fc176738de86fb239 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 10 Dec 2022 19:35:28 -0500 Subject: [PATCH 316/979] fix bug in UnSketchEmbeddings --- R/integration.R | 10 ++++++---- R/utilities.R | 13 ++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index c13e0a95b..71b2f5bf9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7467,17 +7467,19 @@ UnSketchEmbeddings <- function(atom.data, } if (inherits(x = orig.data, what = 'DelayedMatrix') ) { matrix.prod.function <- crossprod_DelayedAssay + } else if(inherits(x = orig.data, what = 'TransformLog1p')) { + matrix.prod.function <- crossprod_BPCells } else { matrix.prod.function <- crossprod } sketch.matrix <- sketch.matrix %||% as.sparse(diag(length(features))) atom.data <- atom.data[, atom.cells] - orig.data <- orig.data[features,] - embeddings <- embeddings[,atom.cells] + embeddings <- embeddings[atom.cells,] exp.mat <- as.matrix(x = t(x = atom.data) %*% sketch.matrix) sketch.transform <- ginv(X = exp.mat) %*% embeddings - emb <- matrix.prod.function(x = sketch.matrix %*% sketch.transform, - y = orig.data + emb <- matrix.prod.function( + x = as.matrix(sketch.matrix %*% sketch.transform), + y = orig.data ) emb <- as.matrix(x = emb) return(emb) diff --git a/R/utilities.R b/R/utilities.R index 987077d06..d2a26898c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2475,7 +2475,18 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { } product.mat <- matrix(data = unlist(product.list), nrow = ncol(x) , ncol = ncol(y)) colnames(product.mat) <- colnames(y) - rownames(product.mat) <- rownames(x) + rownames(product.mat) <- colnames(x) + return(product.mat) +} + + +# cross product from BPCells +# +crossprod_BPCells <- function(x, y) { + # perform t(x) %*% y, y is from BPCells + product.mat <- t(x) %*% y + colnames(product.mat) <- colnames(y) + rownames(product.mat) <- colnames(x) return(product.mat) } From ba4217999276d4fa8e897b5b6e9e768a08280f3d Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 12 Dec 2022 13:07:52 -0500 Subject: [PATCH 317/979] Add BPCells to Suggests --- DESCRIPTION | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 43860a6b3..933328420 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,6 +24,8 @@ Authors@R: c( ) URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues +Remotes: + bnprks/BPCells Depends: R (>= 4.0.0), methods, @@ -128,4 +130,5 @@ Suggests: metap, enrichR, mixtools, - ggrastr + ggrastr, + BPCells (>= 0.0.0.9000) From 17a597083c1381101e539922bd439a2758c4b57a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 12 Dec 2022 16:03:00 -0500 Subject: [PATCH 318/979] BPcell convertion --- NAMESPACE | 2 ++ R/integration.R | 5 +++-- R/objects.R | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 10d3513b3..a5685b95d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -141,6 +141,8 @@ S3method(as.Seurat,SingleCellExperiment) S3method(as.SingleCellExperiment,Seurat) S3method(as.data.frame,Matrix) S3method(as.sparse,H5Group) +S3method(as.sparse,IterableMatrix) +S3method(as.matrix,IterableMatrix) S3method(components,SCTAssay) S3method(dim,STARmap) S3method(dim,SlideSeq) diff --git a/R/integration.R b/R/integration.R index 71b2f5bf9..5b069c5ec 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1990,7 +1990,7 @@ IntegrateSketchEmbeddings <- function( cells.list[[i]] <- colnames(x = emb) } emb.all <- t(matrix(data = unlist(emb.list), - nrow = length(x = object[[reduction]]), + nrow = ncol(x = object[[reduction]]), ncol = length(unlist(cells.list)) )) rownames(emb.all) <- unlist(cells.list) @@ -7455,7 +7455,7 @@ FastRPCAIntegration <- function( #' @export #' UnSketchEmbeddings <- function(atom.data, - atom.cells, + atom.cells = NULL, orig.data, embeddings, sketch.matrix = NULL @@ -7465,6 +7465,7 @@ UnSketchEmbeddings <- function(atom.data, } else { features = rownames(atom.data) } + atom.cells <- atom.cells %||% colnames(x = atom.data) if (inherits(x = orig.data, what = 'DelayedMatrix') ) { matrix.prod.function <- crossprod_DelayedAssay } else if(inherits(x = orig.data, what = 'TransformLog1p')) { diff --git a/R/objects.R b/R/objects.R index 5bdf0fac7..60eb261eb 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1386,6 +1386,22 @@ as.sparse.H5Group <- function(x, ...) { )) } + +#' @method as.sparse IterableMatrix +#' @export +#' +as.sparse.IterableMatrix <- function(x, ...) { + return(as(object = x, Class = 'dgCMatrix')) +} + + +#' @method as.matrix IterableMatrix +#' @export +#' +as.matrix.IterableMatrix <- function(x, ...) { + return(as.matrix(x = as.sparse(x = x))) +} + #' Get Cell Names #' #' @inheritParams SeuratObject::Cells From 1498b5e888d7778483415e459160a534ca1b30e1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 12 Dec 2022 17:58:39 -0500 Subject: [PATCH 319/979] BPcells for basic seurat --- NAMESPACE | 4 +++- R/dimensional_reduction.R | 16 ++++++++++++++-- R/preprocessing.R | 38 ++++++++++++++++++++++++++++++++++++++ R/preprocessing5.R | 20 ++++++++++++++++++-- 4 files changed, 73 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a5685b95d..f50e17da5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,6 +60,7 @@ S3method(LeverageScore,default) S3method(LogNormalize,DelayedMatrix) S3method(LogNormalize,H5ADMatrix) S3method(LogNormalize,HDF5Matrix) +S3method(LogNormalize,IterableMatrix) S3method(LogNormalize,TENxMatrix) S3method(LogNormalize,TileDBMatrix) S3method(LogNormalize,V3Matrix) @@ -120,6 +121,7 @@ S3method(SCTransform,Seurat) S3method(SCTransform,StdAssay) S3method(SCTransform,default) S3method(ScaleData,Assay) +S3method(ScaleData,IterableMatrix) S3method(ScaleData,Seurat) S3method(ScaleData,Seurat5) S3method(ScaleData,StdAssay) @@ -128,10 +130,10 @@ S3method(ScaleFactors,VisiumV1) S3method(ScoreJackStraw,DimReduc) S3method(ScoreJackStraw,JackStrawData) S3method(ScoreJackStraw,Seurat) -S3method(VST,BPCells) S3method(VST,DelayedMatrix) S3method(VST,default) S3method(VST,dgCMatrix) +S3method(VST,IterableMatrix) S3method(VST,matrix) S3method(VariableFeatures,SCTAssay) S3method(VariableFeatures,SCTModel) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index bf9bd88e9..1ab86d15a 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -860,10 +860,22 @@ RunPCA.default <- function( if (!is.null(x = seed.use)) { set.seed(seed = seed.use) } + if (inherits(x = object, what = 'matrix')) { + RowVar.function <- RowVar + } else if (inherits(x = object, what = 'dgCMatrix')) { + RowVar.function <- RowVarSparse + } else if (inherits(x = object, what = 'IterableMatrix')) { + RowVar.function <- function(x) { + return(BPCells::matrix_stats( + matrix = x, + row_stats = 'variance' + )$row_stats['variance',]) + } + } if (rev.pca) { npcs <- min(npcs, ncol(x = object) - 1) pca.results <- irlba(A = object, nv = npcs, ...) - total.variance <- sum(RowVar(x = t(x = object))) + total.variance <- sum(RowVar.function(x = t(x = object))) sdev <- pca.results$d/sqrt(max(1, nrow(x = object) - 1)) if (weight.by.var) { feature.loadings <- pca.results$u %*% diag(pca.results$d) @@ -873,7 +885,7 @@ RunPCA.default <- function( cell.embeddings <- pca.results$v } else { - total.variance <- sum(RowVar(x = object)) + total.variance <- sum(RowVar.function(x = object)) if (approx) { npcs <- min(npcs, nrow(x = object) - 1) pca.results <- irlba(A = t(x = object), nv = npcs, ...) diff --git a/R/preprocessing.R b/R/preprocessing.R index 9487fc307..0cc69ef51 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2912,6 +2912,44 @@ ScaleData.default <- function( return(scaled.data) } +#' @rdname ScaleData +#' @concept preprocessing +#' @export +#' @method ScaleData IterableMatrix +#' +ScaleData.IterableMatrix <- function( + object, + features = NULL, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + ... +) { + features <- features %||% rownames(x = object) + features <- as.vector(x = intersect(x = features, y = rownames(x = object))) + object <- object[features, , drop = FALSE] + if (do.center) { + features.mean <- BPCells::matrix_stats( + matrix = object, + row_stats = 'mean')$row_stats['mean',] + } else { + features.mean <- 0 + } + if (do.scale) { + features.sd <- sqrt(BPCells::matrix_stats( + matrix = object, + row_stats = 'variance')$row_stats['variance',]) + } else { + features.sd <- 1 + } + scaled.data <- (object - features.mean) / features.sd + if (scale.max != Inf) { + scaled.data <- BPCells::min_scalar(mat = scaled.data, val = scale.max) + } +return(scaled.data) +} + + #' @rdname ScaleData #' @concept preprocessing #' @export diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 6184e606c..6afcc07ee 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -351,6 +351,22 @@ LogNormalize.HDF5Matrix <- function( )) } + +#' @method LogNormalize IterableMatrix +#' @export +#' +LogNormalize.IterableMatrix <- function( + data, + scale.factor = 1e4, + margin = 2L, + verbose = TRUE, + ... +) { + data <- BPCells::t(BPCells::t(data) / colSums(data)) + # Log normalization + data <- log1p(data * scale.factor) + return(data) +} #' @method LogNormalize TileDBMatrix #' @export #' @@ -782,10 +798,10 @@ VST.default <- function( } #' @rdname VST -#' @method VST BPcells +#' @method VST IterableMatrix #' @export #' -VST.BPCells <-function( +VST.IterableMatrix <-function( data, nselect = 2000L, span = 0.3, From c2517719d30328ca56135abd7350d956166dff32 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 13 Dec 2022 13:50:06 -0500 Subject: [PATCH 320/979] project BPcels --- R/integration.R | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 5b069c5ec..6a02b841e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7334,6 +7334,33 @@ ProjectCellEmbeddings_DelayedAssay <- function( } + +ProjectCellEmbeddings_IterableMatrix <- function( + query.data, + reference, + assay = NULL, + reduction, + dims = NULL, + feature.mean = NULL, + feature.sd = NULL +) { + dims <- dims %||% 1:ncol(reference[[reduction]]) + assay <- assay %||% DefaultAssay(reference) + features <- intersect(rownames(query.data), + rownames(reference[[reduction]]@feature.loadings)) + query.data <- query.data[features,] + feature.mean <- feature.mean[features] %||% + RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) + feature.sd <- feature.sd[features] %||% + sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) + feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) + query.scale <- (query.data - feature.mean)/feature.sd + emb.mat <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] + rownames(emb.mat) <- colnames(query.data) + colnames(emb.mat) <- colnames(Embeddings(object = reference[[reduction]]))[dims] + return(emb.mat) +} + #' Perform integration on the joint PCA cell embeddings. #' #' This is a convenience wrapper function around the following three functions @@ -7468,7 +7495,7 @@ UnSketchEmbeddings <- function(atom.data, atom.cells <- atom.cells %||% colnames(x = atom.data) if (inherits(x = orig.data, what = 'DelayedMatrix') ) { matrix.prod.function <- crossprod_DelayedAssay - } else if(inherits(x = orig.data, what = 'TransformLog1p')) { + } else if(inherits(x = orig.data, what = 'IterableMatrix')) { matrix.prod.function <- crossprod_BPCells } else { matrix.prod.function <- crossprod From b26bf01bfc4cbabf4071aa38eebf3173093bffb2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 13 Dec 2022 14:15:24 -0500 Subject: [PATCH 321/979] export projection --- NAMESPACE | 1 + R/integration.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index f50e17da5..86c281f38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -330,6 +330,7 @@ export(PrepareBridgeReference) export(Project) export(ProjectDim) export(ProjectDimReduc) +export(ProjectCellEmbeddings_IterableMatrix) export(ProjectUMAP) export(PurpleAndYellow) export(RPCAIntegration) diff --git a/R/integration.R b/R/integration.R index 6a02b841e..97d82802b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7334,7 +7334,8 @@ ProjectCellEmbeddings_DelayedAssay <- function( } - +#' @export +#' ProjectCellEmbeddings_IterableMatrix <- function( query.data, reference, From 6caee67d3fcdb500092a4402ca2b9d35194027ca Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 13 Dec 2022 15:50:42 -0500 Subject: [PATCH 322/979] Better handling of idents in FindClusters --- R/clustering.R | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index 63a36a662..e4661e6e0 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -452,9 +452,16 @@ FindClusters.Seurat <- function( verbose = verbose, ... ) - colnames(x = clustering.results) <- paste0(graph.name, "_", colnames(x = clustering.results)) - object <- AddMetaData(object = object, metadata = clustering.results) - Idents(object = object) <- colnames(x = clustering.results)[ncol(x = clustering.results)] + names(x = clustering.results) <- paste( + graph.name, + names(x = clustering.results), + sep = '_' + ) + # object <- AddMetaData(object = object, metadata = clustering.results) + # Idents(object = object) <- colnames(x = clustering.results)[ncol(x = clustering.results)] + idents.use <- names(x = clustering.results)[ncol(x = clustering.results)] + object[[]] <- clustering.results + Idents(object = object, replace = TRUE) <- object[[idents.use, drop = TRUE]] levels <- levels(x = object) levels <- tryCatch( expr = as.numeric(x = levels), @@ -1610,7 +1617,7 @@ NNHelper <- function(data, query = data, k, method, cache.index = FALSE, ...) { "hnsw" = { args <- args[intersect(x = names(x = args), y = names(x = formals(fun = HnswNN)))] do.call(what = 'HnswNN', args = args) - }, + }, stop("Invalid method. Please choose one of 'rann', 'annoy'") ) ) From 365e3d85bf106656ffe84883a4b8f567e5ee1d4a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 14 Dec 2022 15:26:38 -0500 Subject: [PATCH 323/979] sweep non zero element --- R/integration.R | 2 -- R/utilities.R | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 97d82802b..972a09010 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6467,7 +6467,6 @@ TransferLablesNN <- function( } else { stop('wrong weights matrix input') } - reference.labels.matrix <- as.sparse( x = dummy_cols( reference.object[[group.by]] @@ -6478,7 +6477,6 @@ TransferLablesNN <- function( replacement = "", x = colnames(reference.labels.matrix) ) - query.label.mat <- nn.matrix %*% reference.labels.matrix query.label.mat <- query.label.mat/k.nn rownames(x = query.label.mat) <- Cells(nn.object) diff --git a/R/utilities.R b/R/utilities.R index d2a26898c..3313504b7 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1285,7 +1285,7 @@ PseudobulkExpression <- function( category.matrix <- category.matrix[, colsums > 0] colsums <- colsums[colsums > 0] if (pb.method == 'average') { - category.matrix <- SweepSparse( + category.matrix <- SweepNonzero( x = category.matrix, MARGIN = 2, STATS = colsums, @@ -2618,8 +2618,9 @@ RowVarDelayedAssay <- function(x, block.size = 1e8) { -# sparse version of sweep -SweepSparse <- function( +# nonzero element version of sweep +# +SweepNonzero <- function( x, MARGIN, STATS, From ba89ddbd11b7b365e6d717d2611feb17dadef96d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 14 Dec 2022 15:53:27 -0500 Subject: [PATCH 324/979] add create category matrix and FindweightsNN --- R/integration.R | 40 ++++++++++++++++++++++++++++++---------- R/utilities.R | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/R/integration.R b/R/integration.R index 972a09010..971b8bb28 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4307,6 +4307,34 @@ FindWeights <- function( } +# Find weight matrix between query and reference cells from a neighbor object +# +# +FindWeightsNN <- function( + nn.obj, + query.cells, + reference.cells, + verbose = TRUE +) { + distances <- Distances(object = nn.obj) + distances <- 1 - (distances / distances[, ncol(x = distances)]) + cell.index <- Indices(object = nn.obj) + weights <- Seurat:::FindWeightsC( + cells2 = 0:(length(query.cells) - 1), + distances = as.matrix(x = distances), + anchor_cells2 = reference.cells, + integration_matrix_rownames = reference.cells, + cell_index = cell.index, + anchor_score = rep(1, length(reference.cells)), + min_dist = 0, + sd = 1, + display_progress = verbose + ) + colnames(weights) <- query.cells + return(weights) +} + + # Work out the anchor cell offsets for given set of cells in anchor list # # @param anchors A dataframe of anchors, from AnchorSet object @@ -6467,16 +6495,8 @@ TransferLablesNN <- function( } else { stop('wrong weights matrix input') } - reference.labels.matrix <- as.sparse( - x = dummy_cols( - reference.object[[group.by]] - )[, -1] - ) - colnames(reference.labels.matrix) <- gsub( - pattern = paste0(group.by, "_"), - replacement = "", - x = colnames(reference.labels.matrix) - ) + + reference.labels.matrix <- CreateCategoryMatrix(labels = reference.object[[group.by]]) query.label.mat <- nn.matrix %*% reference.labels.matrix query.label.mat <- query.label.mat/k.nn rownames(x = query.label.mat) <- Cells(nn.object) diff --git a/R/utilities.R b/R/utilities.R index 3313504b7..639d6e118 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2645,3 +2645,44 @@ SweepNonzero <- function( return(x) } + +#' Create one hot matrix for a given label +#' @export + +CreateCategoryMatrix <- function( + labels, + method = c('sum', 'average') + ) { + method <- match.arg(arg = method) + data <- cbind(labels = labels) + group.by <- colnames(x = data) + category.matrix <- sparse.model.matrix(object = as.formula( + object = paste0( + '~0+', + paste0( + "data[,", + 1:length(x = group.by), + "]", + collapse = ":" + ) + ) + )) + colsums <- colSums(x = category.matrix) + category.matrix <- category.matrix[, colsums > 0] + colsums <- colsums[colsums > 0] + + if (method =='average') { + category.matrix <- Seurat:::SweepNonzero( + x = category.matrix, + MARGIN = 2, + STATS = colsums, + FUN = "/") + } + colnames(x = category.matrix) <- sapply( + X = colnames(x = category.matrix), + FUN = function(name) { + name <- gsub(pattern = "data\\[, [1-9]*\\]", replacement = "", x = name) + return(paste0(rev(x = unlist(x = strsplit(x = name, split = ":"))), collapse = "_")) + }) + return(category.matrix) +} From 62e103e6ef325457f9e0c4f055ec6eefa6d82510 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 14 Dec 2022 18:14:18 -0500 Subject: [PATCH 325/979] transfer sketch to full --- NAMESPACE | 2 + R/integration.R | 99 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 90 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 86c281f38..0a2dcd94f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -203,6 +203,7 @@ export(CombinePlots) export(Command) export(CountSketch) export(CreateAssayObject) +export(CreateCategoryMatrix) export(CreateDimReducObject) export(CreateSCTAssayObject) export(CreateSeuratObject) @@ -402,6 +403,7 @@ export(TopCells) export(TopFeatures) export(TopNeighbors) export(TransferData) +export(TransferSketchLabels) export(UMAPPlot) export(UpdateSCTAssays) export(UpdateSeuratObject) diff --git a/R/integration.R b/R/integration.R index 971b8bb28..55eb2f28d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3509,6 +3509,79 @@ TransferData <- function( } } + + + +#' Transfer data from sketch data to full data +#' @export +#' +TransferSketchLabels <- function( + object, + atoms = 'sketch', + reduction, + dims, + refdata, + k = 50, + reduction.model = NULL, + neighbors = NULL +){ + full_sketch.nn <- neighbors %||% Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.nn + + if (is.null(full_sketch.nn)) { + full_sketch.nn <- Seurat:::NNHelper( + query = Embeddings(object[[reduction]])[, dims], + data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], + k = k, + method = "annoy" + ) + } + full_sketch.weight <- Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.weight + if(is.null(full_sketch.weight)) { + full_sketch.weight <- FindWeightsNN(nn.obj = full_sketch.nn, + query.cells = Cells(object[[reduction]]), + reference = colnames(object[[atoms]]), + verbose = verbose) + } + object@tools$TransferSketchLabels$full_sketch.nn <- full_sketch.nn + object@tools$TransferSketchLabels$full_sketch.weight <- full_sketch.weight + + if (length(refdata) == 1 & is.character(refdata)) { + refdata <- list(refdata) + names(refdata) <- unlist(refdata) + } + + for (rd in 1:length(x = refdata)) { + if (isFALSE(x = refdata[[rd]])) { + transfer.results[[rd]] <- NULL + next + } + rd.name <- names(x = refdata)[rd] + label.rd <- refdata[[rd]] + ## FetchData not work + reference.labels <- object@meta.data[colnames(object[[atoms]]), label.rd] + predicted.labels.list <- TransferLablesNN( + reference.labels = reference.labels, + weight.matrix = full_sketch.weight) + + object[[paste0('predicted.', label.rd)]] <- predicted.labels.list$labels + object[[paste0('predicted.', label.rd, '.score')]] <- predicted.labels.list$scores + } + if (!is.null(reduction.model)) { + if (is.nul(object[[reduction.model]]@misc$model)) { + warning(reduction.model, ' does not have a stored umap model') + return(object) + } + if (ncol(full_sketch.nn) > object[[reduction.model]]@misc$model$n_neighbors) { + full_sketch.nn@nn.idx <- full_sketch.nn@nn.idx[, 1:object[[reduction.model]]@misc$model$n_neighbors] + full_sketch.nn@nn.dist <- full_sketch.nn@nn.dist[, 1:object[[reduction.model]]@misc$model$n_neighbors] + } + proj.umap <- RunUMAP(object = full_sketch.nn, reduction.model = object[[reduction.model]], verbose = verbose) + proj.umap@assay.used <- object[[reduction]]@assay.used + Key(proj.umap) <- paste0('ref', Key(proj.umap)) + object[[paste0('ref.',reduction.model )]] <- proj.umap + } + return(object) +} #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -6468,10 +6541,10 @@ FindBridgeAnchor <- function(object.list, #' TransferLablesNN <- function( nn.object = NULL, - reference.object, - group.by = NULL, - weight.matrix = NULL + weight.matrix = NULL, + reference.labels ){ + reference.labels.matrix <- CreateCategoryMatrix(labels = as.character(reference.labels)) if (!is.null(x = weight.matrix) & !is.null(x = nn.object)) { warning('both nn.object and weight matrix are set. Only weight matrix is used for label transfer') } @@ -6484,26 +6557,28 @@ TransferLablesNN <- function( i = i, j = j, x = 1, - dims = c(nrow(select_nn), ncol(x = reference.object)) + dims = c(nrow(select_nn), nrow(reference.labels.matrix)) ) - } else if (nrow(weight.matrix) == ncol(reference.object)) { + rownames(nn.matrix) <- Cells(nn.object) + } else if (nrow(weight.matrix) == nrow(reference.labels.matrix)) { nn.matrix <- t(weight.matrix) k.nn <- 1 - } else if (ncol(weight.matrix) == ncol(reference.object)) { + } else if (ncol(weight.matrix) == nrow(reference.labels.matrix)) { nn.matrix <- weight.matrix k.nn <- 1 } else { stop('wrong weights matrix input') } - - reference.labels.matrix <- CreateCategoryMatrix(labels = reference.object[[group.by]]) query.label.mat <- nn.matrix %*% reference.labels.matrix query.label.mat <- query.label.mat/k.nn - rownames(x = query.label.mat) <- Cells(nn.object) prediction.max <- apply(X = query.label.mat, MARGIN = 1, FUN = which.max) + query.label <- colnames(x = query.label.mat)[prediction.max] query.label.score <- apply(X = query.label.mat, MARGIN = 1, FUN = max) - + names(query.label) <- names(query.label.score) <- rownames(query.label.mat) + if (is.factor(reference.labels)) { + levels(query.label) <- levels(reference.labels) + } output.list <- list(labels = query.label, scores = query.label.score, prediction.mat = query.label.mat @@ -7540,4 +7615,6 @@ FeatureSketch <- function(features, ratio = 0.8, seed = 123) { seed = seed) ) return(sketch.R) -} \ No newline at end of file +} + + From 99dfe98843ffe65e9b1b5d5eb06cba710128e8a4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 14 Dec 2022 18:46:08 -0500 Subject: [PATCH 326/979] add verbose --- R/integration.R | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 55eb2f28d..83d016f8b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3523,11 +3523,15 @@ TransferSketchLabels <- function( refdata, k = 50, reduction.model = NULL, - neighbors = NULL + neighbors = NULL, + verbose = TRUE ){ - full_sketch.nn <- neighbors %||% Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.nn + full_sketch.nn <- neighbors %||% Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.nn if (is.null(full_sketch.nn)) { + if (verbose) { + message("Finding sketch neighbors") + } full_sketch.nn <- Seurat:::NNHelper( query = Embeddings(object[[reduction]])[, dims], data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], @@ -3537,6 +3541,9 @@ TransferSketchLabels <- function( } full_sketch.weight <- Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.weight if(is.null(full_sketch.weight)) { + if (verbose) { + message("Finding sketch weight matrix") + } full_sketch.weight <- FindWeightsNN(nn.obj = full_sketch.nn, query.cells = Cells(object[[reduction]]), reference = colnames(object[[atoms]]), @@ -3549,7 +3556,9 @@ TransferSketchLabels <- function( refdata <- list(refdata) names(refdata) <- unlist(refdata) } - + if (verbose) { + message("Transfering refdata from sketch") + } for (rd in 1:length(x = refdata)) { if (isFALSE(x = refdata[[rd]])) { transfer.results[[rd]] <- NULL @@ -3567,10 +3576,13 @@ TransferSketchLabels <- function( object[[paste0('predicted.', label.rd, '.score')]] <- predicted.labels.list$scores } if (!is.null(reduction.model)) { - if (is.nul(object[[reduction.model]]@misc$model)) { + if (is.null(object[[reduction.model]]@misc$model)) { warning(reduction.model, ' does not have a stored umap model') return(object) } + if (verbose) { + message("Projection to sketch umap") + } if (ncol(full_sketch.nn) > object[[reduction.model]]@misc$model$n_neighbors) { full_sketch.nn@nn.idx <- full_sketch.nn@nn.idx[, 1:object[[reduction.model]]@misc$model$n_neighbors] full_sketch.nn@nn.dist <- full_sketch.nn@nn.dist[, 1:object[[reduction.model]]@misc$model$n_neighbors] From 1f2ce4edc41b693ac47c19b4d80964eda3c30295 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 15 Dec 2022 18:06:17 -0500 Subject: [PATCH 327/979] BPcells mean and variance --- R/preprocessing5.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 6afcc07ee..f4fa9d8d2 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -811,14 +811,13 @@ VST.IterableMatrix <-function( ) { nfeatures <- nrow(x = data) hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) - # Calculate feature means - hvf.info$mean <- matrix_stats( + hvf.stats <- matrix_stats( matrix = data, - row_stats = 'mean')$row_stats['mean',] + row_stats = 'variance')$row_stats + # Calculate feature means + hvf.info$mean <- hvf.stats['mean',] # Calculate feature variance - hvf.info$variance <- matrix_stats( - matrix = data, - row_stats = 'variance')$row_stats['variance',] + hvf.info$variance <- hvf.stats['variance',] hvf.info$variance.expected <- 0L not.const <- hvf.info$variance > 0 fit <- loess( From 399d67b7edee3403dd5a627cbff5d0a40226610f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 18 Dec 2022 00:41:43 -0500 Subject: [PATCH 328/979] BPCells mapping --- R/integration.R | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index 83d016f8b..0ed69a5fa 100644 --- a/R/integration.R +++ b/R/integration.R @@ -924,15 +924,24 @@ FindTransferAnchors <- function( approx = approx.pca ) } - projected.pca <- ProjectCellEmbeddings( - reference = reference, - reduction = reference.reduction, - query = query, - scale = scale, - dims = dims, - feature.mean = feature.mean, - verbose = verbose - ) + if (inherits(x = query[[query.assay]]$data, what = 'IterableMatrix')) { + projected.pca <- ProjectCellEmbeddings_IterableMatrix( + query.data = query[[query.assay]]$data, + reference = reference, + dims = dims, + reduction = reference.reduction) + } else { + projected.pca <- ProjectCellEmbeddings( + reference = reference, + reduction = reference.reduction, + query = query, + scale = scale, + dims = dims, + feature.mean = feature.mean, + verbose = verbose + ) + } + orig.embeddings <- Embeddings(object = reference[[reference.reduction]])[, dims] orig.loadings <- Loadings(object = reference[[reference.reduction]]) } @@ -3594,6 +3603,8 @@ TransferSketchLabels <- function( } return(object) } + + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From db8653ee27d5d999131c4e9a836890bed59431b5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 18 Dec 2022 08:58:38 -0500 Subject: [PATCH 329/979] project embeddings generic --- R/generics.R | 37 +++++++++++++++++++++++++++++++++++++ R/integration.R | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/R/generics.R b/R/generics.R index c8ba1c8a7..d9da8e9e5 100644 --- a/R/generics.R +++ b/R/generics.R @@ -376,6 +376,43 @@ NormalizeData <- function(object, ...) { UseMethod(generic = 'NormalizeData', object = object) } +#' Project query data to the reference dimensional reduction +#' +#' +#' @param query An object for query cells +#' @param reference An object for reference cells +#' @param query.assay Assay name for query object +#' @param reference.assay Assay name for reference object +#' @param reduction Name of dimensional reduction from reference object +#' @param dims Dimensions used for reference dimensional reduction +#' @param scale Determine if scale query data based on reference data variance +#' @param verbose Print progress +#' @param feature.mean Mean of features in reference +#' @param feature.sd Standard variance of features in reference +#' +#' @return A matrix with projected cell embeddings +#' +#' @rdname ProjectCellEmbeddings +#' @export ProjectCellEmbeddings +#' +#' @keywords internal +#' +ProjectCellEmbeddings <- function( + query, + reference, + query.assay = NULL, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + verbose = TRUE, + feature.mean = NULL, + feature.sd = NULL + ... +) { + UseMethod(generic = 'ProjectCellEmbeddings', object = query) +} + #' Project query into UMAP coordinates of a reference #' #' This function will take a query dataset and project it into the coordinates diff --git a/R/integration.R b/R/integration.R index 0ed69a5fa..5320c84b4 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4932,7 +4932,27 @@ RescaleQuery <- function( return(proj.data) } -ProjectCellEmbeddings <- function( +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings Assay +#' @export +#' +#' +ProjectCellEmbeddings.Assay <- function() + + + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings StdAssay +#' @export +#' +ProjectCellEmbeddings.StdAssay <- function() + + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings default +#' @export +#' +ProjectCellEmbeddings.default <- function( reference, query, reduction = "pca", @@ -7395,10 +7415,13 @@ FindBridgeIntegrationAnchors <- function( ) return(bridge_anchor) } -## project delayed array to reference PCA -ProjectCellEmbeddings_DelayedAssay <- function( +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings DelayedMatrix +#' @export +#' +ProjectCellEmbeddings.DelayedMatrix <- function( query.data, block.size = 1e9, reference, @@ -7420,7 +7443,6 @@ ProjectCellEmbeddings_DelayedAssay <- function( } else { feature.mean <- feature.mean[features] %||% RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) - feature.sd <- feature.sd[features] %||% sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) @@ -7445,14 +7467,15 @@ ProjectCellEmbeddings_DelayedAssay <- function( rownames(emb.mat) <- colnames(query.data) colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] } - return(emb.mat) } - +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings IterableMatrix #' @export +#' #' -ProjectCellEmbeddings_IterableMatrix <- function( +ProjectCellEmbeddings.IterableMatrix <- function( query.data, reference, assay = NULL, From 92db4b3a8d29fccb50584decc2b75344e27b28a4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 18 Dec 2022 18:45:24 -0500 Subject: [PATCH 330/979] complete ProjectCellEmbeddings generic --- NAMESPACE | 15 +- R/generics.R | 9 - R/integration.R | 494 ++++++++++++++++++++----------------- man/FeaturePlot.Rd | 8 +- man/IntegrateData.Rd | 6 +- man/IntegrateEmbeddings.Rd | 6 +- man/PolyFeaturePlot.Rd | 5 +- man/ScaleData.Rd | 10 + man/Seurat-package.Rd | 2 +- man/VST.Rd | 3 + man/reexports.Rd | 2 +- 11 files changed, 305 insertions(+), 255 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0a2dcd94f..c23de4ee8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,6 +74,13 @@ S3method(NormalizeData,Seurat5) S3method(NormalizeData,StdAssay) S3method(NormalizeData,V3Matrix) S3method(NormalizeData,default) +S3method(ProjectCellEmbeddings,Assay) +S3method(ProjectCellEmbeddings,DelayedMatrix) +S3method(ProjectCellEmbeddings,IterableMatrix) +S3method(ProjectCellEmbeddings,SCTAssay) +S3method(ProjectCellEmbeddings,Seurat) +S3method(ProjectCellEmbeddings,StdAssay) +S3method(ProjectCellEmbeddings,default) S3method(ProjectUMAP,DimReduc) S3method(ProjectUMAP,Seurat) S3method(ProjectUMAP,default) @@ -131,9 +138,9 @@ S3method(ScoreJackStraw,DimReduc) S3method(ScoreJackStraw,JackStrawData) S3method(ScoreJackStraw,Seurat) S3method(VST,DelayedMatrix) +S3method(VST,IterableMatrix) S3method(VST,default) S3method(VST,dgCMatrix) -S3method(VST,IterableMatrix) S3method(VST,matrix) S3method(VariableFeatures,SCTAssay) S3method(VariableFeatures,SCTModel) @@ -142,9 +149,9 @@ S3method(as.Seurat,CellDataSet) S3method(as.Seurat,SingleCellExperiment) S3method(as.SingleCellExperiment,Seurat) S3method(as.data.frame,Matrix) +S3method(as.matrix,IterableMatrix) S3method(as.sparse,H5Group) S3method(as.sparse,IterableMatrix) -S3method(as.matrix,IterableMatrix) S3method(components,SCTAssay) S3method(dim,STARmap) S3method(dim,SlideSeq) @@ -329,9 +336,9 @@ export(PrepSCTFindMarkers) export(PrepSCTIntegration) export(PrepareBridgeReference) export(Project) +export(ProjectCellEmbeddings) export(ProjectDim) export(ProjectDimReduc) -export(ProjectCellEmbeddings_IterableMatrix) export(ProjectUMAP) export(PurpleAndYellow) export(RPCAIntegration) @@ -405,10 +412,10 @@ export(TopNeighbors) export(TransferData) export(TransferSketchLabels) export(UMAPPlot) +export(UnSketchEmbeddings) export(UpdateSCTAssays) export(UpdateSeuratObject) export(UpdateSymbolList) -export(UnSketchEmbeddings) export(VST) export(VariableFeaturePlot) export(VariableFeatures) diff --git a/R/generics.R b/R/generics.R index d9da8e9e5..6d0c54072 100644 --- a/R/generics.R +++ b/R/generics.R @@ -399,15 +399,6 @@ NormalizeData <- function(object, ...) { #' ProjectCellEmbeddings <- function( query, - reference, - query.assay = NULL, - reference.assay = NULL, - reduction = "pca", - dims = 1:50, - scale = TRUE, - verbose = TRUE, - feature.mean = NULL, - feature.sd = NULL ... ) { UseMethod(generic = 'ProjectCellEmbeddings', object = query) diff --git a/R/integration.R b/R/integration.R index 5320c84b4..56a436c1a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -924,13 +924,6 @@ FindTransferAnchors <- function( approx = approx.pca ) } - if (inherits(x = query[[query.assay]]$data, what = 'IterableMatrix')) { - projected.pca <- ProjectCellEmbeddings_IterableMatrix( - query.data = query[[query.assay]]$data, - reference = reference, - dims = dims, - reduction = reference.reduction) - } else { projected.pca <- ProjectCellEmbeddings( reference = reference, reduction = reference.reduction, @@ -940,8 +933,6 @@ FindTransferAnchors <- function( feature.mean = feature.mean, verbose = verbose ) - } - orig.embeddings <- Embeddings(object = reference[[reference.reduction]])[, dims] orig.loadings <- Loadings(object = reference[[reference.reduction]]) } @@ -4867,123 +4858,317 @@ ParseRow <- function(clustering, i){ return(unlist(datasets)) } -# Rescale query with mean and sd from reference, or known mean and SD -# -# @param reference A reference object -# @param query A query object -# @param features Features to scale -# @param scale Scale data (divide by SD) -# @return Returns a matrix containing the scaled query data -RescaleQuery <- function( + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings Seurat +#' @export +#' +#' +ProjectCellEmbeddings.Seurat <- function( + query, reference, + query.assay = NULL, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + verbose = TRUE, + feature.mean = NULL, + feature.sd = NULL +) { + if (verbose) { + message("Projecting cell embeddings") + } + query.assay <- query.assay %||% DefaultAssay(object = query) + proj.pca <- ProjectCellEmbeddings( + query = query[[query.assay]], + reference = reference, + reference.assay = reference.assay, + reduction = reduction, + dims = dims, + scale = scale, + verbose = verbose, + feature.mean = feature.mean, + feature.sd = feature.sd + ) + return(proj.pca) +} + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings Assay +#' @export +#' +ProjectCellEmbeddings.Assay <- function( query, + reference, reference.assay = NULL, - query.assay = NULL, - features = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + verbose = TRUE, feature.mean = NULL, - feature.sd = NULL, - scale = TRUE + feature.sd = NULL ) { reference.assay <- reference.assay %||% DefaultAssay(object = reference) - query.assay <- query.assay %||% DefaultAssay(object = query) - features <- features %||% intersect( + features <- Reduce( + f = intersect, + x = list( + rownames(x = Loadings(object = reference[[reduction]])), rownames(x = reference[[reference.assay]]), - rownames(x = query[[query.assay]]) + rownames(x = query) + ) ) + proj.pca <- ProjectCellEmbeddings( + query = GetAssayData( + object = query, + slot = "data")[features,], + reference = reference, + reference.assay = reference.assay, + reduction = reduction, + dims = dims, + scale = scale, + verbose = verbose, + feature.mean = feature.mean, + feature.sd = feature.sd + ) + return(proj.pca) +} + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings SCTAssay +#' @export +#' +ProjectCellEmbeddings.SCTAssay <- function( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + verbose = TRUE, + feature.mean = NULL, + feature.sd = NULL +) { + features <- Reduce( + f = intersect, + x = list( + rownames(x = Loadings(object = reference[[reduction]])), + rownames(x = reference[[reference.assay]]), + rownames(x = query) + ) + ) + query.data <- GetAssayData( + object = query, + slot = "data")[features,] + ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] + proj.pca <- t(crossprod(x = ref.feature.loadings, y = query.data)) + return(proj.pca) +} + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings StdAssay +#' @export +#' +ProjectCellEmbeddings.StdAssay <- function( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + verbose = TRUE, + feature.mean = NULL, + feature.sd = NULL +) { + reference.assay <- reference.assay %||% DefaultAssay(object = reference) + features <- Reduce( + f = intersect, + x = list( + rownames(x = Loadings(object = reference[[reduction]])), + rownames(x = reference[[reference.assay]]), + rownames(x = query) + ) + ) + layers.set <- Layers(object = query, search = 'data') + proj.pca.list <- list() + cell.list <- list() + for (i in seq_along(layers.set)) { + proj.pca.list[[i]] <- t(ProjectCellEmbeddings( + query = LayerData(object = query, layer = layers.set[i], features = features), + reference = reference, + reference.assay = reference.assay, + reduction = reduction, + dims = dims, + scale = scale, + verbose = verbose, + feature.mean = feature.mean, + feature.sd = feature.sd + )) + cell.list[[i]] <- colnames(proj.pca.list[[i]]) + } + proj.pca <- matrix( + data = unlist(proj.pca.list), + nrow = nrow(proj.pca.list[[1]]), + ncol = ncol(query) + ) + rownames(proj.pca) <- rownames(proj.pca.list[[1]]) + colnames(proj.pca) <- unlist(cell.list) + proj.pca <- t(proj.pca) + proj.pca <- proj.pca[colnames(query),] + return(proj.pca) +} + +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings default +#' @export +#' +ProjectCellEmbeddings.default <- function( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + verbose = TRUE, + feature.mean = NULL, + feature.sd = NULL +){ + features <- rownames(query) reference.data <- GetAssayData( object = reference, assay = reference.assay, slot = "data")[features, ] - query.data <- GetAssayData( - object = query, - assay = query.assay, - slot = "data")[features, ] - if (is.null(x = feature.mean)) { - feature.mean <- rowMeans(x = reference.data) +if (is.null(x = feature.mean)) { + if (inherits(x = reference.data, what = 'dgCMatrix')) { + feature.mean <- RowMeanSparse(mat = reference.data) + } else { + feature.mean <- rowMeans(mat = reference.data) + } if (scale) { - feature.sd <- sqrt( - x = SparseRowVar2( - mat = as.sparse(x = reference.data), - mu = feature.mean, - display_progress = FALSE + feature.sd <- sqrt( + x = RowVarSparse( + mat = as.sparse(reference.data) ) ) feature.sd[is.na(x = feature.sd)] <- 1 } else { - feature.sd <- rep(x = 1, nrow( reference.data)) + feature.sd <- rep(x = 1, nrow(x = reference.data)) } feature.mean[is.na(x = feature.mean)] <- 1 } - proj.data <- GetAssayData( - object = query, - assay = query.assay, - slot = "data" - )[features, ] - store.names <- dimnames(x = proj.data) - if (is.numeric(x = feature.mean) && feature.mean[[1]] != "SCT") { - proj.data <- FastSparseRowScaleWithKnownStats( - mat = as.sparse(x = proj.data), + store.names <- dimnames(x = query) + if (is.numeric(x = feature.mean)) { + query <- FastSparseRowScaleWithKnownStats( + mat = as.sparse(x = query), mu = feature.mean, sigma = feature.sd, display_progress = FALSE ) } - dimnames(x = proj.data) <- store.names - return(proj.data) -} - -#' @rdname ProjectCellEmbeddings -#' @method ProjectCellEmbeddings Assay -#' @export -#' -#' -ProjectCellEmbeddings.Assay <- function() - - - -#' @rdname ProjectCellEmbeddings -#' @method ProjectCellEmbeddings StdAssay -#' @export -#' -ProjectCellEmbeddings.StdAssay <- function() + dimnames(x = query) <- store.names + ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] + proj.pca <- t(crossprod(x = ref.feature.loadings, y = query)) + return(proj.pca) +} #' @rdname ProjectCellEmbeddings -#' @method ProjectCellEmbeddings default +#' @method ProjectCellEmbeddings IterableMatrix #' @export #' -ProjectCellEmbeddings.default <- function( - reference, +#' +ProjectCellEmbeddings.IterableMatrix <- function( query, - reduction = "pca", + reference, reference.assay = NULL, - query.assay = NULL, + reduction = "pca", dims = 1:50, scale = TRUE, verbose = TRUE, feature.mean = NULL, feature.sd = NULL ) { - if (verbose) { - message("Projecting cell embeddings") + features <- rownames(query) + reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features,] + if (is.null(x = feature.mean)) { + if (inherits(x = reference.data, what = 'dgCMatrix')) { + feature.mean <- RowMeanSparse(mat = reference.data) + } else { + feature.mean <- rowMeans(mat = reference.data) + } + if (scale) { + feature.sd <- sqrt( + x = RowVarSparse( + mat = as.sparse(reference.data) + ) + ) + feature.sd[is.na(x = feature.sd)] <- 1 + } else { + feature.sd <- rep(x = 1, nrow(x = reference.data)) + } + feature.mean[is.na(x = feature.mean)] <- 1 } - reference.assay <- reference.assay %||% DefaultAssay(object = reference) - query.assay <- query.assay %||% DefaultAssay(object = query) - features <- rownames(x = Loadings(object = reference[[reduction]])) - features <- intersect(x = features, y = rownames(x = query[[query.assay]])) - proj.data <- RescaleQuery( - reference = reference, - query = query, - features = features, - scale = scale, - feature.mean = feature.mean, - feature.sd = feature.sd - ) - ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] - proj.pca <- t(crossprod(x = ref.feature.loadings, y = proj.data)) + query.scale <- (query - feature.mean)/feature.sd + query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) + proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] + rownames(proj.pca) <- colnames(query) + colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] return(proj.pca) } +#' @rdname ProjectCellEmbeddings +#' @method ProjectCellEmbeddings DelayedMatrix +#' @export +#' +ProjectCellEmbeddings.DelayedMatrix <- function( + query.data, + block.size = 1e9, + reference, + assay = NULL, + reduction, + dims = NULL, + feature.mean = NULL, + feature.sd = NULL +) { + dims <- dims %||% 1:ncol(reference[[reduction]]) + assay <- assay %||% DefaultAssay(reference) + features <- intersect(rownames(query.data), + rownames(reference[[reduction]]@feature.loadings)) + query.data <- query.data[features,] + if (IsSCT(object[[assay]])) { + # TODO: SCT reiduals projection + } else { + feature.mean <- feature.mean[features] %||% + RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) + feature.sd <- feature.sd[features] %||% + sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) + feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) + suppressMessages(setAutoBlockSize(size = block.size)) + cells.grid <- DelayedArray::colAutoGrid(x = query.data) + emb.list <- list() + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + data.block <- DelayedArray::read_block(x = query.data, + viewport = vp, + as.sparse = TRUE) + data.block <- apply(data.block, MARGIN = 2, function(x) { + x <- (x - feature.mean)/feature.sd + return(x) + }) + emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block + emb.list[[i]] <- emb.block + } + # list to matrix, column has to be cells + emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) + rownames(emb.mat) <- colnames(query.data) + colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] + } + return(emb.mat) +} + + + + # Project new data onto SVD (LSI or PCA) # # A = U∑V SVD @@ -6815,63 +7000,6 @@ IntegrationReferenceIndex <- function(object) { } -# Project data slot to the dimensional reduction -# -ProjectDataEmbeddings <- function(object, - assay = 'RNA', - feature.loadings, - ref.mean, - ref.sd, - block.size = NULL, - scale.max = 10, - verbose = TRUE ){ - features <- Reduce(f = intersect, - x = list(names(ref.mean), - rownames(object[[assay]]), - rownames(feature.loadings) - ) - ) - feature.loadings <- feature.loadings[features,] - if (verbose) { - message( paste0(length(features)," features are used")) - } - mat <- GetAssayData(object = object[[assay]], slot = 'data')[features,] - ref.mean <- ref.mean[features] - ref.sd <- ref.sd[features] - if (verbose) { - message("ScaleData and Project to feature loadings") - } - my.lapply <- ifelse( - test = verbose && nbrOfWorkers() == 1, - yes = pblapply, - no = future_lapply - ) - if (!is.null(block.size)) { - block.size = min(block.size, ncol(object)) - cell.index <- rep(x = 1:ceiling(ncol(mat)/block.size), - each = block.size )[1:ncol(mat)] - cells.list <- split(x = 1:ncol(mat), f = cell.index) - emb.list <- my.lapply(X = cells.list, - FUN = function(x) { - mat.x <- as.matrix(mat[,x]) - mat.x <- (mat.x - ref.mean) / ref.sd - mat.x[mat.x > scale.max] <- scale.max - cell.emb.x <- t(mat.x ) %*% feature.loadings - return (cell.emb.x) - } - ) - all.emb <- Reduce(rbind, emb.list) - } else { - if (inherits(x = mat, what = "dgCMatrix")) { - mat <- as.matrix(mat) - } - mat <- (mat - ref.mean) / ref.sd - mat[mat > scale.max] <- scale.max - all.emb <- t(mat) %*% feature.loadings - } - return(all.emb) -} - # Calculate mean and sd # SparseMeanSd <- function(object, @@ -7417,90 +7545,6 @@ FindBridgeIntegrationAnchors <- function( } -#' @rdname ProjectCellEmbeddings -#' @method ProjectCellEmbeddings DelayedMatrix -#' @export -#' -ProjectCellEmbeddings.DelayedMatrix <- function( - query.data, - block.size = 1e9, - reference, - assay = NULL, - reduction, - dims = NULL, - feature.mean = NULL, - feature.sd = NULL -) { - - dims <- dims %||% 1:ncol(reference[[reduction]]) - assay <- assay %||% DefaultAssay(reference) - features <- intersect(rownames(query.data), - rownames(reference[[reduction]]@feature.loadings)) - query.data <- query.data[features,] - if (IsSCT(object[[assay]])) { -# TODO: SCT reiduals projection - - } else { - feature.mean <- feature.mean[features] %||% - RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) - feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = query.data) - emb.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - data.block <- DelayedArray::read_block(x = query.data, - viewport = vp, - as.sparse = TRUE) - data.block <- apply(data.block, MARGIN = 2, function(x) { - x <- (x - feature.mean)/feature.sd - return(x) - }) - emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block - emb.list[[i]] <- emb.block - } - # list to matrix, column has to be cells - emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) - rownames(emb.mat) <- colnames(query.data) - colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] - } - return(emb.mat) -} - -#' @rdname ProjectCellEmbeddings -#' @method ProjectCellEmbeddings IterableMatrix -#' @export -#' -#' -ProjectCellEmbeddings.IterableMatrix <- function( - query.data, - reference, - assay = NULL, - reduction, - dims = NULL, - feature.mean = NULL, - feature.sd = NULL -) { - dims <- dims %||% 1:ncol(reference[[reduction]]) - assay <- assay %||% DefaultAssay(reference) - features <- intersect(rownames(query.data), - rownames(reference[[reduction]]@feature.loadings)) - query.data <- query.data[features,] - feature.mean <- feature.mean[features] %||% - RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) - feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - query.scale <- (query.data - feature.mean)/feature.sd - emb.mat <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] - rownames(emb.mat) <- colnames(query.data) - colnames(emb.mat) <- colnames(Embeddings(object = reference[[reduction]]))[dims] - return(emb.mat) -} - #' Perform integration on the joint PCA cell embeddings. #' #' This is a convenience wrapper function around the following three functions diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 6778ee488..8df3185d9 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,12 +10,8 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { - c("lightgrey", "#ff0000", "#00ff00") - } else { - - c("lightgrey", "blue") - }, + cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { + c("lightgrey", "blue") }, pt.size = NULL, alpha = 1, order = FALSE, diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index e08bd682e..c02543005 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,12 +64,10 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: - -\if{html}{\out{
}}\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -}\if{html}{\out{
}} +} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index dc0469132..c3f96ffa5 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,12 +75,10 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: - -\if{html}{\out{
}}\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -}\if{html}{\out{
}} +} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index a2b2fc588..1eacd0ecd 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,7 +33,10 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, +may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} + +\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/ScaleData.Rd b/man/ScaleData.Rd index 6deefcc8e..c77576375 100644 --- a/man/ScaleData.Rd +++ b/man/ScaleData.Rd @@ -3,6 +3,7 @@ \name{ScaleData} \alias{ScaleData} \alias{ScaleData.default} +\alias{ScaleData.IterableMatrix} \alias{ScaleData.Assay} \alias{ScaleData.Seurat} \title{Scale and center the data.} @@ -26,6 +27,15 @@ ScaleData(object, ...) ... ) +\method{ScaleData}{IterableMatrix}( + object, + features = NULL, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + ... +) + \method{ScaleData}{Assay}( object, features = NULL, diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 351af75c9..9b3fc3749 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. } \section{Package options}{ diff --git a/man/VST.Rd b/man/VST.Rd index 646e4e882..fde1a91eb 100644 --- a/man/VST.Rd +++ b/man/VST.Rd @@ -3,6 +3,7 @@ \name{VST} \alias{VST} \alias{VST.default} +\alias{VST.IterableMatrix} \alias{VST.dgCMatrix} \alias{VST.matrix} \title{Variance Stabilizing Transformation} @@ -11,6 +12,8 @@ VST(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) \method{VST}{default}(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) +\method{VST}{IterableMatrix}(data, nselect = 2000L, span = 0.3, clip = NULL, verbose = TRUE, ...) + \method{VST}{dgCMatrix}( data, margin = 1L, diff --git a/man/reexports.Rd b/man/reexports.Rd index aa8615d0c..6320f6d47 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -73,6 +73,6 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{components}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} }} From c471cd62ba60517fc1bfa5b6c4c6b9f0adf6f030 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 18 Dec 2022 23:33:44 -0500 Subject: [PATCH 331/979] update sektch label transfer --- R/integration.R | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/R/integration.R b/R/integration.R index 56a436c1a..481c80b64 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3524,11 +3524,30 @@ TransferSketchLabels <- function( k = 50, reduction.model = NULL, neighbors = NULL, + recompute.neighbors = FALSE, + recompute.weights = FALSE, verbose = TRUE ){ + + full_sketch.nn <- neighbors %||% Tool( + object = object, + slot = 'TransferSketchLabels' + )$full_sketch.nn + full_sketch.weight <- Tool( + object = object, + slot = 'TransferSketchLabels' + )$full_sketch.weight + + compute.neighbors <- is.null(x = full_sketch.nn) || + !all(Cells(full_sketch.nn) == Cells(object[[reduction]])) || + max(Indices(full_sketch.nn)) > ncol(object[[atoms]]) || + recompute.neighbors + compute.weights <- is.null(x = full_sketch.weight) || + !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || + !all(rownames(full_sketch.weight) == colnames(object[[atoms]])) || + recompute.weights - full_sketch.nn <- neighbors %||% Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.nn - if (is.null(full_sketch.nn)) { + if (compute.neighbors) { if (verbose) { message("Finding sketch neighbors") } @@ -3539,8 +3558,7 @@ TransferSketchLabels <- function( method = "annoy" ) } - full_sketch.weight <- Tool(object = object, slot = 'TransferSketchLabels')$full_sketch.weight - if(is.null(full_sketch.weight)) { + if (compute.weights) { if (verbose) { message("Finding sketch weight matrix") } @@ -3548,6 +3566,8 @@ TransferSketchLabels <- function( query.cells = Cells(object[[reduction]]), reference = colnames(object[[atoms]]), verbose = verbose) + rownames(full_sketch.weight) <- colnames(object[[atoms]]) + colnames(full_sketch.weight) <- Cells(object[[reduction]]) } object@tools$TransferSketchLabels$full_sketch.nn <- full_sketch.nn object@tools$TransferSketchLabels$full_sketch.weight <- full_sketch.weight @@ -3571,24 +3591,28 @@ TransferSketchLabels <- function( predicted.labels.list <- TransferLablesNN( reference.labels = reference.labels, weight.matrix = full_sketch.weight) - object[[paste0('predicted.', label.rd)]] <- predicted.labels.list$labels object[[paste0('predicted.', label.rd, '.score')]] <- predicted.labels.list$scores } if (!is.null(reduction.model)) { - if (is.null(object[[reduction.model]]@misc$model)) { + umap.model <- Misc(object = object[[reduction.model]], slot = 'model') + if (is.null(umap.model)) { warning(reduction.model, ' does not have a stored umap model') return(object) } if (verbose) { message("Projection to sketch umap") } - if (ncol(full_sketch.nn) > object[[reduction.model]]@misc$model$n_neighbors) { - full_sketch.nn@nn.idx <- full_sketch.nn@nn.idx[, 1:object[[reduction.model]]@misc$model$n_neighbors] - full_sketch.nn@nn.dist <- full_sketch.nn@nn.dist[, 1:object[[reduction.model]]@misc$model$n_neighbors] + if (ncol(full_sketch.nn) > umap.model$n_neighbors) { + full_sketch.nn@nn.idx <- full_sketch.nn@nn.idx[, 1:umap.model$n_neighbors] + full_sketch.nn@nn.dist <- full_sketch.nn@nn.dist[, 1:umap.model$n_neighbors] } - proj.umap <- RunUMAP(object = full_sketch.nn, reduction.model = object[[reduction.model]], verbose = verbose) - proj.umap@assay.used <- object[[reduction]]@assay.used + proj.umap <- RunUMAP( + object = full_sketch.nn, + reduction.model = object[[reduction.model]], + verbose = verbose, + assay = slot(object = object[[reduction]], name = 'assay.used') + ) Key(proj.umap) <- paste0('ref', Key(proj.umap)) object[[paste0('ref.',reduction.model )]] <- proj.umap } From cfcf7b0c9d8b49b2cb86834e219a0663f1943523 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 20 Dec 2022 10:43:47 -0500 Subject: [PATCH 332/979] calcN iterablematrix --- R/preprocessing5.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index f4fa9d8d2..d1ac28381 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1005,6 +1005,16 @@ VST.matrix <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @method .CalcN IterableMatrix +#' +.CalcN.IterableMatrix <- function(object) { + col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats + return(list( + nCount = round(col_stat['mean',] *nrow(object)), + nFeature = col_stat['nonzero',] + )) +} + .FeatureVar <- function( data, mu, From 150aa4d48590258ae37c16a7868a76b26a0387f1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 20 Dec 2022 11:02:38 -0500 Subject: [PATCH 333/979] export calcN --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index c23de4ee8..3bd919689 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -193,6 +193,7 @@ export(BlueAndRed) export(BoldTitle) export(BridgeCellsRepresentation) export(BuildClusterTree) +export(.CalcN.IterableMatrix) export(CCAIntegration) export(CalcPerturbSig) export(CalculateBarcodeInflections) From 7f9973d5e69cadb7948e446f6d8b4a6a8b482746 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 20 Dec 2022 11:26:52 -0500 Subject: [PATCH 334/979] export function --- NAMESPACE | 2 +- R/preprocessing5.R | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 3bd919689..58a33cda3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method("SCTResults<-",SCTModel) S3method("[",SlideSeq) S3method("[",VisiumV1) S3method("levels<-",SCTAssay) +S3method(.CalcN,IterableMatrix) S3method(AnnotateAnchors,IntegrationAnchorSet) S3method(AnnotateAnchors,TransferAnchorSet) S3method(AnnotateAnchors,default) @@ -193,7 +194,6 @@ export(BlueAndRed) export(BoldTitle) export(BridgeCellsRepresentation) export(BuildClusterTree) -export(.CalcN.IterableMatrix) export(CCAIntegration) export(CalcPerturbSig) export(CalculateBarcodeInflections) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index d1ac28381..c1ebb86a9 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1006,6 +1006,7 @@ VST.matrix <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #' @method .CalcN IterableMatrix +#' @export #' .CalcN.IterableMatrix <- function(object) { col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats From 801070dc5e052f46fff17d44a7b4205d31354369 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 21 Dec 2022 01:01:25 -0500 Subject: [PATCH 335/979] fix project embedding sct --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 481c80b64..856bfee41 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4904,6 +4904,7 @@ ProjectCellEmbeddings.Seurat <- function( message("Projecting cell embeddings") } query.assay <- query.assay %||% DefaultAssay(object = query) + reference.assay <- reference.assay %||% DefaultAssay(object = reference) proj.pca <- ProjectCellEmbeddings( query = query[[query.assay]], reference = reference, @@ -4933,7 +4934,6 @@ ProjectCellEmbeddings.Assay <- function( feature.mean = NULL, feature.sd = NULL ) { - reference.assay <- reference.assay %||% DefaultAssay(object = reference) features <- Reduce( f = intersect, x = list( From f48cb8efed0794687c87aaae0d06f25191fd1509 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 21 Dec 2022 11:26:45 -0500 Subject: [PATCH 336/979] fix transfer label var name --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 856bfee41..146396d47 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3591,8 +3591,8 @@ TransferSketchLabels <- function( predicted.labels.list <- TransferLablesNN( reference.labels = reference.labels, weight.matrix = full_sketch.weight) - object[[paste0('predicted.', label.rd)]] <- predicted.labels.list$labels - object[[paste0('predicted.', label.rd, '.score')]] <- predicted.labels.list$scores + object[[paste0('predicted.', rd.name)]] <- predicted.labels.list$labels + object[[paste0('predicted.', rd.name, '.score')]] <- predicted.labels.list$scores } if (!is.null(reduction.model)) { umap.model <- Misc(object = object[[reduction.model]], slot = 'model') From e082e625e321fca098142859a505117026eac6ca Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 21 Dec 2022 15:58:53 -0500 Subject: [PATCH 337/979] fix key bug --- R/integration.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 856bfee41..89a895a9a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4805,7 +4805,6 @@ PairwiseIntegrateReference <- function( integrated.data <- integrated.data[, colnames(x = unintegrated)] new.assay <- new( Class = 'Assay', - key = paste0(new.assay.name, "_"), counts = new(Class = "dgCMatrix"), data = integrated.data, scale.data = matrix(), From a7085dfa2952bd09431fa7e73784c6bafe62ce67 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 4 Jan 2023 12:44:17 -0500 Subject: [PATCH 338/979] rewrite average expression --- R/utilities.R | 380 +++++++++++++++++++++++++------------------------- 1 file changed, 188 insertions(+), 192 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 639d6e118..1d6a061c5 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -426,24 +426,170 @@ AverageExpression <- function( return.seurat = FALSE, group.by = 'ident', add.ident = NULL, - slot = 'data', + slot = 'counts', + method = 'average', verbose = TRUE, ... ) { - return( - PseudobulkExpression( + CheckDots(..., fxns = 'CreateSeuratObject') + if (!is.null(x = add.ident)) { + .Deprecated(msg = "'add.ident' is a deprecated argument, please use the 'group.by' argument instead") + group.by <- c('ident', add.ident) + } + if (!(method %in% c('average', 'aggregate'))) { + stop("'method' must be either 'average' or 'aggregate'") + } + object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) + assays <- assays %||% object.assays + if (!all(assays %in% object.assays)) { + assays <- assays[assays %in% object.assays] + if (length(x = assays) == 0) { + stop("None of the requested assays are present in the object") + } else { + warning("Requested assays that do not exist in object. Proceeding with existing assays only.") + } + } + if (length(x = slot) == 1) { + slot <- rep_len(x = slot, length.out = length(x = assays)) + } else if (length(x = slot) != length(x = assays)) { + stop("Number of slots provided does not match number of assays") + } + data <- FetchData(object = object, vars = rev(x = group.by)) + data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] + if (nrow(x = data) < ncol(x = object)) { + message("Removing cells with NA for 1 or more grouping variables") + object <- subset(x = object, cells = rownames(x = data)) + } + for (i in 1:ncol(x = data)) { + data[, i] <- as.factor(x = data[, i]) + } + num.levels <- sapply( + X = 1:ncol(x = data), + FUN = function(i) { + length(x = levels(x = data[, i])) + } + ) + if (any(num.levels == 1)) { + message(paste0("The following grouping variables have 1 value and will be ignored: ", + paste0(colnames(x = data)[which(num.levels <= 1)], collapse = ", "))) + group.by <- colnames(x = data)[which(num.levels > 1)] + data <- data[, which(num.levels > 1), drop = F] + } + category.matrix <- CreateCategoryMatrix(labels = data, method = method) + data.return <- list() + for (i in 1:length(x = assays)) { + data.return[[i]] <- PseudobulkExpression( object = object, - pb.method = 'average', - assays = assays, + method = 'average', + assays = assays[i], features = features, - return.seurat = return.seurat, group.by = group.by, add.ident = add.ident, slot = slot, verbose = verbose, ... ) - ) + } + + if (return.seurat) { + if (slot[1] == 'scale.data') { + na.matrix <- as.matrix(x = as.madata.return[[1]]) + na.matrix[1:length(x = na.matrix)] <- NA + toRet <- CreateSeuratObject( + counts = na.matrix, + project = if (pb.method == "average") "Average" else "Aggregate", + assay = names(x = data.return)[1], + check.matrix = FALSE, + ... + ) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[1], + slot = "counts", + new.data = matrix() + ) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[1], + slot = "data", + new.data = na.matrix + ) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[1], + slot = "scale.data", + new.data = data.return[[1]] + ) + } else { + toRet <- CreateSeuratObject( + counts = data.return[[1]], + project = if (pb.method == "average") "Average" else "Aggregate", + assay = names(x = data.return)[1], + check.matrix = FALSE, + ... + ) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[1], + slot = "data", + new.data = log1p(x = as.matrix(x = data.return[[1]])) + ) + } + #for multimodal data + if (length(x = data.return) > 1) { + for (i in 2:length(x = data.return)) { + if (slot[i] == 'scale.data') { + na.matrix <- as.matrix(x = data.return[[i]]) + na.matrix[1:length(x = na.matrix)] <- NA + toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = na.matrix, check.matrix = FALSE) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[i], + slot = "counts", + new.data = matrix() + ) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[i], + slot = "data", + new.data = na.matrix + ) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[i], + slot = "scale.data", + new.data = as.matrix(x = data.return[[i]]) + ) + } else { + toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) + toRet <- SetAssayData( + object = toRet, + assay = names(x = data.return)[i], + slot = "data", + new.data = log1p(x = as.matrix(x = data.return[[i]])) + ) + } + + } + } + if (DefaultAssay(object = object) %in% names(x = data.return)) { + DefaultAssay(object = toRet) <- DefaultAssay(object = object) + if (slot[which(DefaultAssay(object = object) %in% names(x = data.return))[1]] != 'scale.data') { + toRet <- ScaleData(object = toRet, verbose = verbose) + } + } + if ('ident' %in% group.by) { + first.cells <- sapply(X = 1:ncol(x = category.matrix), + FUN = function(x) { + return(category.matrix[,x, drop = FALSE ]@i[1] + 1) + } + ) + Idents(object = toRet) <- Idents(object = object)[first.cells] + } + return(toRet) + } else { + return(data.return) + } } #' Match the case of character vectors @@ -1215,90 +1361,7 @@ PseudobulkExpression <- function( verbose = TRUE, ... ) { - CheckDots(..., fxns = 'CreateSeuratObject') - if (!is.null(x = add.ident)) { - .Deprecated(msg = "'add.ident' is a deprecated argument, please use the 'group.by' argument instead") - group.by <- c('ident', add.ident) - } - if (!(pb.method %in% c('average', 'aggregate'))) { - stop("'pb.method' must be either 'average' or 'aggregate'") - } - object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) - assays <- assays %||% object.assays - if (!all(assays %in% object.assays)) { - assays <- assays[assays %in% object.assays] - if (length(x = assays) == 0) { - stop("None of the requested assays are present in the object") - } else { - warning("Requested assays that do not exist in object. Proceeding with existing assays only.") - } - } - if (length(x = slot) == 1) { - slot <- rep_len(x = slot, length.out = length(x = assays)) - } else if (length(x = slot) != length(x = assays)) { - stop("Number of slots provided does not match number of assays") - } - data <- FetchData(object = object, vars = rev(x = group.by)) - data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] - if (nrow(x = data) < ncol(x = object)) { - message("Removing cells with NA for 1 or more grouping variables") - object <- subset(x = object, cells = rownames(x = data)) - } - for (i in 1:ncol(x = data)) { - data[, i] <- as.factor(x = data[, i]) - } - num.levels <- sapply( - X = 1:ncol(x = data), - FUN = function(i) { - length(x = levels(x = data[, i])) - } - ) - if (any(num.levels == 1)) { - message(paste0("The following grouping variables have 1 value and will be ignored: ", - paste0(colnames(x = data)[which(num.levels <= 1)], collapse = ", "))) - group.by <- colnames(x = data)[which(num.levels > 1)] - data <- data[, which(num.levels > 1), drop = F] - } - if (ncol(x = data) == 0) { - message("All grouping variables have 1 value only. Computing across all cells.") - category.matrix <- matrix( - data = 1, - nrow = ncol(x = object), - dimnames = list(Cells(x = object), 'all') - ) - if (pb.method == 'average') { - category.matrix <- category.matrix / sum(category.matrix) - } - } else { - category.matrix <- sparse.model.matrix(object = as.formula( - object = paste0( - '~0+', - paste0( - "data[,", - 1:length(x = group.by), - "]", - collapse = ":" - ) - ) - )) - colsums <- colSums(x = category.matrix) - category.matrix <- category.matrix[, colsums > 0] - colsums <- colsums[colsums > 0] - if (pb.method == 'average') { - category.matrix <- SweepNonzero( - x = category.matrix, - MARGIN = 2, - STATS = colsums, - FUN = "/") - } - colnames(x = category.matrix) <- sapply( - X = colnames(x = category.matrix), - FUN = function(name) { - name <- gsub(pattern = "data\\[, [1-9]*\\]", replacement = "", x = name) - return(paste0(rev(x = unlist(x = strsplit(x = name, split = ":"))), collapse = "_")) - }) - } - data.return <- list() + for (i in 1:length(x = assays)) { @@ -1341,109 +1404,12 @@ PseudobulkExpression <- function( if (inherits(x = data.use, what = 'DelayedArray')) { data.return[[i]] <- tcrossprod_DelayedAssay(x = data.use, y = t(category.matrix)) } else { + browser() data.return[[i]] <- data.use %*% category.matrix } names(x = data.return)[i] <- assays[[i]] } - if (return.seurat) { - if (slot[1] == 'scale.data') { - na.matrix <- as.matrix(x = as.madata.return[[1]]) - na.matrix[1:length(x = na.matrix)] <- NA - toRet <- CreateSeuratObject( - counts = na.matrix, - project = if (pb.method == "average") "Average" else "Aggregate", - assay = names(x = data.return)[1], - check.matrix = FALSE, - ... - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "counts", - new.data = matrix() - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "data", - new.data = na.matrix - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "scale.data", - new.data = data.return[[1]] - ) - } else { - toRet <- CreateSeuratObject( - counts = data.return[[1]], - project = if (pb.method == "average") "Average" else "Aggregate", - assay = names(x = data.return)[1], - check.matrix = FALSE, - ... - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "data", - new.data = log1p(x = as.matrix(x = data.return[[1]])) - ) - } - #for multimodal data - if (length(x = data.return) > 1) { - for (i in 2:length(x = data.return)) { - if (slot[i] == 'scale.data') { - na.matrix <- as.matrix(x = data.return[[i]]) - na.matrix[1:length(x = na.matrix)] <- NA - toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = na.matrix, check.matrix = FALSE) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "counts", - new.data = matrix() - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "data", - new.data = na.matrix - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "scale.data", - new.data = as.matrix(x = data.return[[i]]) - ) - } else { - toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "data", - new.data = log1p(x = as.matrix(x = data.return[[i]])) - ) - } - - } - } - if (DefaultAssay(object = object) %in% names(x = data.return)) { - DefaultAssay(object = toRet) <- DefaultAssay(object = object) - if (slot[which(DefaultAssay(object = object) %in% names(x = data.return))[1]] != 'scale.data') { - toRet <- ScaleData(object = toRet, verbose = verbose) - } - } - if ('ident' %in% group.by) { - first.cells <- sapply(X = 1:ncol(x = category.matrix), - FUN = function(x) { - return(category.matrix[,x, drop = FALSE ]@i[1] + 1) - } - ) - Idents(object = toRet) <- Idents(object = object)[first.cells] - } - return(toRet) - } else { - return(data.return) - } + } #' Regroup idents based on meta.data info @@ -2651,10 +2617,35 @@ SweepNonzero <- function( CreateCategoryMatrix <- function( labels, - method = c('sum', 'average') + method = c('sum', 'average'), + cells.name = NULL ) { method <- match.arg(arg = method) - data <- cbind(labels = labels) + if (is.null(dim(labels))) { + if (length(x = unique(x = labels)) == 1) { + data <- matrix(nrow = length(x = labels), ncol = 0) + } else { + data <- cbind(labels = labels) + } + } else { + data <- labels + } + cells.name <- cells.name %||% rownames(data) + if (length(cells.name) != nrow(data)) { + stop('length of cells name should be equal to the length of input labels') + } + if (ncol(x = data) == 0) { + message("All grouping variables have 1 value only. Computing across all cells.") + category.matrix <- matrix( + data = 1, + nrow = nrow(x = data), + dimnames = list(cells.name, 'all') + ) + if (method == 'average') { + category.matrix <- category.matrix / sum(category.matrix) + } + return(category.matrix) + } group.by <- colnames(x = data) category.matrix <- sparse.model.matrix(object = as.formula( object = paste0( @@ -2678,11 +2669,16 @@ CreateCategoryMatrix <- function( STATS = colsums, FUN = "/") } + colnames(x = category.matrix) <- gsub(pattern = '_', + replacement = '-', + x = colnames(x = category.matrix) + ) colnames(x = category.matrix) <- sapply( X = colnames(x = category.matrix), FUN = function(name) { name <- gsub(pattern = "data\\[, [1-9]*\\]", replacement = "", x = name) return(paste0(rev(x = unlist(x = strsplit(x = name, split = ":"))), collapse = "_")) }) + rownames(category.matrix) <- cells.name return(category.matrix) } From a8b4d0bc15a847515c517642e57b0bc4ce5487cd Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 4 Jan 2023 13:19:54 -0500 Subject: [PATCH 339/979] init v5 site update --- R/differential_expression.R | 16 +- _pkgdown.yaml | 61 ++- index.md | 4 + vignettes/atomic_integration.Rmd | 0 vignettes/pbmc3k_tutorial.Rmd | 2 +- vignettes/seurat5_archive.Rmd | 41 ++ .../seurat5_atacseq_integration_vignette.Rmd | 235 +++++++++ vignettes/seurat5_atomic_integration.Rmd | 191 +++++++ .../seurat5_bridge_integration_vignette.Rmd | 285 +++++++++++ vignettes/seurat5_cell_cycle_vignette.Rmd | 150 ++++++ vignettes/seurat5_conversion_vignette.Rmd | 132 +++++ vignettes/seurat5_de_vignette.Rmd | 144 ++++++ vignettes/seurat5_dim_reduction_vignette.Rmd | 120 +++++ vignettes/seurat5_essential_commands.Rmd | 285 +++++++++++ vignettes/seurat5_extensions.Rmd | 35 ++ vignettes/seurat5_future_vignette.Rmd | 133 +++++ vignettes/seurat5_get_started.Rmd | 159 ++++++ vignettes/seurat5_hashing_vignette.Rmd | 291 +++++++++++ vignettes/seurat5_install.Rmd | 90 ++++ .../seurat5_integration_introduction.Rmd | 269 ++++++++++ .../seurat5_integration_large_datasets.Rmd | 119 +++++ vignettes/seurat5_integration_mapping.Rmd | 214 ++++++++ vignettes/seurat5_integration_rpca.Rmd | 188 +++++++ vignettes/seurat5_interaction_vignette.Rmd | 150 ++++++ vignettes/seurat5_merge_vignette.Rmd | 110 ++++ vignettes/seurat5_mixscape_vignette.Rmd | 372 ++++++++++++++ .../seurat5_multimodal_reference_mapping.Rmd | 394 +++++++++++++++ vignettes/seurat5_multimodal_vignette.Rmd | 240 +++++++++ vignettes/seurat5_pbmc3k_tutorial.Rmd | 395 +++++++++++++++ vignettes/seurat5_sctransform_v2_vignette.Rmd | 233 +++++++++ vignettes/seurat5_sctransform_vignette.Rmd | 156 ++++++ vignettes/seurat5_spatial_vignette.Rmd | 474 ++++++++++++++++++ vignettes/seurat5_spatial_vignette_2.Rmd | 339 +++++++++++++ vignettes/seurat5_v4_changes.Rmd | 38 ++ vignettes/seurat5_visualization_vignette.Rmd | 251 ++++++++++ ...at5_weighted_nearest_neighbor_analysis.Rmd | 443 ++++++++++++++++ vignettes/spatial_vignette_2.Rmd | 339 +++++++++++++ 37 files changed, 7091 insertions(+), 7 deletions(-) mode change 100755 => 100644 vignettes/atomic_integration.Rmd create mode 100644 vignettes/seurat5_archive.Rmd create mode 100644 vignettes/seurat5_atacseq_integration_vignette.Rmd create mode 100644 vignettes/seurat5_atomic_integration.Rmd create mode 100644 vignettes/seurat5_bridge_integration_vignette.Rmd create mode 100644 vignettes/seurat5_cell_cycle_vignette.Rmd create mode 100644 vignettes/seurat5_conversion_vignette.Rmd create mode 100644 vignettes/seurat5_de_vignette.Rmd create mode 100644 vignettes/seurat5_dim_reduction_vignette.Rmd create mode 100644 vignettes/seurat5_essential_commands.Rmd create mode 100644 vignettes/seurat5_extensions.Rmd create mode 100644 vignettes/seurat5_future_vignette.Rmd create mode 100644 vignettes/seurat5_get_started.Rmd create mode 100644 vignettes/seurat5_hashing_vignette.Rmd create mode 100644 vignettes/seurat5_install.Rmd create mode 100644 vignettes/seurat5_integration_introduction.Rmd create mode 100644 vignettes/seurat5_integration_large_datasets.Rmd create mode 100644 vignettes/seurat5_integration_mapping.Rmd create mode 100644 vignettes/seurat5_integration_rpca.Rmd create mode 100644 vignettes/seurat5_interaction_vignette.Rmd create mode 100644 vignettes/seurat5_merge_vignette.Rmd create mode 100644 vignettes/seurat5_mixscape_vignette.Rmd create mode 100644 vignettes/seurat5_multimodal_reference_mapping.Rmd create mode 100644 vignettes/seurat5_multimodal_vignette.Rmd create mode 100644 vignettes/seurat5_pbmc3k_tutorial.Rmd create mode 100644 vignettes/seurat5_sctransform_v2_vignette.Rmd create mode 100644 vignettes/seurat5_sctransform_vignette.Rmd create mode 100644 vignettes/seurat5_spatial_vignette.Rmd create mode 100644 vignettes/seurat5_spatial_vignette_2.Rmd create mode 100644 vignettes/seurat5_v4_changes.Rmd create mode 100644 vignettes/seurat5_visualization_vignette.Rmd create mode 100644 vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd create mode 100644 vignettes/spatial_vignette_2.Rmd diff --git a/R/differential_expression.R b/R/differential_expression.R index ee2bc13cd..9b714d161 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2319,8 +2319,18 @@ WilcoxDETest <- function( yes = FALSE, no = TRUE ) + presto.check <- PackageCheck("presto", error = FALSE) limma.check <- PackageCheck("limma", error = FALSE) - if (limma.check[1] && overflow.check) { + group.info <- data.frame(row.names = c(cells.1, cells.2)) + group.info[cells.1, "group"] <- "Group1" + group.info[cells.2, "group"] <- "Group2" + group.info[, "group"] <- factor(x = group.info[, "group"]) + if (presto.check[1] && overflow.check) { + data.use <- data.use[, names(x = group.info), drop = FALSE] + res <- presto::wilcoxauc(X = data.use, y = group.info) + res <- res[1:(nrow(x = res)/2),] + p_val <- res$pval + } else if (limma.check[1] && overflow.check) { p_val <- my.sapply( X = 1:nrow(x = data.use), FUN = function(x) { @@ -2342,10 +2352,6 @@ WilcoxDETest <- function( ) options(Seurat.limma.wilcox.msg = FALSE) } - group.info <- data.frame(row.names = c(cells.1, cells.2)) - group.info[cells.1, "group"] <- "Group1" - group.info[cells.2, "group"] <- "Group2" - group.info[, "group"] <- factor(x = group.info[, "group"]) data.use <- data.use[, rownames(x = group.info), drop = FALSE] p_val <- my.sapply( X = 1:nrow(x = data.use), diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 17c2a9464..326caab73 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -24,8 +24,10 @@ navbar: href: articles/pbmc3k_tutorial.html - text: "Using Seurat with multi-modal data" href: articles/multimodal_vignette.html - - text: "Analysis, visualization, and integration of spatial datasets with Seurat" + - text: "Analysis of spatial datasets (Sequencing-based)" href: articles/spatial_vignette.html + - text: "Analysis of spatial datasets (Imaging-based)" + href: articles/spatial_vignette_2.html - text: ------- - text: Data Integration - text: "Introduction to scRNA-seq integration" @@ -72,6 +74,63 @@ navbar: href: articles/interaction_vignette.html - text: "Merging Seurat objects" href: articles/merge_vignette.html + - text: "Seurat 5 Vignettes" + menu: + - text: Introductory Vignettes + - text: "PBMC 3K guided tutorial" + href: articles/seurat5_pbmc3k_tutorial.html + - text: "Using Seurat with multi-modal data" + href: articles/seurat5_multimodal_vignette.html + - text: "Analysis of spatial datasets (Sequencing-based)" + href: articles/seurat5_spatial_vignette.html + - text: "Analysis of spatial datasets (Imaging-based)" + href: articles/seurat5_spatial_vignette_2.html + - text: ------- + - text: Data Integration + - text: "Introduction to scRNA-seq integration" + href: articles/seurat5_integration_introduction.html + - text: "Mapping and annotating query datasets" + href: articles/seurat5_integration_mapping.html + - text: "Fast integration using reciprocal PCA (RPCA)" + href: articles/seurat5_integration_rpca.html + - text: "Tips for integrating large datasets" + href: articles/seurat5_integration_large_datasets.html + - text: "Integrating scRNA-seq and scATAC-seq data" + href: articles/seurat5_atacseq_integration_vignette.html + - text: "Multimodal reference mapping" + href: articles/seurat5_multimodal_reference_mapping.html + - text: ------- + - text: New Statistical Methods + - text: "Weighted Nearest Neighbor Analysis" + href: articles/seurat5_weighted_nearest_neighbor_analysis.html + - text: "Mixscape Vignette" + href: articles/seurat5_mixscape_vignette.html + - text: "Using sctransform in Seurat" + href: articles/seurat5_sctransform_vignette.html + - text: "SCTransform, v2 regularization" + href: articles/seurat5_sctransform_v2_vignette.html + - text: ------- + - text: Other + - text: "Data visualization vignette" + href: articles/seurat5_visualization_vignette.html + - text: "Cell-cycle scoring and regression" + href: articles/seurat5_cell_cycle_vignette.html + - text: "Differential expression testing" + href: articles/seurat5_de_vignette.html + - text: "Demultiplexing with hashtag oligos (HTOs)" + href: articles/seurat5_hashing_vignette.html + - text: "Interoperability between single-cell object formats" + href: articles/seurat5_conversion_vignette.html + - text: "Parallelization in Seurat with future" + href: articles/seurat5_future_vignette.html + - text: "Dimensional reduction vignette" + href: articles/seurat5_dim_reduction_vignette.html + - text: "Seurat essential commands list" + href: articles/seurat5_essential_commands.html + - text: "Seurat interaction tips" + href: articles/seurat5_interaction_vignette.html + - text: "Merging Seurat objects" + href: articles/seurat5_merge_vignette.html - text: Extensions href: articles/extensions.html - text: FAQ diff --git a/index.md b/index.md index 7925418e9..a72af2569 100644 --- a/index.md +++ b/index.md @@ -1,5 +1,9 @@ ![](articles/assets/seurat_banner.jpg) +# Pre-release of Seurat 5.0 + +This is some text about Seurat v5.0 + # Official release of Seurat 4.0 We are excited to release Seurat v4.0! This update brings the following new features and functionality: diff --git a/vignettes/atomic_integration.Rmd b/vignettes/atomic_integration.Rmd old mode 100755 new mode 100644 diff --git a/vignettes/pbmc3k_tutorial.Rmd b/vignettes/pbmc3k_tutorial.Rmd index a51c8b463..87461177e 100644 --- a/vignettes/pbmc3k_tutorial.Rmd +++ b/vignettes/pbmc3k_tutorial.Rmd @@ -59,7 +59,7 @@ pbmc.data[c("CD3D","TCL1A","MS4A1"), 1:30] The `.` values in the matrix represent 0s (no molecules detected). Since most values in an scRNA-seq matrix are 0, Seurat uses a sparse-matrix representation whenever possible. This results in significant memory and speed savings for Drop-seq/inDrop/10x data. -````{r} +```{r} dense.size <- object.size(as.matrix(pbmc.data)) dense.size sparse.size <- object.size(pbmc.data) diff --git a/vignettes/seurat5_archive.Rmd b/vignettes/seurat5_archive.Rmd new file mode 100644 index 000000000..00f8321d5 --- /dev/null +++ b/vignettes/seurat5_archive.Rmd @@ -0,0 +1,41 @@ +--- +title: "Documentation Archive" +output: + html_document: + theme: united + df_print: kable +--- + +```{r helper, include = FALSE} +make_vignette_button <- function(name, title, version) { + url <- paste0("../archive/", version, "/", name, ".html") + paste0('', version, '') +} +``` + +```{r yaml, include = FALSE} +library(yaml) +vdat <- read_yaml(file = "archive.yaml") +``` +In version 4, the Seurat documentation was transitioned to pkgdown. Here we provide access to all previous versions of the documentation. + +# Version 2-3 tutorials + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +for (i in 1:length(x = vdat$vignettes)) { + vignette <- vdat$vignettes[[i]] + cat('

', vignette$title, '

') + for(j in 1:length(x = vignette$versions)) { + cat(make_vignette_button(name = vignette$name, title = vignette$title, version = vignette$version[j])) + } +} +``` + +# Version 1.3-1.4 tutorials + +For versions 1.3-1.4, we provide access to the old documentation pages [here](../archive/v1.4/get_started_v1_4.html) + +# Version <=1.2 tutorials + +For versions <=1.2, we provide access to the old documentation pages [here](../archive/v1.2/get_started_v1_2.html) + diff --git a/vignettes/seurat5_atacseq_integration_vignette.Rmd b/vignettes/seurat5_atacseq_integration_vignette.Rmd new file mode 100644 index 000000000..4682479cf --- /dev/null +++ b/vignettes/seurat5_atacseq_integration_vignette.Rmd @@ -0,0 +1,235 @@ +--- +title: "Integrating scRNA-seq and scATAC-seq data" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r markdown.setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + fig.width = 12, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +options(SeuratData.repo.use = 'satijalab04.nygenome.org') +``` + +Single-cell transcriptomics has transformed our ability to characterize cell states, but deep biological understanding requires more than a taxonomic listing of clusters. As new methods arise to measure distinct cellular modalities, a key analytical challenge is to integrate these datasets to better understand cellular identity and function. For example, users may perform scRNA-seq and scATAC-seq experiments on the same biological system and to consistently annotate both datasets with the same set of cell type labels. This analysis is particularly challenging as scATAC-seq datasets are difficult to annotate, due to both the sparsity of genomic data collected at single-cell resolution, and the lack of interpretable gene markers in scRNA-seq data. + +In [Stuart\*, Butler\* et al, 2019](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8), we introduce methods to integrate scRNA-seq and scATAC-seq datasets collected from the same biological system, and demonstrate these methods in this vignette. In particular, we demonstrate the following analyses: + +* How to use an annotated scRNA-seq dataset to label cells from an scATAC-seq experiment +* How to co-visualize (co-embed) cells from scRNA-seq and scATAC-seq +* How to project scATAC-seq cells onto a UMAP derived from an scRNA-seq experiment + +This vignette makes extensive use of the [Signac package](https://satijalab.org/signac/), recently developed for the analysis of chromatin datasets collected at single-cell resolution, including scATAC-seq. Please see the Signac website for additional [vignettes](https://satijalab.org/signac/articles/pbmc_vignette.html) and documentation for analyzing scATAC-seq data. + +We demonstrate these methods using a publicly available ~12,000 human PBMC 'multiome' dataset from 10x Genomics. In this dataset, scRNA-seq and scATAC-seq profiles were simultaneously collected in the same cells. For the purposes of this vignette, we treat the datasets as originating from two different experiments and integrate them together. Since they were originally measured in the same cells, this provides a ground truth that we can use to assess the accuracy of integration. We emphasize that our use of the multiome dataset here is for demonstration and evaluation purposes, and that users should apply these methods to scRNA-seq and scATAC-seq datasets that are collected separately. We provide a separate [weighted nearest neighbors vignette (WNN)](weighted_nearest_neighbor_analysis.html) that describes analysis strategies for multi-omic single-cell data. + +# Load in data and process each modality individually + +The PBMC multiome dataset is available from [10x genomics](https://support.10xgenomics.com/single-cell-multiome-atac-gex/datasets/1.0.0/pbmc_granulocyte_sorted_10k). To facilitate easy loading and exploration, it is also available as part of our SeuratData package. We load the RNA and ATAC data in separately, and pretend that these profiles were measured in separate experiments. We annotated these cells in our [WNN](weighted_nearest_neighbor_analysis.html) vignette, and the annotations are also included in SeuratData. + +```{r installdata} +library(SeuratData) +# install the dataset and load requirements +InstallData('pbmcMultiome') +``` + +```{r loadpkgs} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(Signac) +library(EnsDb.Hsapiens.v86) +library(ggplot2) +library(cowplot) +``` + +```{r load_data} +# load both modalities +pbmc.rna <- LoadData("pbmcMultiome", "pbmc.rna") +pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac") + +pbmc.rna <- UpdateSeuratObject(pbmc.rna) +pbma.atac <- UpdateSeuratObject(pbmc.atac) + +# repeat QC steps performed in the WNN vignette +pbmc.rna <- subset(pbmc.rna, seurat_annotations != 'filtered') +pbmc.atac <- subset(pbmc.atac, seurat_annotations != 'filtered') + +# Perform standard analysis of each modality independently +# RNA analysis +pbmc.rna <- NormalizeData(pbmc.rna) +pbmc.rna <- FindVariableFeatures(pbmc.rna) +pbmc.rna <- ScaleData(pbmc.rna) +pbmc.rna <- RunPCA(pbmc.rna) +pbmc.rna <- RunUMAP(pbmc.rna, dims = 1:30) + +# ATAC analysis +# add gene annotation information +annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) +seqlevelsStyle(annotations) <- 'UCSC' +genome(annotations) <- "hg38" +Annotation(pbmc.atac) <- annotations + +# We exclude the first dimension as this is typically correlated with sequencing depth +pbmc.atac <- RunTFIDF(pbmc.atac) +pbmc.atac <- FindTopFeatures(pbmc.atac, min.cutoff = 'q0') +pbmc.atac <- RunSVD(pbmc.atac) +pbmc.atac <- RunUMAP(pbmc.atac, reduction = 'lsi', dims = 2:30, reduction.name = "umap.atac", reduction.key = "atacUMAP_") +``` + +Now we plot the results from both modalities. Cells have been previously annotated based on transcriptomic state. We will predict annotations for the scATAC-seq cells. + +```{r viz1} +p1 <- DimPlot(pbmc.rna, group.by = 'seurat_annotations', label = TRUE) + NoLegend() + ggtitle("RNA") +p2 <- DimPlot(pbmc.atac, group.by = 'orig.ident', label = FALSE) + NoLegend() + ggtitle("ATAC") +p1 + p2 +``` + +```{r save.img, include = TRUE} +plot <- (p1 + p2) & + xlab("UMAP 1") & ylab("UMAP 2") & + theme(axis.title = element_text(size = 18)) +ggsave(filename = "../output/images/atacseq_integration_vignette.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +# Identifying anchors between scRNA-seq and scATAC-seq datasets + +In order to identify 'anchors' between scRNA-seq and scATAC-seq experiments, we first generate a rough estimate of the transcriptional activity of each gene by quantifying ATAC-seq counts in the 2 kb-upstream region and gene body, using the `GeneActivity()` function in the Signac package. The ensuing gene activity scores from the scATAC-seq data are then used as input for canonical correlation analysis, along with the gene expression quantifications from scRNA-seq. We perform this quantification for all genes identified as being highly variable from the scRNA-seq dataset. + +```{r gene.activity} +# quantify gene activity +gene.activities <- GeneActivity(pbmc.atac, features = VariableFeatures(pbmc.rna)) + +# add gene activities as a new assay +pbmc.atac[["ACTIVITY"]] <- CreateAssayObject(counts = gene.activities) + +# normalize gene activities +DefaultAssay(pbmc.atac) <- "ACTIVITY" +pbmc.atac <- NormalizeData(pbmc.atac) +pbmc.atac <- ScaleData(pbmc.atac, features = rownames(pbmc.atac)) +``` + +```{r label.xfer} +# Identify anchors +transfer.anchors <- FindTransferAnchors( + reference = pbmc.rna, + query = pbmc.atac, + features = VariableFeatures(object = pbmc.rna), + reference.assay = 'RNA', + query.assay = 'ACTIVITY', + reduction = 'cca' +) +``` + +# Annotate scATAC-seq cells via label transfer + +After identifying anchors, we can transfer annotations from the scRNA-seq dataset onto the scATAC-seq cells. The annotations are stored in the `seurat_annotations` field, and are provided as input to the `refdata` parameter. The output will contain a matrix with predictions and confidence scores for each ATAC-seq cell. + +```{r transfer.data} +celltype.predictions <- TransferData( + anchorset = transfer.anchors, + refdata = pbmc.rna$seurat_annotations, + weight.reduction = pbmc.atac[['lsi']], + dims = 2:30 +) + +pbmc.atac <- AddMetaData(pbmc.atac, metadata = celltype.predictions) +``` + +
+ **Why do you choose different (non-default) values for reduction and weight.reduction?** + +In `FindTransferAnchors()`, we typically project the PCA structure from the reference onto the query when transferring between scRNA-seq datasets. However, when transferring across modalities we find that CCA better captures the shared feature correlation structure and therefore set `reduction = 'cca'` here. Additionally, by default in `TransferData()` we use the same projected PCA structure to compute the weights of the local neighborhood of anchors that influence each cell's prediction. In the case of scRNA-seq to scATAC-seq transfer, we use the low dimensional space learned by computing an LSI on the ATAC-seq data to compute these weights as this better captures the internal structure of the ATAC-seq data. + +
+\ + +After performing transfer, the ATAC-seq cells have predicted annotations (transferred from the scRNA-seq dataset) stored in the `predicted.id` field. Since these cells were measured with the multiome kit, we also have a ground-truth annotation that can be used for evaluation. You can see that the predicted and actual annotations are extremely similar. + + +```{r viz.label.accuracy} +pbmc.atac$annotation_correct <- pbmc.atac$predicted.id == pbmc.atac$seurat_annotations +p1 <- DimPlot(pbmc.atac, group.by = 'predicted.id', label = TRUE) + NoLegend() + ggtitle("Predicted annotation") +p2 <- DimPlot(pbmc.atac, group.by = 'seurat_annotations', label = TRUE) + NoLegend() + ggtitle("Ground-truth annotation") +p1 | p2 +``` + +In this example, the annotation for an scATAC-seq profile is correctly predicted via scRNA-seq integration ~90% of the time. In addition, the `prediction.score.max` field quantifies the uncertainty associated with our predicted annotations. We can see that cells that are correctly annotated are typically associated with high prediction scores (>90%), while cells that are incorrectly annotated are associated with sharply lower prediction scores (<50%). Incorrect assignments also tend to reflect closely related cell types (i.e. Intermediate vs. Naive B cells). + +```{r score.viz, fig.height = 5} +predictions <- table(pbmc.atac$seurat_annotations, pbmc.atac$predicted.id) +predictions <- predictions / rowSums(predictions) # normalize for number of cells in each cell type +predictions <- as.data.frame(predictions) +p1 <- ggplot(predictions, aes(Var1, Var2, fill = Freq)) + + geom_tile() + + scale_fill_gradient(name = "Fraction of cells", low = "#ffffc8", high = "#7d0025") + + xlab("Cell type annotation (RNA)") + + ylab("Predicted cell type label (ATAC)") + + theme_cowplot() + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + +correct <- length(which(pbmc.atac$seurat_annotations == pbmc.atac$predicted.id)) +incorrect <- length(which(pbmc.atac$seurat_annotations != pbmc.atac$predicted.id)) +data <- FetchData(pbmc.atac, vars = c("prediction.score.max", "annotation_correct")) +p2 <- ggplot(data, aes(prediction.score.max, fill = annotation_correct, colour = annotation_correct)) + geom_density(alpha = 0.5) + theme_cowplot() + scale_fill_discrete(name = "Annotation Correct", labels = c(paste0("FALSE (n = ", incorrect, ")"), paste0("TRUE (n = ", correct, ")"))) + scale_color_discrete(name = "Annotation Correct", labels = c(paste0("FALSE (n = ", incorrect, ")"), paste0("TRUE (n = ", correct, ")"))) + xlab("Prediction Score") +p1 + p2 +``` + +# Co-embedding scRNA-seq and scATAC-seq datasets + +In addition to transferring labels across modalities, it is also possible to visualize scRNA-seq and scATAC-seq cells on the same plot. We emphasize that this step is primarily for visualization, and is an optional step. Typically, when we perform integrative analysis between scRNA-seq and scATAC-seq datasets, we focus primarily on label transfer as described above. We demonstrate our workflows for co-embedding below, and again highlight that this is for demonstration purposes, especially as in this particular case both the scRNA-seq profiles and scATAC-seq profiles were actually measured in the same cells. + +In order to perform co-embedding, we first 'impute' RNA expression into the scATAC-seq cells based on the previously computed anchors, and then merge the datasets. + +```{r coembed} +# note that we restrict the imputation to variable genes from scRNA-seq, but could impute the +# full transcriptome if we wanted to +genes.use <- VariableFeatures(pbmc.rna) +refdata <- GetAssayData(pbmc.rna, assay = "RNA", slot = "data")[genes.use, ] + +# refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells. imputation +# (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells +imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = pbmc.atac[["lsi"]], dims = 2:30) +pbmc.atac[["RNA"]] <- imputation + +coembed <- merge(x = pbmc.rna, y = pbmc.atac) + +# Finally, we run PCA and UMAP on this combined object, to visualize the co-embedding of both +# datasets +coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE) +coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE) +coembed <- RunUMAP(coembed, dims = 1:30) + +DimPlot(coembed, group.by = c("orig.ident","seurat_annotations")) +``` + +```{r save.times, include = TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/atacseq_integration_vignette.csv") +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_atomic_integration.Rmd b/vignettes/seurat5_atomic_integration.Rmd new file mode 100644 index 000000000..fedce4754 --- /dev/null +++ b/vignettes/seurat5_atomic_integration.Rmd @@ -0,0 +1,191 @@ +--- +title: "Atomic sketch integration for scRNA-seq data" +output: + html_document: + df_print: paged +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + + +The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory at once. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. + +In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments from five studies, each profiling of human immune cells (PBMC) from COVID patients. Specifically, we demonstrate how to perform the following steps + +* Sample a representative subset of cells ('atoms') from each dataset +* Integrate the atoms from each dataset +* Reconstruct (integrate) the full datasets, based on the atoms + +First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. + +```{r install, eval=FALSE} +if (!requireNamespace("remotes", quietly = TRUE)) { + install.packages("remotes") +} +remotes::install_github("satijalab/seurat", "feat/dictionary") +``` + +```{r message=FALSE, warning=FALSE} +library(Seurat) +library(SeuratDisk) +library(patchwork) +``` + +## Downloading datasets + +We obtained datasets in h5seurat format from a public [resource compiled by the Gottardo Lab](https://atlas.fredhutch.org/fredhutch/covid/). In this analysis, we use the [Arunachalam](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/arunachalam_2020_processed.HDF5), [Combes](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/combes_2021_processed.HDF5), [Lee](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/lee_2020_processed.HDF5), [Wilk](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/wilk_2020_processed.HDF5), and [Yao](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/yao_2021_processed.HDF5) datasets, but you can download additional data from this resource and include it in the vignette below. + +## Sample representative atoms from each dataset + +Inspired by pioneering work aiming to identify ['sketches'](https://www.sciencedirect.com/science/article/pii/S2405471219301528) of scRNA-seq data, our first step is to sample a representative set of cells from each dataset. We compute a leverage score (estimate of ['statistical leverage'](https://arxiv.org/abs/1109.3843)) for each cell, which helps to identify cells that are likely to be member of rare subpopulations and ensure that these are included in our representative sample. Importantly, the estimation of leverage scores only requires data normalization, can be computed efficiently for sparse datasets, and does not require any intensive computation or dimensional reduction steps. + +We load each object separately, perform basic preprocessing (normalization and variable feature selection), and select and store 5,000 representative cells (which we call 'atoms') from each dataset. We then delete the full dataset from memory, before loading the next one in. + +```{r init, results='hide', message=FALSE, fig.keep='none'} + +file.dir <- '/brahms/haoy/vignette_data/PBMCVignette/' +files.set <- c("arunachalam_2020_processed.HDF5", "combes_2021_processed.HDF5","lee_2020_processed.HDF5","wilk_2020_processed.HDF5","yao_2021_processed.HDF5") + +atoms.list <- list() +for (i in 1:length(files.set)) { + + # load in Seurat object + object <- LoadH5Seurat(file = paste0(file.dir ,files.set[i]), assays = 'RNA') + dataset_name <- gsub("_processed.HDF5", "", files.set[i]) + object$dataset <- dataset_name + + # Rename cells to avoid future conflicts + object <- RenameCells(object = object, add.cell.id = dataset_name) + + # basic preprocessing + object <- NormalizeData(object) + object <- FindVariableFeatures(object) + + # calculate leverage score and sample 5000 cells based on leverage score + atoms.i <- LeverageScoreSampling(object = object, num.cells = 5000) + atoms.list[[i]] <- atoms.i +} + +# delete full object from memory +# note that this is optional, if you can store the full datasets in memory, you dont have to reload them later +rm(object) +``` + + +## Perform integration on the atoms from different datasets + +Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we utilize a new wrapper function that takes a list of Seurat object and runs an optimized version of the [Fast integration using reciprocal PCA](https://satijalab.org/seurat/articles/integration_rpca.html) in Seurat workflow. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated_dr`). We perform SCTransform normalization prior to performing integration, but this step is optional. + +However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. For example, we also demonstrate below how to use [Harmony](https://github.com/immunogenomics/harmony), as an alternative integration approach. + + +```{r fast.integration} +# optional step: SCTransform normalization +for (i in 1:length(atoms.list)) { + atoms.list[[i]] <- SCTransform(atoms.list[[i]], verbose = FALSE) +} + +# perform integration +features <- SelectIntegrationFeatures(object.list = atoms.list) +atoms.merge <- FastRPCAIntegration(object.list = atoms.list, dims = 1:30, normalization.method = 'SCT', anchor.features = features) + +# we can generate a 2D visualization representing the integrated atoms +atom.reduction <- 'integrated_dr' +atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) +DimPlot(atoms.merge, group.by = 'dataset') +``` + +
+ **Alternative: integrate atoms using Harmony** + +As an alternative approach to integrate atoms, and to demonstrate the flexibility of our atomic sketch procedure, we can also use the [Harmony within the Seurat workflow](https://github.com/immunogenomics/harmony) to integrate the atoms. The integration procedure returns a Seurat object with a low-dimensional space (stored as the `harmony` dimensional reduction) that jointly represents atoms from all datasets. + +```{r, eval = FALSE} +library(harmony) +atoms.merge <- merge(atoms.list[[1]], atoms.list[2:length(atoms.list)]) +VariableFeatures(atoms.merge) <- SelectIntegrationFeatures(object.list = atoms.list) +atoms.merge <- ScaleData(atoms.merge) +atoms.merge <- RunPCA(atoms.merge) +atoms.merge <- RunHarmony(atoms.merge, project.dim = FALSE, group.by.vars = 'dataset') +atom.reduction <- 'harmony' + +atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) +DimPlot(atoms.merge, group.by = 'dataset') +``` + +
+ +--- + +## Integrate all cells from all datasets + +Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `IntegrateSketchEmbeddings` function to integrate all cells. After this function is run, each cell in the object has a + +```{r load.full.data} +integrated_objects <- list() +for (i in 1:length(files.set)) { + + # load in Seurat object / basic preprocessing + object <- LoadH5Seurat(file = paste0(file.dir , files.set[i]), assays = 'RNA') + dataset_name <- gsub("_processed.HDF5", "", files.set[i]) + object$dataset <- dataset_name + object <- RenameCells(object = object, add.cell.id = dataset_name) + object <- NormalizeData(object) + + # Integrate all cells into the same space as the atoms + object <- IntegrateSketchEmbeddings(object = object, atom.sketch.object = atoms.merge, atom.sketch.reduction = atom.reduction, features = features) + + # At this point, you can save the results/delete the object + # Since we want to compute a joint visualization of all cells later, + # we save the object with the dimensional reduction and just the top 100 variable features + object <- DietSeurat(object, features = features[1:100], dimreducs = 'integrated_dr') + integrated_objects[[i]] <- object + rm(object) +} +``` + +We perform UMAP visualization on the integrated embeddings. +```{r } +obj.merge <- merge(integrated_objects[[1]], integrated_objects[2:length(integrated_objects)], merge.dr = 'integrated_dr') +obj.merge <- RunUMAP(obj.merge, reduction = 'integrated_dr', dims = 1:30) +``` + +Now we can visualize the results, plotting the scRNA-seq cells based on dataset batches and pre-annotated labels annotations on the UMAP embedding. We also add pre-computed cell annotations to this object (you can download the cell annotation metadata at [this link](https://seurat.nygenome.org/vignette_data/atomic_integration/pbmc_annotations.txt)). + +```{r split.dim} +annotation_data <- read.table("/brahms/haoy/vignette_data/PBMCVignette/pbmc_annotations.txt") +obj.merge <- AddMetaData(obj.merge, metadata = annotation_data) +DimPlot(obj.merge, reduction = "umap", group.by = "dataset", shuffle = TRUE, raster = FALSE) +DimPlot(obj.merge, reduction = "umap", group.by = "celltype.l2", raster = FALSE) +``` + +Note that Neutrophils are present primarily in a single dataset (Combes), present at very low frequency in two others (Wilk and Lee), and absent in the remaining datasets. Despite the fact that this population is not present in all samples, it is correctly integrated by our atomic sketch procedure. + + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_bridge_integration_vignette.Rmd b/vignettes/seurat5_bridge_integration_vignette.Rmd new file mode 100644 index 000000000..9561ec0a5 --- /dev/null +++ b/vignettes/seurat5_bridge_integration_vignette.Rmd @@ -0,0 +1,285 @@ +--- +title: "Dictionary Learning for cross-modality integration" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now) + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + + +In the same way that read mapping tools have transformed genome sequence analysis, the ability to map new datasets to established references represents an exciting opportunity for the field of single-cell genomics. Along with others in the community, we have developed [tools to map and interpret query datasets](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html), and have also constructed a [set of scRNA-seq datasets for diverse mammalian tissues](http://azimuth.hubmapconsortium.org). + +A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, bioRxiv 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. + +In this vignette we demonstrate: + +* Loading in and pre-processing the scATAC-seq, multiome, and scRNA-seq reference datasets +* Mapping the scATAC-seq dataset via bridge integration +* Exploring and assessing the resulting annotations + +First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. + +```{r, message=FALSE, warning=FALSE} +library(remotes) +remotes::install_github("satijalab/seurat", "feat/dictionary", quiet = TRUE) +library(Seurat) +library(SeuratDisk) +library(Signac) +library(EnsDb.Hsapiens.v86) +library(dplyr) +library(ggplot2) +``` + +## Load the bridge, query, and reference datasets + +We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a healthy donor. The dataset measures RNA-seq and ATAC-seq in the same cell, and is available for download from 10x Genomics [here](https://www.10xgenomics.com/resources/datasets/pbmc-from-a-healthy-donor-granulocytes-removed-through-cell-sorting-10-k-1-standard-2-0-0). We follow the loading instructions from the [Signac package vignettes](https://satijalab.org/signac/articles/pbmc_multiomic.html). Note that when using Signac, please make sure you are using the [latest version of Bioconductor]([http://www.bioconductor.org/news/bioc_3_14_release/]), as [users have reported errors](https://github.com/timoast/signac/issues/687) when using older BioC versions. + +
+ **Load and setup the 10x multiome object** + +```{r} +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +# extract RNA and ATAC data +rna_counts <- inputdata.10x$`Gene Expression` +atac_counts <- inputdata.10x$Peaks +# Create Seurat object +obj.multi <- CreateSeuratObject(counts = rna_counts) +# Get % of mitochondrial genes +obj.multi[["percent.mt"]] <- PercentageFeatureSet(obj.multi, pattern = "^MT-") + +# add the ATAC-seq assay +grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) +grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) +atac_counts <- atac_counts[as.vector(grange.use), ] + +# Get gene annotations +annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) +# Change style to UCSC +seqlevelsStyle(annotations) <- 'UCSC' +genome(annotations) <- "hg38" + +# File with ATAC per fragment information file +frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" + +# Add in ATAC-seq data as ChromatinAssay object +chrom_assay <- CreateChromatinAssay( + counts = atac_counts, + sep = c(":", "-"), + genome = 'hg38', + fragments = frag.file, + min.cells = 10, + annotation = annotations +) + +# Add the ATAC assay to the multiome object +obj.multi[["ATAC"]] <- chrom_assay + +# Filter ATAC data based on QC metrics +obj.multi <- subset( + x = obj.multi, + subset = nCount_ATAC < 7e4 & + nCount_ATAC > 5e3 & + nCount_RNA < 25000 & + nCount_RNA > 1000 & + percent.mt < 20 +) + + +``` +
+ +--- + +The scATAC-seq query dataset represents ~10,000 PBMC from a healthy donor, and is available for download [here](https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-atac-v-1-1-chromium-x-1-1-standard-2-0-0). We load in the peak/cell matrix, store the path to the fragments file, and add gene annotations to the object, following the steps as with the ATAC data in the multiome experiment. + +We note that it is important to quantify the same set of genomic features in the query dataset as are quantified in the multi-omic bridge. We therefore requantify the set of scATAC-seq peaks using the `FeatureMatrix` command. This is also described in the [Signac vignettes](https://satijalab.org/signac/articles/integrate_atac.html) and shown below. + +
+ **Load and setup the 10x scATAC-seq query** + +```{r, message=FALSE, warning=FALSE} +# Load ATAC dataset +atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" + +# Get gene annotations +annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) + +# Change to UCSC style +seqlevelsStyle(annotation) <- 'UCSC' + +# Create ChromatinAssay for ATAC data +atac_pbmc_assay <- CreateChromatinAssay( + counts = atac_pbmc_data, + sep = c(":", "-"), + fragments = fragpath, + annotation = annotation +) + +# Requantify query ATAC to have same features as multiome ATAC dataset +requant_multiome_ATAC <- FeatureMatrix( + fragments = Fragments(atac_pbmc_assay), + features = granges(obj.multi[['ATAC']]), + cells = Cells(atac_pbmc_assay) +) + +# Create assay with requantified ATAC data +ATAC_assay <- CreateChromatinAssay( + counts = requant_multiome_ATAC, + fragments = fragpath, + annotation = annotation +) + +# Create Seurat sbject +obj.atac <- CreateSeuratObject(counts = ATAC_assay,assay = 'ATAC') +obj.atac[['peak.orig']] <- atac_pbmc_assay +obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) +``` +
+ +--- + +We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). + +```{r pbmc.ref} +obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +``` +
+ **What if I want to use my own reference dataset?** + +As an alternative to using a pre-built reference, you can also use your own reference. To demonstrate, you can download a scRNA-seq dataset of 23,837 human PBMC [here](https://www.dropbox.com/s/x8mu9ye2w3a63hf/20k_PBMC_scRNA.rds?dl=0), which we have already annotated. +```{r, message=FALSE, warning=FALSE, eval=FALSE} +obj.rna = readRDS("/path/to/reference.rds") +obj.rna = SCTransform(object = obj.rna) %>% RunPCA() %>% RunUMAP(dims = 1:50, return.model = TRUE) +``` +When using your own reference, set `reference.reduction = "pca"` in the `PrepareBridgeReference` function. + +
+ +--- + +# Preprocessing/normalization for all datasets + +Prior to performing bridge integration, we normalize and pre-process each of the datasets (note that the reference has already been normalized). We normalize gene expression data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), and ATAC data using TF-IDF. + +```{r, message=FALSE, warning=FALSE} +# normalize multiome RNA +DefaultAssay(obj.multi) <- "RNA" +obj.multi <- SCTransform(obj.multi, verbose = FALSE) + +# normalize multiome ATAC +DefaultAssay(obj.multi) <- "ATAC" +obj.multi <- RunTFIDF(obj.multi) +obj.multi <- FindTopFeatures(obj.multi, min.cutoff = "q0") + +# normalize query +obj.atac <- RunTFIDF(obj.atac) + +``` + +## Map scATAC-seq dataset using bridge integration + +Now that we have the reference, query, and bridge datasets set up, we can begin integration. The bridge dataset enables translation between the scRNA-seq reference and the scATAC-seq query, effectively augmenting the reference so that it can map a new data type. We call this an extended reference, and first set it up. Note that you can save the results of this function and map multiple scATAC-seq datasets without having to rerun. + + +```{r, message=FALSE, warning=FALSE} +# Drop first dimension for ATAC reduction +dims.atac <- 2:50 +dims.rna <- 1:50 +DefaultAssay(obj.multi) <- "RNA" +DefaultAssay(obj.rna) <- "SCT" +obj.rna.ext <- PrepareBridgeReference(reference = obj.rna, + bridge = obj.multi, + reference.reduction = "spca", + reference.dims = dims.rna, + normalization.method = "SCT" +) +``` + +Now, we can directly find anchors between the extended reference and query objects. We use the `FindBridgeTransferAnchors` function, which translates the query dataset using the same dictionary as was used to translate the reference, and then identifies anchors in this space. The function is meant to mimic our `FindTransferAnchors` function, but to identify correspondences across modalities. + +```{r, message=FALSE, warning=FALSE} +bridge.anchor <- FindBridgeTransferAnchors(extended.reference = obj.rna.ext, + query = obj.atac, + reduction = "lsiproject", + dims = dims.atac +) +``` + + +Once we have identified anchors, we can map the query dataset onto the reference. The `MapQuery` function is the same as we have [previously introduced for reference mapping](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html) . It transfers cell annotations from the reference dataset, and also visualizes the query dataset on a previously computed UMAP embedding. Since our reference dataset contains cell type annotations at three levels of resolution (l1 - l3), we can transfer each level to the query dataset. + + +```{r, message=FALSE, warning=FALSE} + +obj.atac <- MapQuery(anchorset = bridge.anchor, + reference = obj.rna, + query = obj.atac, + refdata = list( + l1 = "celltype.l1", + l2 = "celltype.l2", + l3 = "celltype.l3"), + reduction.model = "wnn.umap" +) +``` + +Now we can visualize the results, plotting the scATAC-seq cells based on their predicted annotations, on the reference UMAP embedding. You can see that each scATAC-seq cell has been assigned a cell name based on the scRNA-seq defined cell ontology. + +```{r, message=FALSE, warning=FALSE} +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "ref.umap", label = TRUE) + ggtitle("ATAC") + NoLegend() +``` + +## Assessing the mapping + +To assess the mapping and cell type predictions, we will first see if the predicted cell type labels are concordant with an unsupervised analysis of the scATAC-seq dataset. We follow the standard unsupervised processing workflow for scATAC-seq data: + +```{r, message=FALSE, warning=FALSE} +obj.atac <- FindTopFeatures(obj.atac, min.cutoff = "q0") +obj.atac <- RunSVD(obj.atac) +obj.atac <- RunUMAP(obj.atac, reduction = "lsi", dims = 2:50) +``` + +Now, we visualize the predicted cluster labels on the unsupervised UMAP emebdding. We can see that predicted cluster labels (from the scRNA-seq reference) are concordant with the structure of the scATAC-seq data. However, there are some cell types (i.e. Treg), that do not appear to separate in unsupervised analysis. These may be prediction errors, or cases where the reference mapping provides additional resolution. + +```{r, pbmcdimplots, message=FALSE, warning=FALSE} +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) +``` + +Lastly, we validate the predicted cell types for the scATAC-seq data by examining their chromatin accessibility profiles at canonical loci. We use the `CoveragePlot` function to visualize accessibility patterns at the CD8A, FOXP3, and RORC, after grouping cells by their predicted labels. We see expected patterns in each case. For example, the PAX5 locus exhibits peaks that are accessible exclusively in B cells, and the CD8A locus shows the same in CD8 T cell subsets. Similarly, the accessibility of FOXP3, a canonical marker of regulatory T cells (Tregs), in predicted Tregs provides strong support for the accuracy of our prediction. + +```{r, message=FALSE, warning=FALSE} +CoveragePlot(obj.atac, region = "PAX5", group.by = "predicted.l1", idents = c("B", "CD4 T", "Mono", "NK"), window = 200, extend.upstream = -150000) +CoveragePlot(obj.atac, region = "CD8A", group.by = "predicted.l2", idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), extend.downstream = 5000, extend.upstream = 5000) +CoveragePlot(obj.atac, region = "FOXP3", group.by = "predicted.l2", idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), extend.downstream = 0, extend.upstream = 0) +CoveragePlot(obj.atac, region = "RORC", group.by = "predicted.l2", idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), extend.downstream = 5000, extend.upstream = 5000) +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_cell_cycle_vignette.Rmd b/vignettes/seurat5_cell_cycle_vignette.Rmd new file mode 100644 index 000000000..434af8f93 --- /dev/null +++ b/vignettes/seurat5_cell_cycle_vignette.Rmd @@ -0,0 +1,150 @@ +--- +title: "Cell-Cycle Scoring and Regression" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + warning = FALSE, + error = TRUE, + message = FALSE, + fig.width = 8, + time_it = TRUE +) +``` + +We demonstrate how to mitigate the effects of cell cycle heterogeneity in scRNA-seq data by calculating cell cycle phase scores based on canonical markers, and regressing these out of the data during pre-processing. We demonstrate this on a dataset of murine hematopoietic progenitors ([Nestorowa *et al*., Blood 2016](http://www.bloodjournal.org/content/early/2016/06/30/blood-2016-05-716480?sso-checked=true)).You can download the files needed to run this vignette [here](https://www.dropbox.com/s/3dby3bjsaf5arrw/cell_cycle_vignette_files.zip?dl=1). + +```{r initialize_object, results='hide'} +library(Seurat) +options(Seurat.object.assay.version = "v5") + +# Read in the expression matrix +# The first row is a header row, the first column is rownames +exp.mat <- read.table(file = "../data/nestorawa_forcellcycle_expressionMatrix.txt", header = TRUE, as.is = TRUE, row.names = 1) + +# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. +# We can segregate this list into markers of G2/M phase and markers of S phase +s.genes <- cc.genes$s.genes +g2m.genes <- cc.genes$g2m.genes + +# Create our Seurat object and complete the initalization steps +marrow <- CreateSeuratObject(counts = exp.mat) +marrow <- NormalizeData(marrow) +marrow <- FindVariableFeatures(marrow, selection.method = 'vst') +marrow <- ScaleData(marrow, features = rownames(marrow)) +``` + +If we run a PCA on our object, using the variable genes we found in `FindVariableFeatures()` above, we see that while most of the variance can be explained by lineage, PC8 and PC10 are split on cell-cycle genes including *TOP2A* and *MKI67*. We will attempt to regress this signal from the data, so that cell-cycle heterogeneity does not contribute to PCA or downstream analysis. + +```{r justification, message=TRUE} +marrow <- RunPCA(marrow, features = VariableFeatures(marrow), ndims.print = 6:10, + nfeatures.print = 10) +DimHeatmap(marrow, dims = c(8, 10)) +``` + +# Assign Cell-Cycle Scores + +First, we assign each cell a score, based on its expression of G2/M and S phase markers. These marker sets should be anticorrelated in their expression levels, and cells expressing neither are likely not cycling and in G1 phase. + +We assign scores in the `CellCycleScoring()` function, which stores S and G2/M scores in object meta data, along with the predicted classification of each cell in either G2M, S or G1 phase. `CellCycleScoring()` can also set the identity of the Seurat object to the cell-cycle phase by passing `set.ident = TRUE` (the original identities are stored as `old.ident`). Please note that Seurat does not use the discrete classifications (G2M/G1/S) in downstream cell cycle regression. Instead, it uses the quantitative scores for G2M and S phase. However, we provide our predicted classifications in case they are of interest. + +```{r cc_score} +marrow <- CellCycleScoring(marrow, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE) + +#view cell cycle scores and phase assignments +head(marrow[[]]) + +#Visualize the distribution of cell cycle markers across +RidgePlot(marrow, features = c("PCNA","TOP2A","MCM6","MKI67"), ncol = 2) + +#Running a PCA on cell cycle genes reveals, unsurprisingly, that cells separate entirely by phase +marrow <- RunPCA(marrow, features = c(s.genes, g2m.genes)) +DimPlot(marrow) +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DimPlot(marrow) + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + guides(colour = guide_legend(override.aes = list(size = 10))) +ggsave(filename = "../output/images/cell_cycle_vignette.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +We score single cells based on the scoring strategy described in [Tirosh *et al*. 2016](http://science.sciencemag.org/content/352/6282/189). See `?AddModuleScore()` in Seurat for more information, this function can be used to calculate supervised module scores for any gene list. + +# Regress out cell cycle scores during data scaling + +We now attempt to subtract ('regress out') this source of heterogeneity from the data. For users of Seurat v1.4, this was implemented in `RegressOut`. However, as the results of this procedure are stored in the scaled data slot (therefore overwriting the output of `ScaleData()`), we now merge this functionality into the `ScaleData()` function itself. + +For each gene, Seurat models the relationship between gene expression and the S and G2M cell cycle scores. The scaled residuals of this model represent a 'corrected' expression matrix, that can be used downstream for dimensional reduction. + +```{r regress, results='hide'} +marrow <- ScaleData(marrow, vars.to.regress = c('S.Score', 'G2M.Score'), features = rownames(marrow)) +``` + +```{r pca2, message=TRUE} +# Now, a PCA on the variable genes no longer returns components associated with cell cycle +marrow <- RunPCA(marrow, features = VariableFeatures(marrow), nfeatures.print = 10) +``` + +```{r pca3} +#When running a PCA on only cell cycle genes, cells no longer separate by cell-cycle phase +marrow <- RunPCA(marrow, features = c(s.genes, g2m.genes)) +DimPlot(marrow) +``` + +As the best cell cycle markers are extremely well conserved across tissues and species, we have found this procedure to work robustly and reliably on diverse datasets. + +# Alternate Workflow + +The procedure above removes all signal associated with cell cycle. In some cases, we've found that this can negatively impact downstream analysis, particularly in differentiating processes (like murine hematopoiesis), where stem cells are quiescent and differentiated cells are proliferating (or vice versa). In this case, regressing out all cell cycle effects can blur the distinction between stem and progenitor cells as well. + +As an alternative, we suggest regressing out the **difference** between the G2M and S phase scores. This means that signals separating non-cycling cells and cycling cells will be maintained, but differences in cell cycle phase among proliferating cells (which are often uninteresting), will be regressed out of the data + +```{r regress_diff, results='hide'} +marrow$CC.Difference <- marrow$S.Score - marrow$G2M.Score +marrow <- ScaleData(marrow, vars.to.regress = 'CC.Difference', features = rownames(marrow)) +``` + +```{r pca4, message=TRUE} +#cell cycle effects strongly mitigated in PCA +marrow <- RunPCA(marrow, features = VariableFeatures(marrow), nfeatures.print = 10) +``` + +```{r pca5} +#when running a PCA on cell cycle genes, actively proliferating cells remain distinct from G1 cells +#however, within actively proliferating cells, G2M and S phase cells group together +marrow <- RunPCA(marrow, features = c(s.genes, g2m.genes)) +DimPlot(marrow) +``` + +```{r save.times, include = FALSE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/cell_cycle_vignette_times.csv") +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_conversion_vignette.Rmd b/vignettes/seurat5_conversion_vignette.Rmd new file mode 100644 index 000000000..b0e5c8a12 --- /dev/null +++ b/vignettes/seurat5_conversion_vignette.Rmd @@ -0,0 +1,132 @@ +--- +title: "Interoperability between single-cell object formats" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +```{r, include = FALSE, cache=FALSE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` +In this vignette, we demonstrate the ability to convert between Seurat objects, SingleCellExperiment objects, and anndata objects. + +```{r packages} +# install scater +# https://bioconductor.org/packages/release/bioc/html/scater.html +library(scater) +library(Seurat) +# install SeuratDisk from GitHub using the remotes package +# remotes::install_github(repo = 'mojaveazure/seurat-disk', ref = 'develop') +library(SeuratDisk) +library(SeuratData) +library(patchwork) +``` + +# Converting to/from `SingleCellExperiment` + +[`SingleCellExperiment`](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html) is a class for storing single-cell experiment data, created by Davide Risso, Aaron Lun, and Keegan Korthauer, and is used by many Bioconductor analysis packages. Here we demonstrate converting the Seurat object produced in our 3k PBMC tutorial to SingleCellExperiment for use with Davis McCarthy's [scater](https://bioconductor.org/packages/release/bioc/html/scater.html) package. + +```{r seurat_singlecell} +# Use PBMC3K from SeuratData +InstallData("pbmc3k") +pbmc <- LoadData(ds = "pbmc3k", type = "pbmc3k.final") +pbmc.sce <- as.SingleCellExperiment(pbmc) +p1 <- plotExpression(pbmc.sce, features = 'MS4A1', x = 'ident') + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +p2 <- plotPCA(pbmc.sce, colour_by = 'ident') +p1 + p2 +``` + +Seurat also allows conversion from `SingleCellExperiment` objects to Seurat objects; we demonstrate this on some publicly available data downloaded from a repository maintained by [Martin Hemberg's group](http://www.sanger.ac.uk/science/groups/hemberg-group). + +```{r singlecell_seurat} +# download from hemberg lab +# https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/manno_human.rds +manno <- readRDS(file = '../data/manno_human.rds') +manno <- runPCA(manno) +manno.seurat <- as.Seurat(manno, counts = 'counts', data = 'logcounts') +# gives the same results; but omits defaults provided in the last line +manno.seurat <- as.Seurat(manno) +Idents(manno.seurat) <- 'cell_type1' +p1 <- DimPlot(manno.seurat, reduction = 'PCA', group.by = 'Source') + NoLegend() +p2 <- RidgePlot(manno.seurat, features = 'ACTB', group.by = 'Source') +p1 + p2 +``` + +# Converting to/from `loom` + +The [`loom`](http://loompy.org/) format is a file structure imposed on [HDF5 files](http://portal.hdfgroup.org/display/support) designed by [Sten Linnarsson's](http://linnarssonlab.org/) group. It is designed to efficiently hold large single-cell genomics datasets. The ability to save Seurat objects as `loom` files is implemented in [SeuratDisk](https://mojaveazure.github.io/seurat-disk) For more details about the `loom` format, please see the [`loom` file format specification](http://linnarssonlab.org/loompy/format/index.html). + +```{r prepare_loom, echo=FALSE} +if (file.exists('../output/pbmc3k.loom')) { + file.remove('../output/pbmc3k.loom') +} +``` + +```{r seruat_loom} +pbmc.loom <- as.loom(pbmc, filename = '../output/pbmc3k.loom', verbose = FALSE) +pbmc.loom +# Always remember to close loom files when done +pbmc.loom$close_all() +``` + +Seurat can also read in `loom` files connected via [SeuratDisk](https://github.com/mojaveazure/seurat-disk) into a Seurat object; we demonstrate this on a subset of the [Mouse Brain Atlas](http://mousebrain.org/) created by the Linnarsson lab. + +```{r loom_seurat, fig.height=10} +# download from linnarsson lab +# https://storage.googleapis.com/linnarsson-lab-loom/l6_r1_immune_cells.loom +l6.immune <- Connect(filename = '../data/l6_r1_immune_cells.loom', mode = 'r') +l6.immune +l6.seurat <- as.Seurat(l6.immune) +Idents(l6.seurat) <- "ClusterName" +VlnPlot(l6.seurat, features = c('Sparc', 'Ftl1', 'Junb', 'Ccl4'), ncol = 2) +# Always remember to close loom files when done +l6.immune$close_all() +``` + +For more details about interacting with loom files in R and Seurat, please see [loomR on GitHub](https://github.com/mojaveazure/loomR). + +# Converting to/from `AnnData` + +[`AnnData`](https://anndata.readthedocs.io/en/latest/) provides a Python class, created by Alex Wolf and Philipp Angerer, that can be used to store single-cell data. This data format is also use for storage in their [Scanpy](https://scanpy.readthedocs.io/en/latest/index.html) package for which we now support interoperability. Support for reading data from and saving data to `AnnData` files is provided by [SeuratDisk](https://mojaveazure.github.io/seurat-disk); please see their [vignette](https://mojaveazure.github.io/seurat-disk/articles/convert-anndata.html) showcasing the interoperability. + +# Acknowledgments + +Many thanks to [Davis McCarthy](https://twitter.com/davisjmcc?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor) and [Alex Wolf](https://twitter.com/falexwolf) for their help in drafting the conversion functions. + +```{r save.times, include = FALSE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/conversion_vignette_times.csv") +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_de_vignette.Rmd b/vignettes/seurat5_de_vignette.Rmd new file mode 100644 index 000000000..7aa962979 --- /dev/null +++ b/vignettes/seurat5_de_vignette.Rmd @@ -0,0 +1,144 @@ +--- +title: "Differential expression testing" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +```{r, include = FALSE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +# Load in the data + +This vignette highlights some example workflows for performing differential expression in Seurat. For demonstration purposes, we will be using the 2,700 PBMC object that is available via the [SeuratData](https://github.com/satijalab/seurat-data) package). + +```{r load_data} +library(Seurat) +library(SeuratData) +pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) +``` + +# Perform default differential expression tests + +The bulk of Seurat's differential expression features can be accessed through the `FindMarkers()` function. As a default, Seurat performs differential expression based on the non-parametric Wilcoxon rank sum test. This replaces the previous default test ('bimod'). To test for differential expression between two specific groups of cells, specify the `ident.1` and `ident.2` parameters. + +```{r basic_de} +# list options for groups to perform differential expression on +levels(pbmc) +# Find differentially expressed features between CD14+ and FCGR3A+ Monocytes +monocyte.de.markers <- FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono") +# view results +head(monocyte.de.markers) +``` + +The results data frame has the following columns : + + * p_val : p_val (unadjusted) + * avg_log2FC : log fold-change of the average expression between the two groups. Positive values indicate that the feature is more highly expressed in the first group. + * pct.1 : The percentage of cells where the feature is detected in the first group + * pct.2 : The percentage of cells where the feature is detected in the second group + * p_val_adj : Adjusted p-value, based on Bonferroni correction using all features in the dataset. + +If the `ident.2` parameter is omitted or set to NULL, `FindMarkers()` will test for differentially expressed features between the group specified by `ident.1` and all other cells. + +```{r basic_de_2} +# Find differentially expressed features between CD14+ Monocytes and all other cells, only search for positive markers +monocyte.de.markers <- FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = NULL, only.pos = TRUE) +# view results +head(monocyte.de.markers) +``` + +# Prefilter features or cells to increase the speed of DE testing + +To increase the speed of marker discovery, particularly for large datasets, Seurat allows for pre-filtering of features or cells. For example, features that are very infrequently detected in either group of cells, or features that are expressed at similar average levels, are unlikely to be differentially expressed. Example use cases of the `min.pct`, `logfc.threshold`, `min.diff.pct`, and `max.cells.per.ident` parameters are demonstrated below. + +```{r prefilter} +# Pre-filter features that are detected at <50% frequency in either CD14+ Monocytes or FCGR3A+ Monocytes +head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", min.pct = 0.5)) + +# Pre-filter features that have less than a two-fold change between the average expression of CD14+ Monocytes vs FCGR3A+ Monocytes +head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", logfc.threshold = log(2))) + +# Pre-filter features whose detection percentages across the two groups are similar (within 0.25) +head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", min.diff.pct = 0.25)) + +# Increasing min.pct, logfc.threshold, and min.diff.pct, will increase the speed of DE testing, but could also miss features that are prefiltered + +# Subsample each group to a maximum of 200 cells. Can be very useful for large clusters, or computationally-intensive DE tests +head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", max.cells.per.ident = 200)) +``` + +# Perform DE analysis using alternative tests + +The following differential expression tests are currently supported: + + * "wilcox" : Wilcoxon rank sum test (default) + * "bimod" : Likelihood-ratio test for single cell feature expression, [(McDavid et al., Bioinformatics, 2013)](https://www.ncbi.nlm.nih.gov/pubmed/23267174) + * "roc" : Standard AUC classifier + * "t" : Student's t-test + * "poisson" : Likelihood ratio test assuming an underlying negative binomial distribution. Use only for UMI-based datasets + * "negbinom" : Likelihood ratio test assuming an underlying negative binomial distribution. Use only for UMI-based datasets + * "LR" : Uses a logistic regression framework to determine differentially expressed genes. Constructs a logistic regression model predicting group membership based on each feature individually and compares this to a null model with a likelihood ratio test. + * "MAST" : GLM-framework that treates cellular detection rate as a covariate [(Finak et al, Genome Biology, 2015)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4676162/) ([Installation instructions](https://github.com/RGLab/MAST)) + * "DESeq2" : DE based on a model using the negative binomial distribution [(Love et al, Genome Biology, 2014)](https://bioconductor.org/packages/release/bioc/html/DESeq2.html) ([Installation instructions](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)) + +For MAST and DESeq2, please ensure that these packages are installed separately in order to use them as part of Seurat. Once installed, use the `test.use` parameter can be used to specify which DE test to use. + +```{r include = FALSE} +# necessary to get MAST to work properly +library(SingleCellExperiment) +``` + +```{r multiple test} +# Test for DE features using the MAST package +head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", test.use = "MAST")) + +# Test for DE features using the DESeq2 package. Throws an error if DESeq2 has not already been installed +# Note that the DESeq2 workflows can be computationally intensive for large datasets, but are incompatible with some feature pre-filtering options +# We therefore suggest initially limiting the number of cells used for testing +head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", test.use = "DESeq2", max.cells.per.ident = 50)) +``` + +# Acknowledgements + +We thank the authors of the MAST and DESeq2 packages for their kind assistance and advice. We also point users to the following [study](https://www.nature.com/articles/nmeth.4612) by Charlotte Soneson and Mark Robinson, which performs careful and extensive evaluation of methods for single cell differential expression testing. + +```{r save.times, include = FALSE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/de_vignette_times.csv") +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
+ diff --git a/vignettes/seurat5_dim_reduction_vignette.Rmd b/vignettes/seurat5_dim_reduction_vignette.Rmd new file mode 100644 index 000000000..7c193e94a --- /dev/null +++ b/vignettes/seurat5_dim_reduction_vignette.Rmd @@ -0,0 +1,120 @@ +--- +title: "Seurat - Dimensional Reduction Vignette" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +# Load in the data + +This vignette demonstrates how to store and interact with dimensional reduction information (such as the output from `RunPCA()`) in Seurat. For demonstration purposes, we will be using the 2,700 PBMC object that is available via the [SeuratData](https://github.com/satijalab/seurat-data) package. + +```{r load_data} +library(Seurat) +library(SeuratData) +pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) +``` + +# Explore the new dimensional reduction structure + +In Seurat v3.0, storing and interacting with dimensional reduction information has been generalized and formalized into the `DimReduc` object. Each dimensional reduction procedure is stored as a `DimReduc` object in the `object@reductions` slot as an element of a named list. Accessing these reductions can be done with the `[[` operator, calling the name of the reduction desired. For example, after running a principle component analysis with `RunPCA()`, `object[['pca']]` will contain the results of the PCA. By adding new elements to the list, users can add additional, and custom, dimensional reductions. Each stored dimensional reduction contains the following slots: + +1. **cell.embeddings**: stores the coordinates for each cell in low-dimensional space. +2. **feature.loadings**: stores the weight for each feature along each dimension of the embedding +3. **feature.loadings.projected**: Seurat typically calculate the dimensional reduction on a subset of genes (for example, high-variance genes), and then project that structure onto the entire dataset (all genes). The results of that projection (calculated with `ProjectDim()`) are stored in this slot. Note that the cell loadings will remain unchanged after projection but there are now feature loadings for all feature +4. **stdev**: The standard deviations of each dimension. Most often used with PCA (storing the square roots of the eigenvalues of the covariance matrix) and can be useful when looking at the drop off in the amount of variance that is explained by each successive dimension. +5. **key**: Sets the column names for the cell.embeddings and feature.loadings matrices. For example, for PCA, the column names are PC1, PC2, etc., so the key is "PC". +6. **jackstraw**: Stores the results of the jackstraw procedure run using this dimensional reduction technique. Currently supported only for PCA. +7. **misc**: Bonus slot to store any other information you might want + +To access these slots, we provide the `Embeddings()`,`Loadings()`, and `Stdev()` functions + +```{r explore} +pbmc[['pca']] +head(Embeddings(pbmc, reduction = "pca")[, 1:5]) +head(Loadings(pbmc, reduction = "pca")[, 1:5]) +head(Stdev(pbmc, reduction = "pca")) +``` + +Seurat provides `RunPCA()` (pca), and `RunTSNE()` (tsne), and representing dimensional reduction techniques commonly applied to scRNA-seq data. When using these functions, all slots are filled automatically. + +We also allow users to add the results of a custom dimensional reduction technique (for example, multi-dimensional scaling (MDS), or [zero-inflated factor analysis](https://github.com/epierson9/ZIFA)), that is computed separately. All you need is a matrix with each cell's coordinates in low-dimensional space, as shown below. + +# Storing a custom dimensional reduction calculation + +Though not incorporated as part of the Seurat package, its easy to run multidimensional scaling (MDS) in R. If you were interested in running MDS and storing the output in your Seurat object: + +```{r mds} +# Before running MDS, we first calculate a distance matrix between all pairs of cells. +# Here we use a simple euclidean distance metric on all genes, using scale.data as input +d <- dist(t(GetAssayData(pbmc, slot = 'scale.data'))) +# Run the MDS procedure, k determines the number of dimensions +mds <- cmdscale(d = d, k = 2) +# cmdscale returns the cell embeddings, we first label the columns to ensure downstream consistency +colnames(mds) <- paste0("MDS_", 1:2) +# We will now store this as a custom dimensional reduction called "mds" +pbmc[['mds']] <- CreateDimReducObject(embeddings = mds, key = 'MDS_', assay = DefaultAssay(pbmc)) + +# We can now use this as you would any other dimensional reduction in all downstream functions +DimPlot(pbmc, reduction = "mds", pt.size = 0.5) + +# If you wold like to observe genes that are strongly correlated with the first MDS coordinate +pbmc <- ProjectDim(pbmc, reduction = "mds") + +# Display the results as a heatmap +DimHeatmap(pbmc, reduction = "mds", dims = 1, cells = 500, projected = TRUE, balanced = TRUE) + +# Explore how the first MDS dimension is distributed across clusters +VlnPlot(pbmc, features = "MDS_1") + +# See how the first MDS dimension is correlated with the first PC dimension +FeatureScatter(pbmc, feature1 = "MDS_1", feature2 = "PC_1") +``` + + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DimPlot(pbmc, reduction = "mds", pt.size = 0.5) +ggsave(filename = "../output/images/pbmc_mds.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +```{r save.times, include = FALSE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/dim_reduction_vignette_times.csv") +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
+ + + + diff --git a/vignettes/seurat5_essential_commands.Rmd b/vignettes/seurat5_essential_commands.Rmd new file mode 100644 index 000000000..66ea4d068 --- /dev/null +++ b/vignettes/seurat5_essential_commands.Rmd @@ -0,0 +1,285 @@ +--- +title: "Seurat Command List" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +knitr::opts_chunk$set( + echo = TRUE, + tidy = TRUE, + tidy.opts = list(width.cutoff = 120), + message = FALSE, + warning = FALSE, + results = 'hold', + eval = FALSE, + error = TRUE +) +``` + +```{r load-data, echo=FALSE} +library(Seurat) +library(ggplot2) +pbmc <- readRDS(file = '~/Downloads/pbmc3k/pbmc3k_final.rds') +pbmc <- UpdateSeuratObject(pbmc) +cbmc.rna <- as.sparse(x = read.csv("~/Downloads/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz", sep = ",", header = TRUE, row.names = 1)) + +# To make life a bit easier going forward, we're going to discard all but the top 100 most highly expressed mouse genes, and remove the "HUMAN_" from the CITE-seq prefix +cbmc.rna <- CollapseSpeciesExpressionMatrix(object = cbmc.rna) + +# Load in the ADT UMI matrix +cbmc.adt <- as.sparse(x = read.csv("~/Downloads/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz", sep = "," ,header = TRUE, row.names = 1)) + +# When adding multimodal data to Seurat, it's okay to have duplicate feature names. Each set of modal data (eg. RNA, ADT, etc.) is stored in its own Assay object. +# One of these Assay objects is called the "default assay", meaning it's used for all analyses and visualization. +# To pull data from an assay that isn't the default, you can specify a key that's linked to an assay for feature pulling. +# To see all keys for all objects, use the Key function. +# Lastly, we observed poor enrichments for CCR5, CCR7, and CD10 - and therefore remove them from the matrix (optional) +cbmc.adt <- cbmc.adt[setdiff(x = rownames(x = cbmc.adt), y = c('CCR5', 'CCR7', 'CD10')), ] +``` + +# Seurat Standard Worflow + +The standard Seurat workflow takes raw single-cell expression data and aims to find clusters within the data. For full details, please read our tutorial. This process consists of data normalization and variable feature selection, data scaling, a PCA on variable features, construction of a shared-nearest-neighbors graph, and clustering using a modularity optimizer. Finally, we use a t-SNE to visualize our clusters in a two-dimensional space. + +```{r seurat-standard-workflow} +pbmc.counts <- Read10X(data.dir = "~/Downloads/pbmc3k/filtered_gene_bc_matrices/hg19/") +pbmc <- CreateSeuratObject(counts = pbmc.counts) +pbmc <- NormalizeData(object = pbmc) +pbmc <- FindVariableFeatures(object = pbmc) +pbmc <- ScaleData(object = pbmc) +pbmc <- RunPCA(object = pbmc) +pbmc <- FindNeighbors(object = pbmc) +pbmc <- FindClusters(object = pbmc) +pbmc <- RunTSNE(object = pbmc) +DimPlot(object = pbmc, reduction = 'tsne') +``` + +# `Seurat` Object Interaction + +Since Seurat v3.0, we’ve made improvements to the Seurat object, and added new methods for user interaction. We also introduce simple functions for common tasks, like subsetting and merging, that mirror standard R functions. + + + +```{r features-and-cells} +# Get cell and feature names, and total numbers +colnames(x = pbmc) +Cells(object = pbmc) +rownames(x = pbmc) +ncol(x = pbmc) +nrow(x = pbmc) +``` + +```{r idents} +# Get cell identity classes +Idents(object = pbmc) +levels(x = pbmc) + +# Stash cell identity classes +pbmc[['old.ident']] <- Idents(object = pbmc) +pbmc <- StashIdent(object = pbmc, save.name = 'old.ident') + +# Set identity classes +Idents(object = pbmc) <- 'CD4 T cells' +Idents(object = pbmc, cells = 1:10) <- 'CD4 T cells' + +# Set identity classes to an existing column in meta data +Idents(object = pbmc, cells = 1:10) <- 'orig.ident' +Idents(object = pbmc) <- 'orig.ident' + +# Rename identity classes +pbmc <- RenameIdents(object = pbmc, 'CD4 T cells' = 'T Helper cells') +``` + +```{r subsetting} +# Subset Seurat object based on identity class, also see ?SubsetData +subset(x = pbmc, idents = 'B cells') +subset(x = pbmc, idents = c('CD4 T cells', 'CD8 T cells'), invert = TRUE) + +# Subset on the expression level of a gene/feature +subset(x = pbmc, subset = MS4A1 > 3) + +# Subset on a combination of criteria +subset(x = pbmc, subset = MS4A1 > 3 & PC1 > 5) +subset(x = pbmc, subset = MS4A1 > 3, idents = 'B cells') + +# Subset on a value in the object meta data +subset(x = pbmc, subset = orig.ident == "Replicate1") + +# Downsample the number of cells per identity class +subset(x = pbmc, downsample = 100) +``` + +```{r merging, eval=FALSE} +# Merge two Seurat objects +merge(x = pbmc1, y = pbmc2) +# Merge more than two Seurat objects +merge(x = pbmc1, y = list(pbmc2, pbmc3)) +``` + +# Data Access + +Accessing data in Seurat is simple, using clearly defined accessors and setters to quickly find the data needed. + +```{r metadata} +# View metadata data frame, stored in object@meta.data +pbmc[[]] + +# Retrieve specific values from the metadata +pbmc$nCount_RNA +pbmc[[c('percent.mito', 'nFeature_RNA')]] + +# Add metadata, see ?AddMetaData +random_group_labels <- sample(x = c('g1', 'g2'), size = ncol(x = pbmc), replace = TRUE) +pbmc$groups <- random_group_labels +``` + +```{r expression-matrices, eval=FALSE} +# Retrieve or set data in an expression matrix ('counts', 'data', and 'scale.data') +GetAssayData(object = pbmc, slot = 'counts') +pbmc <- SetAssayData(object = pbmc, slot = 'scale.data', new.data = new.data) +``` + +```{r embeddings-loadings} +# Get cell embeddings and feature loadings +Embeddings(object = pbmc, reduction = 'pca') +Loadings(object = pbmc, reduction = 'pca') +Loadings(object = pbmc, reduction = 'pca', projected = TRUE) +``` + +```{r fetchdata} +# FetchData can pull anything from expression matrices, cell embeddings, or metadata +FetchData(object = pbmc, vars = c('PC_1', 'percent.mito', 'MS4A1')) +``` + +# Visualization in Seurat + +Seurat has a vast, ggplot2-based plotting library. All plotting functions will return a ggplot2 plot by default, allowing easy customization with ggplot2. + +```{r visualization} + +# Dimensional reduction plot for PCA or tSNE +DimPlot(object = pbmc, reduction = 'tsne') +DimPlot(object = pbmc, reduction = 'pca') + +# Dimensional reduction plot, with cells colored by a quantitative feature +FeaturePlot(object = pbmc, features = "MS4A1") + +# Scatter plot across single cells, replaces GenePlot +FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "PC_1") +FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "CD3D") + +# Scatter plot across individual features, repleaces CellPlot +CellScatter(object = pbmc, cell1 = "AGTCTACTAGGGTG", cell2 = "CACAGATGGTTTCT") + +VariableFeaturePlot(object = pbmc) + +#Violin and Ridge plots +VlnPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32")) +RidgePlot(object = pbmc, feature = c("LYZ", "CCL5", "IL32")) + +# Heatmaps +DoHeatmap(object = pbmc,features = heatmap_markers) +DimHeatmap(object = pbmc,reduction = 'pca', cells = 200) + +# New things to try! +# Note that plotting functions now return ggplot2 objects, so you can add themes, titles, and options onto them +VlnPlot(object = pbmc, features = "MS4A1", split.by = "groups") +DotPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"), split.by = "groups") +FeaturePlot(object = pbmc, features = c("MS4A1", "CD79A"), blend = TRUE) +DimPlot(object = pbmc) + DarkTheme() +DimPlot(object = pbmc) + labs(title = '2,700 PBMCs clustered using Seurat and viewed\non a two-dimensional tSNE') +``` + +Seurat provides many prebuilt themes that can be added to ggplot2 plots for quick customization + +| Theme | Function | +| ----- | -------- | +| `DarkTheme` | Set a black background with white text | +| `FontSize` | Set font sizes for various elements of a plot | +| `NoAxes` | Remove axes and axis text | +| `NoLegend` | Remove all legend elements | +| `RestoreLegend` | Restores a legend after removal | +| `RotatedAxis` | Rotates x-axis labels | + +```{r helper-functions} + +# Plotting helper functions work with ggplot2-based scatter plots, such as DimPlot, FeaturePlot, CellScatter, and FeatureScatter +plot <- DimPlot(object = pbmc) + NoLegend() + +# HoverLocator replaces the former `do.hover` argument +# It can also show extra data throught the `information` argument, designed to work smoothly with FetchData +HoverLocator(plot = plot, information = FetchData(object = pbmc, vars = c("ident", "PC_1", "nFeature_RNA"))) + +# FeatureLocator replaces the former `do.identify` +select.cells <- FeatureLocator(plot = plot) + +# Label points on a ggplot object +LabelPoints(plot = plot, points = TopCells(object = pbmc[["pca"]]), repel = TRUE) +``` + +# Multi-Assay Features + +With Seurat, you can easily switch between different assays at the single cell level (such as ADT counts from CITE-seq, or integrated/batch-corrected data). Most functions now take an assay parameter, but you can set a Default Assay to avoid repetitive statements. + +```{r multi-assay} +cbmc <- CreateSeuratObject(counts = cbmc.rna) +# Add ADT data +cbmc[['ADT']] <- CreateAssayObject(counts = cbmc.adt) +# Run analyses by specifying the assay to use +NormalizeData(object = cbmc, assay = 'RNA') +NormalizeData(object = cbmc, assay = 'ADT', method = 'CLR') + +# Retrieve and set the default assay +DefaultAssay(object = cbmc) +DefaultAssay(object = cbmc) <- 'ADT' +DefaultAssay(object = cbmc) + +# Pull feature expression from both assays by using keys +FetchData(object = cbmc, vars = c('rna_CD3E', 'adt_CD3')) + +# Plot data from multiple assays using keys +FeatureScatter(object = cbmc, feature1 = "rna_CD3E", feature2 = "adt_CD3") +``` + +# Seurat v2.X vs v3.X + +| Seurat v2.X | Seurat v3.X | +| ----------- | ----------- | +| `object@data` | `GetAssayData(object = object)` | +| `object@raw.data` | `GetAssayData(object = object, slot = "counts")` | +| `object@scale.data` | `GetAssayData(object = object, slot = "scale.data")` | +| `object@cell.names` | `colnames(x = object)` | +| `rownames(x = object@data)` | `rownames(x = object)` | +| `object@var.genes` | `VariableFeatures(object = object)` | +| `object@hvg.info` | `HVFInfo(object = object)` | +| `object@assays$assay.name` | `object[["assay.name"]]` | +| `object@dr$pca` | `object[["pca"]]` | +| `GetCellEmbeddings(object = object, reduction.type = "pca")` | `Embeddings(object = object, reduction = "pca")` | +| `GetGeneLoadings(object = object, reduction.type = "pca")` | `Loadings(object = object, reduction = "pca")` | +| `AddMetaData(object = object, metadata = vector, col.name = "name")` | `object$name <- vector` | +| `object@meta.data$name` | `object$name` | +| `object@idents` | `Idents(object = object)` | +| `SetIdent(object = object, ident.use = "new.idents")` | `Idents(object = object) <- "new.idents"` | +| `SetIdent(object = object, cells.use = 1:10, ident.use = "new.idents")` | `Idents(object = object, cells = 1:10) <- "new.idents"` | +| `StashIdent(object = object, save.name = "saved.idents")` | `object$saved.idents <- Idents(object = object)` | +| `levels(x = object@idents)` | `levels(x = object)` | +| `RenameIdent(object = object, old.ident.name = "old.ident", new.ident.name = "new.ident")` | `RenameIdents(object = object, "old.ident" = "new.ident")` | +| `WhichCells(object = object, ident = "ident.keep")` | `WhichCells(object = object, idents = "ident.keep")` | +| `WhichCells(object = object, ident.remove = "ident.remove")` | `WhichCells(object = object, idents = "ident.remove", invert = TRUE)` | +| `WhichCells(object = object, max.cells.per.ident = 500)` | `WhichCells(object = object, downsample = 500)` | +| `WhichCells(object = object, subset.name = "name", low.threshold = low, high.threshold = high)` | `WhichCells(object = object, expression = name > low & name < high)` | +| `FilterCells(object = object, subset.names = "name", low.threshold = low, high.threshold = high)` | `subset(x = object, subset = name > low & name < high)` | +| `SubsetData(object = object, subset.name = "name", low.threshold = low, high.threshold = high)` | `subset(x = object, subset = name > low & name < high)` | +| `MergeSeurat(object1 = object1, object2 = object2)` | `merge(x = object1, y = object2)` | + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_extensions.Rmd b/vignettes/seurat5_extensions.Rmd new file mode 100644 index 000000000..787bdcdb6 --- /dev/null +++ b/vignettes/seurat5_extensions.Rmd @@ -0,0 +1,35 @@ +--- +title: "Seurat Extension Packages" +output: html_document +--- + +In addition to the core Seurat package, we provide several extensions that enhance the functionality and utility of Seurat. A brief description of each is listed below with links to more complete documentation and examples. + +# Signac + +Signac is an R toolkit that extends Seurat for the analysis, interpretation, and exploration of single-cell chromatin datasets. The software supports the following features: + +* Calculating single-cell QC metrics +* Dimensional reduction, visualization, and clustering +* Identifying cell type-specific peaks +* Visualizing ‘pseudo-bulk’ coverage tracks +* Integration with single-cell RNA-seq datasets + +For documentation and vignettes, click [here](https://satijalab.org/signac/). + +# SeuratData + +SeuratData is a mechanism for distributing datasets in the form of Seurat objects using R’s internal package and data management systems. It represents an easy way for users to get access to datasets that are used in the Seurat vignettes. For more information, click [here](https://github.com/satijalab/seurat-data). + +# SeuratWrappers + +In order to facilitate the use of community tools with Seurat, we provide the SeuratWrappers package, which contains code to run other analysis tools on Seurat objects. For a full list of supported packages and vignettes, please see our vignettes page. + +# SeuratDisk + +The SeuratDisk package introduces the h5Seurat file format for the storage and analysis of multimodal single-cell and spatially-resolved expression experiments. The SeuratDisk package provides functions to save Seurat objects as h5Seurat files, and functions for rapid on-disk conversion between h5Seurat and AnnData formats with the goal of enhancing interoperability between Seurat and Scanpy. For more information, click [here](https://mojaveazure.github.io/seurat-disk/) + +# Azimuth + +Azimuth is a web application that uses an annotated reference dataset to automate the processing, analysis, and interpretation of a new single-cell RNA-seq experiment. Azimuth leverages a 'reference-based mapping' pipeline that inputs a counts matrix of gene expression in single cells, and performs normalization, visualization, cell annotation, and differential expression (biomarker discovery). All results can be explored within the app, and easily downloaded for additional downstream analysis. To use the Azimuth web app, visit the Azimuth website [here](https://azimuth.hubmapconsortium.org/). + diff --git a/vignettes/seurat5_future_vignette.Rmd b/vignettes/seurat5_future_vignette.Rmd new file mode 100644 index 000000000..cdbddf3ad --- /dev/null +++ b/vignettes/seurat5_future_vignette.Rmd @@ -0,0 +1,133 @@ +--- +title: "Parallelization in Seurat with future" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` +In Seurat, we have chosen to use the `future` framework for parallelization. In this vignette, we will demonstrate how you can take advantage of the `future` implementation of certain Seurat functions from a user's perspective. If you are interested in learning more about the `future` framework beyond what is described here, please see the package vignettes [here](https://cran.r-project.org/web/packages/future/index.html) for a comprehensive and detailed description. + +# How to use parallelization in Seurat + +To access the parallel version of functions in Seurat, you need to load the `future` package and set the `plan`. The `plan` will specify how the function is executed. The default behavior is to evaluate in a non-parallelized fashion (sequentially). To achieve parallel (asynchronous) behavior, we typically recommend the "multiprocess" strategy. By default, this uses all available cores but you can set the `workers` parameter to limit the number of concurrently active futures. + +```{r future.setup} +library(future) +# check the current active plan +plan() +# change the current plan to access parallelization +plan("multiprocess", workers = 4) +plan() +``` + +# 'Futurized' functions in Seurat + +The following functions have been written to take advantage of the future framework and will be parallelized if the current `plan` is set appropriately. Importantly, the way you call the function shouldn't change. + +* `NormalizeData()` +* `ScaleData()` +* `JackStraw()` +* `FindMarkers()` +* `FindIntegrationAnchors()` +* `FindClusters()` - if clustering over multiple resolutions + +For example, to run the parallel version of `FindMarkers()`, you simply need to set the plan and call the function as usual. + +```{r demo} +library(Seurat) +data("pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc3k.final) + +# Enable parallelization +plan('multiprocess', workers = 4) +markers <- FindMarkers(pbmc, ident.1 = "NK", verbose = FALSE) +``` + +# Comparison of sequential vs. parallel + +Here we'll perform a brief comparison the runtimes for the same function calls with and without parallelization. Note that while we expect that using a parallelized strategy will decrease the runtimes of the functions listed above, the magnitude of that decrease will depend on many factors (e.g. the size of the dataset, the number of workers, specs of the system, the future strategy, etc). The following benchmarks were performed on a desktop computer running Ubuntu 16.04.5 LTS with an Intel(R) Core(TM) i7-6800K CPU @ 3.40GHz and 96 GB of RAM. + +
+ **Click to see bencharking code** + +```{r compare} +timing.comparisons <- data.frame(fxn = character(), time = numeric(), strategy = character()) +plan("sequential") +start <- Sys.time() +pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt", verbose = FALSE) +end <- Sys.time() +timing.comparisons <- rbind(timing.comparisons, data.frame(fxn = "ScaleData", time = as.numeric(end - start, units = "secs"), strategy = "sequential")) + +start <- Sys.time() +markers <- FindMarkers(pbmc, ident.1 = "NK", verbose = FALSE) +end <- Sys.time() +timing.comparisons <- rbind(timing.comparisons, data.frame(fxn = "FindMarkers", time = as.numeric(end - start, units = "secs"), strategy = "sequential")) + +plan("multiprocess", workers = 4) +start <- Sys.time() +pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt", verbose = FALSE) +end <- Sys.time() +timing.comparisons <- rbind(timing.comparisons, data.frame(fxn = "ScaleData", time = as.numeric(end - start, units = "secs"), strategy = "multiprocess")) + +start <- Sys.time() +markers <- FindMarkers(pbmc, ident.1 = "NK", verbose = FALSE) +end <- Sys.time() +timing.comparisons <- rbind(timing.comparisons, data.frame(fxn = "FindMarkers", time = as.numeric(end - start, units = "secs"), strategy = "multiprocess")) +``` + +
+ +```{r viz.compare} +library(ggplot2) +library(cowplot) +ggplot(timing.comparisons, aes(fxn, time)) + geom_bar(aes(fill = strategy), stat = "identity", position = "dodge") + ylab("Time(s)") + xlab("Function") + theme_cowplot() +``` + +# Frequently asked questions + +1. **Where did my progress bar go?** +
Unfortunately, the when running these functions in any of the parallel plan modes you will lose the progress bar. This is due to some technical limitations in the `future` framework and R generally. If you want to monitor function progress, you'll need to forgo parallelization and use `plan("sequential")`. + +2. **What should I do if I keep seeing the following error?** +``` +Error in getGlobalsAndPackages(expr, envir = envir, globals = TRUE) : + The total size of the X globals that need to be exported for the future expression ('FUN()') is X GiB. + This exceeds the maximum allowed size of 500.00 MiB (option 'future.globals.maxSize'). The X largest globals are ... +``` +For certain functions, each worker needs access to certain global variables. If these are larger than the default limit, you will see this error. To get around this, you can set `options(future.globals.maxSize = X)`, where X is the maximum allowed size in bytes. So to set it to 1GB, you would run `options(future.globals.maxSize = 1000 * 1024^2)`. Note that this will increase your RAM usage so set this number mindfully. + + +```{r save.times, include = FALSE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/future_vignette_times.csv") +``` + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/seurat5_get_started.Rmd b/vignettes/seurat5_get_started.Rmd new file mode 100644 index 000000000..998c6cc9d --- /dev/null +++ b/vignettes/seurat5_get_started.Rmd @@ -0,0 +1,159 @@ +--- +title: "Getting Started with Seurat" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +--- + +```{r fxns, include = FALSE} +library('htmlTable') +make_list <- function(items) { + paste0("
    ", sprintf('
  • %s
  • ', items), '
', collapse = '') +} +make_href <- function(url, text){ + paste0("") +} +make_href2 <- function(url, text){ + paste0("", text, "") +} +process_entry <- function(dat) { + if (grepl(pattern = "https://satijalab.org/img/vignette_images", x = dat$image)) { + img <- paste0('![](', dat$image, '){width=3000px}') + } else if (grepl(pattern = "assets/", x= dat$image)) { + img <- paste0('![](', dat$image, '){width=3000px}') + } else { + img <- paste0('![](', '../output/images/', dat$image, '){width=3000px}') + } + if (grepl(pattern = "https://satijalab.org/", x = dat$name)) { + link <- dat$name + } else { + link <- paste0(dat$name, ".html") + } + go.button <- paste0('GO') + data.frame( + title = make_href(url = link, text = dat$title), + img = img, + desc = dat$summary, + btn = go.button + ) +} +process_wrapper_entry <- function(dat) { + data.frame( + Package = dat$name, + Vignette = make_href2(url = dat$link, text = dat$title), + Reference = make_href2(url = dat$reference, text = dat$citation), + Source = make_href2(url = dat$source, text = dat$source) + ) +} +make_vignette_card_section <- function(vdat, cat) { + vignettes <- vdat[[cat]]$vignettes + dat <- data.frame(title = character(), img = character(), desc = character()) + for (v in 1:length(x = vignettes)) { + dat <- rbind(dat, process_entry(vignettes[[v]])) + if(nrow(x = dat) == 3 | v == length(x = vignettes)){ + colnames(dat) <- NULL + dat <- t(dat) + if (ncol(x = dat) == 2) { + print(htmlTable( + dat, + align = '|l|l|', + css.cell = "padding-left: .75em; width: 50%", + css.class = "two-column-htmltable" + )) + } else if (ncol(x = dat) == 1){ + print(htmlTable( + dat, + align = '|l|', + css.cell = "padding-left: .75em; width: 100%", + css.class = "one-column-htmltable" + )) + } else { + print(htmlTable( + dat, + align = '|l|l|l|', + css.cell = "padding-left: .75em; width: 30%" + )) + } + dat <- data.frame(title = character(), img = character(), desc = character()) + } + } +} +``` + +```{r yaml, include = TRUE} +library(yaml) +vdat <- read_yaml(file = "vignettes.yaml") +``` + +```{=html} + +``` + +We provide a series of vignettes, tutorials, and analysis walkthroughs to help users get started with Seurat. You can also check out our [Reference page](../reference/index.html) which contains a full list of functions available to users. + +# Introductory Vignettes + +For new users of Seurat, we suggest starting with a guided walk through of a dataset of 2,700 Peripheral Blood Mononuclear Cells (PBMCs) made publicly available by 10X Genomics. This tutorial implements the major components of a standard unsupervised clustering workflow including QC and data filtration, calculation of high-variance genes, dimensional reduction, graph-based clustering, and the identification of cluster markers. + +We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x mulitome kit), or spatial datasets (e.g. from 10x visium or SLIDE-seq). + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 1) +``` + +# Data Integration + +Recently, we have developed [computational methods](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8) for integrated analysis of single-cell datasets generated across different conditions, technologies, or species. As an example, we provide a guided walk through for integrating and comparing PBMC datasets generated under different stimulation conditions. We provide additional vignettes demonstrating how to leverage an annotated scRNA-seq reference to map and label cells from a query, and to efficiently integrate large datasets. + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 2) +``` + +# Additional New Methods + +Seurat also offers additional novel statistical methods for analyzing single-cell data. These include: + +* Weighted-nearest neighbor (WNN) analysis: to define cell state based on multiple modalities [[paper](https://doi.org/10.1016/j.cell.2021.04.048)] +* Mixscape: to analyze data from pooled single-cell CRISPR screens [[paper](https://doi.org/10.1038/s41588-021-00778-2)] +* SCTransform: Improved normalization for single-cell RNA-seq data [[paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1)]] +* SCTransform, v2 regularization [[paper](https://www.biorxiv.org/content/10.1101/2021.07.07.451498v1.full)]] + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 3) +``` + +# Other + +Here we provide a series of short vignettes to demonstrate a number of features that are commonly used in Seurat. We’ve focused the vignettes around questions that we frequently receive from users. Click on a vignette to get started. + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 4) +``` + +# SeuratWrappers + +In order to facilitate the use of community tools with Seurat, we provide the Seurat Wrappers package, which contains code to run other analysis tools on Seurat objects. For the initial release, we provide wrappers for a few packages in the table below but would encourage other package developers interested in interfacing with Seurat to check out our contributor guide [here](https://github.com/satijalab/seurat.wrappers/wiki/Submission-Process). + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +library(knitr) +library(kableExtra) +cat <- 5 +vignettes <- vdat[[cat]]$vignettes +dat <- data.frame(Package = character(), Vignette = character(), Reference = character(), Source = character()) +for (v in 1:length(x = vignettes)) { + dat <- rbind(dat, process_wrapper_entry(vignettes[[v]])) +} +dat %>% + kable(format = "html", escape = FALSE) %>% + kable_styling(bootstrap_options = c("striped", "hover")) +``` diff --git a/vignettes/seurat5_hashing_vignette.Rmd b/vignettes/seurat5_hashing_vignette.Rmd new file mode 100644 index 000000000..5893cbac5 --- /dev/null +++ b/vignettes/seurat5_hashing_vignette.Rmd @@ -0,0 +1,291 @@ +--- +title: "Demultiplexing with hashtag oligos (HTOs)" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +Developed in collaboration with the Technology Innovation Group at NYGC, Cell Hashing uses oligo-tagged antibodies against ubiquitously expressed surface proteins to place a "sample barcode" on each single cell, enabling different samples to be multiplexed together and run in a single experiment. For more information, please refer to this [paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-018-1603-1). + +This vignette will give a brief demonstration on how to work with data produced with Cell Hashing in Seurat. Applied to two datasets, we can successfully demultiplex cells to their the original sample-of-origin, and identify cross-sample doublets. + +
+
The demultiplexing function `HTODemux()` implements the following procedure:
    +
  • We perform a k-medoid clustering on the normalized HTO values, which initially separates cells into K(# of samples)+1 clusters.
  • +
  • We calculate a 'negative' distribution for HTO. For each HTO, we use the cluster with the lowest average value as the negative group.
  • +
  • For each HTO, we fit a negative binomial distribution to the negative cluster. We use the 0.99 quantile of this distribution as a threshold.
  • +
  • Based on these thresholds, each cell is classified as positive or negative for each HTO.
  • +
  • Cells that are positive for more than one HTOs are annotated as doublets.
  • +
+ + +# 8-HTO dataset from human PBMCs + +
+ +
Dataset description:
+
    +
  • Data represent peripheral blood mononuclear cells (PBMCs) from eight different donors.
  • +
  • Cells from each donor are uniquely labeled, using CD45 as a hashing antibody.
  • +
  • Samples were subsequently pooled, and run on a single lane of the the 10X Chromium v2 system. +
  • You can download the count matrices for RNA and HTO [here](https://www.dropbox.com/sh/ntc33ium7cg1za1/AAD_8XIDmu4F7lJ-5sp-rGFYa?dl=0), or the FASTQ files from [GEO](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE108313)
  • +
+ +
+ +## Basic setup + +Load packages + +```{r load_pacakges} +library(Seurat) +options(Seurat.object.assay.version = "v5") +``` + +Read in data + +```{r read_Data} +# Load in the UMI matrix +pbmc.umis <- readRDS("../data/pbmc_umi_mtx.rds") + +# For generating a hashtag count matrix from FASTQ files, please refer to https://github.com/Hoohm/CITE-seq-Count. +# Load in the HTO count matrix +pbmc.htos <- readRDS("../data/pbmc_hto_mtx.rds") + +# Select cell barcodes detected by both RNA and HTO +# In the example datasets we have already filtered the cells for you, but perform this step for clarity. +joint.bcs <- intersect(colnames(pbmc.umis), colnames(pbmc.htos)) + +# Subset RNA and HTO counts by joint cell barcodes +pbmc.umis <- pbmc.umis[, joint.bcs] +pbmc.htos <- as.matrix(pbmc.htos[, joint.bcs]) + +# Confirm that the HTO have the correct names +rownames(pbmc.htos) +``` + +Setup Seurat object and add in the HTO data + +```{r hashtag_setup} +# Setup Seurat object +pbmc.hashtag <- CreateSeuratObject(counts = pbmc.umis) + +# Normalize RNA data with log normalization +pbmc.hashtag <- NormalizeData(pbmc.hashtag) +# Find and scale variable features +pbmc.hashtag <- FindVariableFeatures(pbmc.hashtag, selection.method = 'mean.var.plot') +pbmc.hashtag <- ScaleData(pbmc.hashtag, features = VariableFeatures(pbmc.hashtag)) +``` + +## Adding HTO data as an independent assay + +You can read more about working with multi-modal data [here](multimodal_vignette.html) + +```{r hto_assay} +# Add HTO data as a new assay independent from RNA +pbmc.hashtag[['HTO']] <- CreateAssay5Object(counts = pbmc.htos) +# Normalize HTO data, here we use centered log-ratio (CLR) transformation +pbmc.hashtag <- NormalizeData(pbmc.hashtag, assay = 'HTO', normalization.method = 'CLR') +``` + +## Demultiplex cells based on HTO enrichment + +Here we use the Seurat function `HTODemux()` to assign single cells back to their sample origins. + +```{r hashtag_demux, results = FALSE} +# If you have a very large dataset we suggest using k_function = "clara". This is a k-medoid clustering function for large applications +# You can also play with additional parameters (see documentation for HTODemux()) to adjust the threshold for classification +# Here we are using the default settings +pbmc.hashtag <- HTODemux(pbmc.hashtag, assay = "HTO", positive.quantile = 0.99) +``` + +## Visualize demultiplexing results + +Output from running `HTODemux()` is saved in the object metadata. We can visualize how many cells are classified as singlets, doublets and negative/ambiguous cells. + +```{r demux_summary} +# Global classification results +table(pbmc.hashtag$HTO_classification.global) +``` + +Visualize enrichment for selected HTOs with ridge plots + +```{r hashtag_ridge, fig.width=9} +# Group cells based on the max HTO signal +Idents(pbmc.hashtag) <- 'HTO_maxID' +RidgePlot(pbmc.hashtag, assay = 'HTO', features = rownames(pbmc.hashtag[['HTO']])[1:2], ncol = 2) +``` + +Visualize pairs of HTO signals to confirm mutual exclusivity in singlets + +```{r hashtag_scatter1, fig.height=8, fig.width=9} +FeatureScatter(pbmc.hashtag, feature1 = 'hto_HTO_A', feature2 = 'hto_HTO_B') +``` + +Compare number of UMIs for singlets, doublets and negative cells +```{r hashtag_vln, fig.width=10} +Idents(pbmc.hashtag) <- 'HTO_classification.global' +VlnPlot(pbmc.hashtag, features = 'nCount_RNA', pt.size = 0.1, log = TRUE) +``` + +Generate a two dimensional tSNE embedding for HTOs.Here we are grouping cells by singlets and doublets for simplicity. + +```{r hashtag_sub_tsne, fig.width=9} +#First, we will remove negative cells from the object +pbmc.hashtag.subset <- subset(pbmc.hashtag, idents = 'Negative', invert = TRUE) + +# Calculate a tSNE embedding of the HTO data +DefaultAssay(pbmc.hashtag.subset) <- "HTO" +pbmc.hashtag.subset <- ScaleData(pbmc.hashtag.subset, features = rownames(pbmc.hashtag.subset), verbose = FALSE) +pbmc.hashtag.subset <- RunPCA(pbmc.hashtag.subset, features = rownames(pbmc.hashtag.subset), approx = FALSE) +pbmc.hashtag.subset <- RunTSNE(pbmc.hashtag.subset, dims = 1:8, perplexity = 100) +DimPlot(pbmc.hashtag.subset) +#You can also visualize the more detailed classification result by running Idents(object) <- 'HTO_classification' before plotting. Here, you can see that each of the small clouds on the tSNE plot corresponds to one of the 28 possible doublet combinations. +``` + +Create an HTO heatmap, based on Figure 1C in the Cell Hashing paper. + +```{r hashtag_heatmap, fig.width=12} +# To increase the efficiency of plotting, you can subsample cells using the num.cells argument +HTOHeatmap(pbmc.hashtag, assay = 'HTO', ncells = 5000) +``` + +Cluster and visualize cells using the usual scRNA-seq workflow, and examine for the potential presence of batch effects. + +```{r hastag_cluster} +# Extract the singlets +pbmc.singlet <- subset(pbmc.hashtag, idents = 'Singlet') + +# Select the top 1000 most variable features +pbmc.singlet <- FindVariableFeatures(pbmc.singlet, selection.method = 'mean.var.plot') + +# Scaling RNA data, we only scale the variable features here for efficiency +pbmc.singlet <- ScaleData(pbmc.singlet, features = VariableFeatures(pbmc.singlet)) + +# Run PCA +pbmc.singlet <- RunPCA(pbmc.singlet, features = VariableFeatures(pbmc.singlet)) +``` + +```{r hashtag_tsne, fig.width=9} +# We select the top 10 PCs for clustering and tSNE based on PCElbowPlot +pbmc.singlet <- FindNeighbors(pbmc.singlet, reduction = 'pca', dims = 1:10) +pbmc.singlet <- FindClusters(pbmc.singlet, resolution = 0.6, verbose = FALSE) +pbmc.singlet <- RunTSNE(pbmc.singlet, reduction = 'pca', dims = 1:10) + +# Projecting singlet identities on TSNE visualization +DimPlot(pbmc.singlet, group.by = "HTO_classification") +``` + +# 12-HTO dataset from four human cell lines + +
+ +
Dataset description:
+
    +
  • Data represent single cells collected from four cell lines: HEK, K562, KG1 and THP1
  • +
  • Each cell line was further split into three samples (12 samples in total).
  • +
  • Each sample was labeled with a hashing antibody mixture (CD29 and CD45), pooled, and run on a single lane of 10X.
  • +
  • Based on this design, we should be able to detect doublets both across and within cell types
  • +
  • You can download the count matrices for RNA and HTO [here](https://www.dropbox.com/sh/c5gcjm35nglmvcv/AABGz9VO6gX9bVr5R2qahTZha?dl=0), and are available on GEO [here](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE108313)
  • +
+
+ +## Create Seurat object, add HTO data and perform normalization + +```{r hto_setup} +library(Seurat) +options(Seurat.object.assay.version = "v5") + +# Read in UMI count matrix for RNA +hto12.umis <- readRDS("/brahms/hartmana/seurat_site_builder/seurat/data/hto12_umi_mtx.rds") + +# Read in HTO count matrix +hto12.htos <- readRDS("/brahms/hartmana/seurat_site_builder/seurat/data/hto12_hto_mtx.rds") + +# Select cell barcodes detected in both RNA and HTO +cells.use <- intersect(rownames(hto12.htos), colnames(hto12.umis)) + +# Create Seurat object and add HTO data +hto12 <- CreateSeuratObject(counts = as(hto12.umis[, cells.use], "dgCMatrix"), min.features = 300) +hto12[['HTO']] <- CreateAssay5Object(counts = t(x = hto12.htos[colnames(hto12), 1:12])) + +# Normalize data +hto12 <- NormalizeData(hto12) +hto12 <- NormalizeData(hto12, assay = "HTO", normalization.method = "CLR") +``` + +## Demultiplex data + +```{r demux, results = FALSE} +hto12 <- HTODemux(hto12, assay = "HTO", positive.quantile = 0.99) +``` + +## Visualize demultiplexing results + +Distribution of selected HTOs grouped by classification, displayed by ridge plots + +```{r ridgeplot, fig.height=10, fig.width=9} +RidgePlot(hto12, assay = 'HTO', features = c("HEK-A","K562-B","KG1-A","THP1-C"), ncol = 2) +``` + +Visualize HTO signals in a heatmap + +```{r heatmap, fig.width=12} +HTOHeatmap(hto12, assay = "HTO") +``` + +## Visualize RNA clustering + +
  • Below, we cluster the cells using our standard scRNA-seq workflow. As expected we see four major clusters, corresponding to the cell lines
  • +
  • In addition, we see small clusters in between, representing mixed transcriptomes that are correctly annotated as doublets.
  • +
  • We also see within-cell type doublets, that are (perhaps unsurprisingly) intermixed with singlets of the same cell type
  • + +```{r hto_sub_tsne, fig.width=9} +# Remove the negative cells +hto12 <- subset(hto12, idents = 'Negative', invert = TRUE) + +# Run PCA on most variable features +hto12 <- FindVariableFeatures(hto12, selection.method = 'mean.var.plot') +hto12 <- ScaleData(hto12, features = VariableFeatures(hto12)) +hto12 <- RunPCA(hto12) +hto12 <- RunTSNE(hto12, dims = 1:5, perplexity = 100) +DimPlot(hto12) + NoLegend() +``` + +```{r save.times, include = TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/hashing_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_install.Rmd b/vignettes/seurat5_install.Rmd new file mode 100644 index 000000000..b5e3f39eb --- /dev/null +++ b/vignettes/seurat5_install.Rmd @@ -0,0 +1,90 @@ +--- +title: "Installation Instructions for Seurat" +output: html_document +--- + +To install Seurat, [R](https://www.r-project.org/) version 4.0 or greater is required. We also recommend installing [R Studio](https://www.rstudio.com/). + +# Install from CRAN + +Seurat is available on [CRAN](https://cran.r-project.org/package=Seurat) for all platforms. To install, run: + +```{r eval = FALSE} +# Enter commands in R (or R studio, if installed) +install.packages('Seurat') +library(Seurat) +``` + +If you see the warning message below, enter `y`: + +```{r eval=FALSE} +package which is only available in source form, and may need compilation of C/C++/Fortran: 'Seurat' +Do you want to attempt to install these from sources? +y/n: +``` + +# Install previous versions of Seurat + +## Install any version 3 release + +Any of the Seurat version 3 releases can be installed with the following command: + +```{r eval = FALSE} +remotes::install_version("Seurat", version = "3.X.X") +``` + +## Install the last version 2 release (2.3.4) + +To facilitate easy re-installation of the last version 2 release, we are hosting the binaries on our website. These can be installed with the following command: + +```{r eval = FALSE} +source("https://z.umn.edu/archived-seurat") +``` + +
    + View the script + +
    + +## Older versions of Seurat +Old versions of Seurat, from Seurat v2.0.1 and up, are hosted in CRAN's archive. To install an old version of Seurat, run: + +```{r eval = FALSE} +# Enter commands in R (or R studio, if installed) +# Install the remotes package +install.packages('remotes') +# Replace '2.3.0' with your desired version +remotes::install_version(package = 'Seurat', version = package_version('2.3.0')) +library(Seurat) +``` + +For versions of Seurat older than those not hosted on CRAN (versions 1.3.0 and 1.4.0), please download the packaged source code from our [releases page](https://github.com/satijalab/seurat/releases) and [install from the tarball](https://stackoverflow.com/questions/4739837/how-do-i-install-an-r-package-from-the-source-tarball-on-windows). + +# Install the development version of Seurat + +Install the development version of Seurat - directly from [GitHub](https://github.com/satijalab/seurat/tree/develop). + +```{r eval = FALSE} +# Enter commands in R (or R studio, if installed) +# Install the remotes package +install.packages('remotes') +remotes::install_github(repo = 'satijalab/seurat', ref = 'develop') +library(Seurat) +``` + +# Docker + +We provide docker images for Seurat via [dockerhub](https://hub.docker.com/r/satijalab/seurat). + +To pull the latest image from the command line: + +```sh +docker pull satijalab/seurat:latest +``` + +To use as a base image in a new Dockerfile: + +```sh +FROM satijalab/seurat:latest +``` + diff --git a/vignettes/seurat5_integration_introduction.Rmd b/vignettes/seurat5_integration_introduction.Rmd new file mode 100644 index 000000000..43e828531 --- /dev/null +++ b/vignettes/seurat5_integration_introduction.Rmd @@ -0,0 +1,269 @@ +--- +title: 'Introduction to scRNA-seq integration' +output: + html_document: + theme: united + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +# Introduction to scRNA-seq integration + +The joint analysis of two or more single-cell datasets poses unique challenges. In particular, identifying cell populations that are present across multiple datasets can be problematic under standard workflows. Seurat v4 includes a set of methods to match (or ‘align’) shared cell populations across datasets. These methods first identify cross-dataset pairs of cells that are in a matched biological state (‘anchors’), can be used both to correct for technical differences between datasets (i.e. batch effect correction), and to perform comparative scRNA-seq analysis of across experimental conditions. + +Below, we demonstrate methods for scRNA-seq integration as described in [Stuart\*, Butler\* et al, 2019](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8) to perform a comparative analysis of human immune cells (PBMC) in either a [resting or interferon-stimulated state](https://www.nature.com/articles/nbt.4042). + +## Integration goals + +The following tutorial is designed to give you an overview of the kinds of comparative analyses on complex cell types that are possible using the Seurat integration procedure. Here, we address a few key goals: + +* Create an 'integrated' data assay for downstream analysis +* Identify cell types that are present in both datasets +* Obtain cell type markers that are conserved in both control and stimulated cells +* Compare the datasets to find cell-type specific responses to stimulation + +## Setup the Seurat objects + +For convenience, we distribute this dataset through our [SeuratData](https://github.com/satijalab/seurat-data) package. + +```{r, include = TRUE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +```{r data} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(patchwork) +``` + +```{r installdata, eval=FALSE} +# install dataset +InstallData('ifnb') +``` + +```{r init, results='hide', message=FALSE, fig.keep='none'} +# load dataset +LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) + +# split the dataset into a list of two seurat objects (stim and CTRL) +ifnb.list <- SplitObject(ifnb, split.by = "stim") + +# normalize and identify variable features for each dataset independently +ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { + x <- NormalizeData(x) + x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) +}) + +# select features that are repeatedly variable across datasets for integration +features <- SelectIntegrationFeatures(object.list = ifnb.list) +``` + +## Perform integration + +We then identify anchors using the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`. + +```{r find.anchors} +immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features) +``` + +```{r integrate.data} +# this command creates an 'integrated' data assay +immune.combined <- IntegrateData(anchorset = immune.anchors) +``` + +## Perform an integrated analysis + +Now we can run a single integrated analysis on all cells! + +```{r clustering, results='hide', message=FALSE} +# specify that we will perform downstream analysis on the corrected data +# note that the original unmodified data still resides in the 'RNA' assay +DefaultAssay(immune.combined) <- "integrated" + +# Run the standard workflow for visualization and clustering +immune.combined <- ScaleData(immune.combined, verbose = FALSE) +immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) +immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) +immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) +immune.combined <- FindClusters(immune.combined, resolution = 0.5) +``` + +```{r viz, results='hide', message=FALSE} +# Visualization +p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") +p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) +p1 + p2 +``` + +To visualize the two conditions side-by-side, we can use the `split.by` argument to show each condition colored by cluster. + +```{r split.dim} +DimPlot(immune.combined, reduction = "umap", split.by = "stim") +``` + +## Identify conserved cell type markers + +To identify canonical cell type marker genes that are conserved across conditions, we provide the `FindConservedMarkers()` function. This function performs differential gene expression testing for each dataset/group and combines the p-values using meta-analysis methods from the MetaDE R package. For example, we can calculated the genes that are conserved markers irrespective of stimulation condition in cluster 6 (NK cells). + +```{r conserved.markers, warning=FALSE} +# For performing differential expression after integration, we switch back to the original data +DefaultAssay(immune.combined) <- "RNA" +nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "stim", verbose = FALSE) +head(nk.markers) +``` + +We can explore these marker genes for each cluster and use them to annotate our clusters as specific cell types. + +```{r annotate, results = 'hide', message=FALSE, fig.height = 8} +FeaturePlot(immune.combined, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", "CCL2", "PPBP"), min.cutoff = "q9") +immune.combined <- RenameIdents(immune.combined, "0" = "CD14 Mono", "1" = "CD4 Naive T", "2" = "CD4 Memory T", "3" = "CD16 Mono", "4" = "B", "5" = "CD8 T", "6" = "NK" , "7" = "T activated", "8" = "DC", "9" = "B Activated", "10" = "Mk", "11" = "pDC", "12" = "Eryth", "13" = "Mono/Mk Doublets", "14" = "HSPC") +DimPlot(immune.combined, label = TRUE) +``` + +The `DotPlot()` function with the `split.by` parameter can be useful for viewing conserved cell type markers across conditions, showing both the expression level and the percentage of cells in a cluster expressing any given gene. Here we plot 2-3 strong marker genes for each of our 14 clusters. + + +```{r splitdotplot, fig.height = 10} +Idents(immune.combined) <- factor( + Idents(immune.combined), + levels = c("HSPC", "Mono/Mk Doublets", "pDC", "Eryth","Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated", "CD4 Naive T", "CD4 Memory T")) +markers.to.plot <- c("CD3D","CREM","HSPH1","SELL","GIMAP5","CACYBP","GNLY","NKG7","CCL5","CD8A","MS4A1","CD79A","MIR155HG","NME1","FCGR3A","VMO1","CCL2","S100A9","HLA-DQA1","GPR183","PPBP","GNG11","HBA2","HBB","TSPAN13","IL3RA","IGJ","PRSS57") +DotPlot(immune.combined, features = markers.to.plot, cols = c('blue', 'red'), dot.scale = 8, split.by = "stim") + RotatedAxis() +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DotPlot(immune.combined, features = markers.to.plot, cols = c('blue', 'red'), + dot.scale = 6, split.by = "stim") + RotatedAxis() +ggsave(filename = "../output/images/pbmc_alignment.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +### Identify differential expressed genes across conditions + +Now that we've aligned the stimulated and control cells, we can start to do comparative analyses and look at the differences induced by stimulation. One way to look broadly at these changes is to plot the average expression of both the stimulated and control cells and look for genes that are visual outliers on a scatter plot. Here, we take the average expression of both the stimulated and control naive T cells and CD14 monocyte populations and generate the scatter plots, highlighting genes that exhibit dramatic responses to interferon stimulation. + +```{r scatterplots, results = 'hide', message=FALSE} +library(ggplot2) +library(cowplot) +theme_set(theme_cowplot()) +t.cells <- subset(immune.combined, idents = "CD4 Naive T") +Idents(t.cells) <- "stim" +avg.t.cells <- as.data.frame(log1p(AverageExpression(t.cells, verbose = FALSE)$RNA)) +avg.t.cells$gene <- rownames(avg.t.cells) + +cd14.mono <- subset(immune.combined, idents = "CD14 Mono") +Idents(cd14.mono) <- "stim" +avg.cd14.mono <- as.data.frame(log1p(AverageExpression(cd14.mono, verbose = FALSE)$RNA)) +avg.cd14.mono$gene <- rownames(avg.cd14.mono) + +genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8") +p1 <- ggplot(avg.t.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells") +p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE) +p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes") +p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE) +p1 + p2 +``` + +As you can see, many of the same genes are upregulated in both of these cell types and likely represent a conserved interferon response pathway. + +Because we are confident in having identified common cell types across condition, we can ask what genes change in different conditions for cells of the same type. First, we create a column in the meta.data slot to hold both the cell type and stimulation information and switch the current ident to that column. Then we use `FindMarkers()` to find the genes that are different between stimulated and control B cells. Notice that many of the top genes that show up here are the same as the ones we plotted earlier as core interferon response genes. Additionally, genes like CXCL10 which we saw were specific to monocyte and B cell interferon response show up as highly significant in this list as well. + +```{r de.genes} +immune.combined$celltype.stim <- paste(Idents(immune.combined), immune.combined$stim, sep = "_") +immune.combined$celltype <- Idents(immune.combined) +Idents(immune.combined) <- "celltype.stim" +b.interferon.response <- FindMarkers(immune.combined, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE) +head(b.interferon.response, n = 15) +``` + +Another useful way to visualize these changes in gene expression is with the `split.by` option to the `FeaturePlot()` or `VlnPlot()` function. This will display FeaturePlots of the list of given genes, split by a grouping variable (stimulation condition here). Genes such as CD3D and GNLY are canonical cell type markers (for T cells and NK/CD8 T cells) that are virtually unaffected by interferon stimulation and display similar gene expression patterns in the control and stimulated group. IFI6 and ISG15, on the other hand, are core interferon response genes and are upregulated accordingly in all cell types. Finally, CD14 and CXCL10 are genes that show a cell type specific interferon response. CD14 expression decreases after stimulation in CD14 monocytes, which could lead to misclassification in a supervised analysis framework, underscoring the value of integrated analysis. CXCL10 shows a distinct upregulation in monocytes and B cells after interferon stimulation but not in other cell types. + +```{r feature.heatmaps, fig.height = 14} +FeaturePlot(immune.combined, features = c("CD3D", "GNLY", "IFI6"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red")) +``` + +```{r splitvln, fig.height = 12} +plots <- VlnPlot(immune.combined, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "celltype", pt.size = 0, combine = FALSE) +wrap_plots(plots = plots, ncol = 1) +``` + +```{r save, include=TRUE} +saveRDS(immune.combined, file = "../output/immune.combined.rds") +``` + +# Performing integration on datasets normalized with SCTransform + +In [Hafemeister and Satija, 2019](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), we introduced an improved method for the normalization of scRNA-seq, based on regularized negative binomial regression. The method is named 'sctransform', and avoids some of the pitfalls of standard normalization workflows, including the addition of a pseudocount, and log-transformation. You can read more about sctransform in the [manuscript](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) or our [SCTransform vignette](sctransform_vignette.html). + +Below, we demonstrate how to modify the Seurat integration workflow for datasets that have been normalized with the sctransform workflow. The commands are largely similar, with a few key differences: + +* Normalize datasets individually by `SCTransform()`, instead of `NormalizeData()` prior to integration +* As discussed further in our [SCTransform vignette](sctransform_vignette.html), we typically use 3,000 or more features for analysis downstream of sctransform. +* Run the `PrepSCTIntegration()` function prior to identifying anchors +* When running `FindIntegrationAnchors()`, and `IntegrateData()`, set the `normalization.method` parameter to the value `SCT`. +* When running sctransform-based workflows, including integration, do not run the `ScaleData()` function + + +```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} +LoadData('ifnb') +ifnb.list <- SplitObject(ifnb, split.by = "stim") +ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform) +features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) +ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) +``` + +```{r ifnb.cca.sct.anchors} +immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = 'SCT', anchor.features = features) +immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = 'SCT') +``` + +```{r ifnb.cca.sct.clustering, results='hide', message=FALSE} +immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) +immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30) +``` + +```{r immunesca.cca.sct.split.dims} +p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") +p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) +p1 + p2 +``` + +Now that the datasets have been integrated, you can follow the previous steps in this vignette identify cell types and cell type-specific responses. + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_introduction.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd new file mode 100644 index 000000000..8801c7faa --- /dev/null +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -0,0 +1,119 @@ +--- +title: "Tips for integrating large datasets" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 85), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +```{r, include=TRUE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +For very large datasets, the standard integration workflow can sometimes be prohibitively computationally expensive. In this workflow, we employ two options that can improve efficiency and runtimes: + +1. Reciprocal PCA (RPCA) +2. Reference-based integration + +The main efficiency improvements are gained in `FindIntegrationAnchors()`. First, we use reciprocal PCA (RPCA) instead of CCA, to identify an effective space in which to find anchors. When determining anchors between any two datasets using reciprocal PCA, we project each dataset into the others PCA space and constrain the anchors by the same mutual neighborhood requirement. All downstream integration steps remain the same and we are able to 'correct' (or harmonize) the datasets. + +Additionally, we use reference-based integration. In the standard workflow, we identify anchors between all pairs of datasets. While this gives datasets equal weight in downstream integration, it can also become computationally intensive. For example when integrating 10 different datasets, we perform 45 different pairwise comparisons. As an alternative, we introduce here the possibility of specifying one or more of the datasets as the 'reference' for integrated analysis, with the remainder designated as 'query' datasets. In this workflow, we do not identify anchors between pairs of query datasets, reducing the number of comparisons. For example, when integrating 10 datasets with one specified as a reference, we perform only 9 comparisons. Reference-based integration can be applied to either log-normalized or SCTransform-normalized datasets. + +This alternative workflow consists of the following steps: + +* Create a list of Seurat objects to integrate +* Perform normalization, feature selection, and scaling separately for each dataset +* Run PCA on each object in the list +* Integrate datasets, and proceed with joint analysis + +In general, we observe strikingly similar results between the standard workflow and the one demonstrated here, with substantial reduction in compute time and memory. However, if the datasets are highly divergent (for example, cross-modality mapping or cross-species mapping), where only a small subset of features can be used to facilitate integration, and you may observe superior results using CCA. + +For this example, we will be using the "Immune Cell Atlas" data from the Human Cell Atlas which can be found [here](https://data.humancellatlas.org/explore/projects?filter=%5B%7B%22facetName%22:%22organ%22,%22terms%22:%5B%22immune%20system%22%5D%7D%5D&catalog=dcp1). + +```{r libs} +library(Seurat) +``` + +After acquiring the data, we first perform standard normalization and variable feature selection. + +```{r hca.full.1} +bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") +bm280k <- CreateSeuratObject(counts = bm280k.data, min.cells = 100, min.features = 500) +bm280k.list <- SplitObject(bm280k, split.by = "orig.ident") +bm280k.list <- lapply(X = bm280k.list, FUN = function(x) { + x <- NormalizeData(x, verbose = FALSE) + x <- FindVariableFeatures(x, verbose = FALSE) +}) +``` + +Next, select features for downstream integration, and run PCA on each object in the list, which is required for running the alternative reciprocal PCA workflow. + +```{r hca.full.2} +features <- SelectIntegrationFeatures(object.list = bm280k.list) +bm280k.list <- lapply(X = bm280k.list, FUN = function(x) { + x <- ScaleData(x, features = features, verbose = FALSE) + x <- RunPCA(x, features = features, verbose = FALSE) +}) +``` + +Since this dataset contains both men and women, we will chose one male and one female (BM1 and BM2) to use in a reference-based workflow. We determined donor sex by examining the expression of the XIST gene. + +```{r integration.hca.full} +anchors <- FindIntegrationAnchors(object.list = bm280k.list, reference = c(1, 2), reduction = "rpca", dims = 1:50) +bm280k.integrated <- IntegrateData(anchorset = anchors, dims = 1:50) +``` + +```{r downstream.hca.full} +bm280k.integrated <- ScaleData(bm280k.integrated, verbose = FALSE) +bm280k.integrated <- RunPCA(bm280k.integrated, verbose = FALSE) +bm280k.integrated <- RunUMAP(bm280k.integrated, dims = 1:50) +``` + +```{r viz.hca.full, fig.height = 9, fig.width = 16} +DimPlot(bm280k.integrated, group.by = "orig.ident") +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DimPlot(bm280k.integrated, group.by = "orig.ident") + xlab("UMAP 1") + ylab("UMAP 2") + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + guides(colour = guide_legend(override.aes = list(size = 10))) +ggsave(filename = "../output/images/bm280k_integrated.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_large_datasets.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd new file mode 100644 index 000000000..ee52e8d36 --- /dev/null +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -0,0 +1,214 @@ +--- +title: "Mapping and annotating query datasets" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- +*** +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + warning = FALSE, + error = TRUE, + message = FALSE, + fig.width = 8, + time_it = TRUE +) +``` + +# Introduction to single-cell reference mapping + +In this vignette, we first build an integrated reference and then demonstrate how to leverage this reference to annotate new query datasets. Generating an integrated reference follows the same workflow described in more detail in the integration introduction [vignette](integration_introduction.html). Once generated, this reference can be used to analyze additional query datasets through tasks like cell type label transfer and projecting query cells onto reference UMAPs. Notably, this does not require correction of the underlying raw query data and can therefore be an efficient strategy if a high quality reference is available. + +# Dataset preprocessing + +For the purposes of this example, we've chosen human pancreatic islet cell datasets produced across four technologies, CelSeq (GSE81076) CelSeq2 (GSE85241), Fluidigm C1 (GSE86469), and SMART-Seq2 (E-MTAB-5061). For convenience, we distribute this dataset through our [SeuratData](https://github.com/satijalab/seurat-data) package. The metadata contains the technology (`tech` column) and cell type annotations (`celltype` column) for each cell in the four datasets. + +```{r libraries, message=FALSE, warning=FALSE} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +``` + +```{r install.data, eval=FALSE} +InstallData('panc8') +``` + +To construct a reference, we will identify 'anchors' between the individual datasets. First, we split the combined object into a list, with each dataset as an element (this is only necessary because the data was bundled together for easy distribution). + +```{r preprocessing1} +data('panc8') +panc8 <- UpdateSeuratObject(panc8) +panc8[["RNA"]] <- CreateAssay5Object(panc8[["RNA"]]@counts) +pancreas.list <- SplitObject(panc8, split.by = "tech") +pancreas.list <- pancreas.list[c("celseq", "celseq2", "fluidigmc1", "smartseq2")] +``` + +Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat implements an improved method for variable feature selection based on a variance stabilizing transformation (`"vst"`) + +```{r preprocessing3} +for (i in 1:length(pancreas.list)) { + pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) + pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", + nfeatures = 2000, verbose = FALSE) +} +``` + +# Integration of 3 pancreatic islet cell datasets + +Next, we identify anchors using the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input. Here, we integrate three of the objects into a reference (we will use the fourth later in this vignette as a query dataset to demonstrate mapping). + +* We use all default parameters here for identifying anchors, including the 'dimensionality' of the dataset (30; feel free to try varying this parameter over a broad range, for example between 10 and 50). + +```{r integration.anchors, warning = FALSE, message = FALSE} +reference.list <- pancreas.list[c("celseq", "celseq2", "smartseq2")] +pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30) +``` + +We then pass these anchors to the `IntegrateData()` function, which returns a Seurat object. + +* The returned object will contain a new `Assay`, which holds an integrated (or 'batch-corrected') expression matrix for all cells, enabling them to be jointly analyzed. + +```{r data.integration, warning = FALSE, message = FALSE} +pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30) +``` + +After running `IntegrateData()`, the `Seurat` object will contain a new `Assay` with the integrated expression matrix. Note that the original (uncorrected values) are still stored in the object in the "RNA" assay, so you can switch back and forth. + +We can then use this new integrated matrix for downstream analysis and visualization. Here we scale the integrated data, run PCA, and visualize the results with UMAP. The integrated datasets cluster by cell type, instead of by technology. + +```{r analysis, message = FALSE, warning=FALSE, fig.width=10} +library(ggplot2) +library(cowplot) +library(patchwork) +#switch to integrated assay. The variable features of this assay are automatically +#set during IntegrateData +DefaultAssay(pancreas.integrated) <- 'integrated' +# Run the standard workflow for visualization and clustering +pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE) +pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE) +pancreas.integrated <- RunUMAP(pancreas.integrated, reduction = "pca", dims = 1:30, + verbose = FALSE) +p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "tech") +p2 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", + label = TRUE, repel = TRUE) + NoLegend() +p1 + p2 +``` + +```{r save.img, include=TRUE} +plot <- DimPlot(pancreas.integrated, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + guides(colour = guide_legend(override.aes = list(size = 10))) +ggsave(filename = "pancreas_integrated_umap.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +# Cell type classification using an integrated reference + +Seurat also supports the projection of reference data (or meta data) onto a query object. While many of the methods are conserved (both procedures begin by identifying anchors), there are two important distinctions between data transfer and integration: + +1. In data transfer, Seurat does not correct or modify the query expression data. +2. In data transfer, Seurat has an option (set by default) to project the PCA structure of a reference onto the query, instead of learning a joint structure with CCA. We generally suggest using this option when projecting data between scRNA-seq datasets. + +After finding anchors, we use the `TransferData()` function to classify the query cells based on reference data (a vector of reference cell type labels). `TransferData()` returns a matrix with predicted IDs and prediction scores, which we can add to the query metadata. + +```{r label.transfer, warning = FALSE, message = FALSE} +pancreas.query <- pancreas.list[["fluidigmc1"]] +pancreas.anchors <- FindTransferAnchors(reference = pancreas.integrated, query = pancreas.query, dims = 1:30, reference.reduction = "pca") +predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.integrated$celltype, dims = 1:30) +pancreas.query <- AddMetaData(pancreas.query, metadata = predictions) +``` + +Because we have the original label annotations from our full integrated analysis, we can evaluate how well our predicted cell type annotations match the full reference. In this example, we find that there is a high agreement in cell type classification, with over 96% of cells being labeled correctly. + +```{r analysis2} +pancreas.query$prediction.match <- pancreas.query$predicted.id == pancreas.query$celltype +table(pancreas.query$prediction.match) +``` + +To verify this further, we can examine some canonical cell type markers for specific pancreatic islet cell populations. Note that even though some of these cell types are only represented by one or two cells (e.g. epsilon cells), we are still able to classify them correctly. + +```{r vlnplots, fig.height=8} +table(pancreas.query$predicted.id) +VlnPlot(pancreas.query, c("REG1A", "PPY", "SST", "GHRL", "VWF", "SOX10"), group.by = "predicted.id") +``` + +# Unimodal UMAP Projection + +In Seurat v4, we also enable projection of a query onto the reference UMAP structure. This can be achieved by computing the reference UMAP model and then calling `MapQuery()` instead of `TransferData()`. + +```{r label.transfer.v4, warning = FALSE, message = FALSE} +pancreas.integrated <- RunUMAP(pancreas.integrated, dims = 1:30, reduction = "pca", return.model = TRUE) +pancreas.query <- MapQuery( + anchorset = pancreas.anchors, + reference = pancreas.integrated, + query = pancreas.query, + refdata = list(celltype = 'celltype'), + reference.reduction = 'pca', + reduction.model = 'umap' +) +``` + +
    + **What is `MapQuery` doing?** + +`MapQuery()` is a wrapper around three functions: `TransferData()`, `IntegrateEmbeddings()`, and `ProjectUMAP()`. `TransferData()` is used to transfer cell type labels and impute the ADT values; `IntegrateEmbeddings()` is used to integrate reference with query by correcting the query's projected low-dimensional embeddings; and finally `ProjectUMAP()` is used to project the query data onto the UMAP structure of the reference. The equivalent code for doing this with the intermediate functions is below: + +```{r, eval=FALSE} +pancreas.query <- TransferData( + anchorset = pancreas.anchors, + reference = pancreas.integrated, + query = pancreas.query, + refdata = list(celltype = "celltype") +) +pancreas.query <- IntegrateEmbeddings( + anchorset = pancreas.anchors, + reference = pancreas.integrated, + query = pancreas.query, + new.reduction.name = "ref.pca" +) +pancreas.query <- ProjectUMAP( + query = pancreas.query, + query.reduction = "ref.pca", + reference = pancreas.integrated, + reference.reduction = "pca", + reduction.model = "umap" +) +``` +
    + +We can now visualize the query cells alongside our reference. + +```{r panc.refdimplots, fig.width=10} +p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", label = TRUE, + label.size = 3 ,repel = TRUE) + NoLegend() + ggtitle("Reference annotations") +p2 <- DimPlot(pancreas.query, reduction = "ref.umap", group.by = "predicted.celltype", label = TRUE, + label.size = 3 ,repel = TRUE) + NoLegend() + ggtitle("Query transferred labels") +p1 + p2 +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_reference_mapping.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_integration_rpca.Rmd b/vignettes/seurat5_integration_rpca.Rmd new file mode 100644 index 000000000..9505be734 --- /dev/null +++ b/vignettes/seurat5_integration_rpca.Rmd @@ -0,0 +1,188 @@ +--- +title: 'Fast integration using reciprocal PCA (RPCA)' +output: + html_document: + theme: united + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +In this vignette, we present a slightly modified workflow for the integration of scRNA-seq datasets. Instead of utilizing canonical correlation analysis (‘CCA’) to identify anchors, we instead utilize reciprocal PCA (‘RPCA’). When determining anchors between any two datasets using RPCA, we project each dataset into the others PCA space and constrain the anchors by the same mutual neighborhood requirement. The commands for both workflows are largely identical, but the two methods may be applied in different context. + +By identifying shared sources of variation between datasets, CCA is well-suited for identifying anchors when cell types are conserved, but there are very substantial differences in gene expression across experiments. CCA-based integration therefore enables integrative analysis when experimental conditions or disease states introduce very strong expression shifts, or when integrating datasets across modalities and species. However, CCA-based integration may also lead to overcorrection, especially when a large proportion of cells are non-overlapping across datasets. + +RPCA-based integration runs significantly faster, and also represents a more conservative approach where cells in different biological states are less likely to 'align' after integration. We therefore,recommend RPCA during integrative analysis where: +* A substantial fraction of cells in one dataset have no matching type in the other +* Datasets originate from the same platform (i.e. multiple lanes of 10x genomics) +* There are a large number of datasets or cells to integrate (see [here](integration_large_datasets.html) for more tips on integrating large datasets) + +Below, we demonstrate the use of reciprocal PCA to align the same stimulated and resting datasets first analyzed in our [introduction to scRNA-seq integration](integration_introduction.html) vignette. While the list of commands is nearly identical, this workflow requires users to run principal components analysis (PCA) individually on each dataset prior to integration. Users should also set the 'reduction' argument to 'rpca', when running `FindIntegrationAnchors()`. + +```{r, include=TRUE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +```{r installdata} +library(Seurat) +library(SeuratData) +# install dataset +InstallData('ifnb') +``` + +```{r init, results='hide', message=FALSE, fig.keep='none'} +# load dataset +LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) + +# split the dataset into a list of two seurat objects (stim and CTRL) +ifnb.list <- SplitObject(ifnb, split.by = "stim") + +# normalize and identify variable features for each dataset independently +ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { + x <- NormalizeData(x) + x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) +}) + +# select features that are repeatedly variable across datasets for integration +# run PCA on each dataset using these features +features <- SelectIntegrationFeatures(object.list = ifnb.list) +ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { + x <- ScaleData(x, features = features, verbose = FALSE) + x <- RunPCA(x, features = features, verbose = FALSE) +}) +``` + +# Perform integration + +We then identify anchors using the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`. + +```{r find.anchors} +immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features,reduction = 'rpca') +``` + +```{r integrate.data} +# this command creates an 'integrated' data assay +immune.combined <- IntegrateData(anchorset = immune.anchors) +``` + +Now we can run a single integrated analysis on all cells! + +```{r clustering, results='hide', message=FALSE} +# specify that we will perform downstream analysis on the corrected data +# note that the original unmodified data still resides in the 'RNA' assay +DefaultAssay(immune.combined) <- "integrated" + +# Run the standard workflow for visualization and clustering +immune.combined <- ScaleData(immune.combined, verbose = FALSE) +immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) +immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) +immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) +immune.combined <- FindClusters(immune.combined, resolution = 0.5) +``` + +```{r viz, results='hide', message=FALSE} +# Visualization +p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") +p2 <- DimPlot(immune.combined, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) +p1 + p2 +``` + +# Modifying the strength of integration + +The results show that rpca-based integration is more conservative, and in this case, do not perfectly align a subset of cells (which are naive and memory T cells) across experiments. You can increase the strength of alignment by increasing the `k.anchor` parameter, which is set to 5 by default. Increasing this parameter to 20 will assist in aligning these populations. + +```{r split.dim} +immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features,reduction = 'rpca', k.anchor = 20) +immune.combined <- IntegrateData(anchorset = immune.anchors) + +immune.combined <- ScaleData(immune.combined, verbose = FALSE) +immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) +immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) +immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) +immune.combined <- FindClusters(immune.combined, resolution = 0.5) +``` + +```{r viz2, results='hide', message=FALSE} +# Visualization +p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") +p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) +p1 + p2 +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DimPlot(immune.combined, group.by = "stim") + + xlab("UMAP 1") + ylab("UMAP 2") + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + guides(colour = guide_legend(override.aes = list(size = 10))) +ggsave(filename = "../output/images/rpca_integration.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +Now that the datasets have been integrated, you can follow the previous steps in the [introduction to scRNA-seq integration vignette](integration_introduction.html) to identify cell types and cell type-specific responses. + +# Performing integration on datasets normalized with SCTransform + +As an additional example, we repeat the analyses performed above, but normalize the datasets using [SCTransform](sctransform_vignette.html). We may choose to set the `method` parameter to `glmGamPoi` (install [here](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html)) in order to enable faster estimation of regression parameters in `SCTransform()`. + +```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} +LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) +ifnb.list <- SplitObject(ifnb, split.by = "stim") +ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform, method = "glmGamPoi") +features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) +ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) +ifnb.list <- lapply(X = ifnb.list, FUN = RunPCA, features = features) +``` + +```{r ifnb.cca.sct.anchors} +immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = 'SCT', anchor.features = features, dims = 1:30, reduction = 'rpca', k.anchor = 20) +immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = 'SCT', dims = 1:30) +``` + +```{r ifnb.cca.sct.clustering, results='hide', message=FALSE} +immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) +immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30) +``` + +```{r immunesca.cca.sct.split.dims} +# Visualization +p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") +p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) +p1 + p2 +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_rpca.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    + diff --git a/vignettes/seurat5_interaction_vignette.Rmd b/vignettes/seurat5_interaction_vignette.Rmd new file mode 100644 index 000000000..50770bd9d --- /dev/null +++ b/vignettes/seurat5_interaction_vignette.Rmd @@ -0,0 +1,150 @@ +--- +title: "Seurat - Interaction Tips" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +```{r, include=TRUE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +# Load in the data + +This vignette demonstrates some useful features for interacting with the Seurat object. For demonstration purposes, we will be using the 2,700 PBMC object that is created in the first guided tutorial. You can load the data from our [SeuratData](https://github.com/satijalab/seurat-data) package. To simulate the scenario where we have two replicates, we will randomly assign half the cells in each cluster to be from "rep1" and other half from "rep2". + +```{r load_data} +library(Seurat) +library(SeuratData) +InstallData("pbmc3k") +pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) + +# pretend that cells were originally assigned to one of two replicates (we assign randomly here) +# if your cells do belong to multiple replicates, and you want to add this info to the Seurat object +# create a data frame with this information (similar to replicate.info below) +set.seed(42) +pbmc$replicate <- sample(c('rep1', 'rep2'), size = ncol(pbmc), replace = TRUE) +``` + +# Switch identity class between cluster ID and replicate + +```{r swap.idents} +# Plot UMAP, coloring cells by cell type (currently stored in object@ident) +DimPlot(pbmc, reduction = 'umap') +# How do I create a UMAP plot where cells are colored by replicate? +# First, store the current identities in a new column of meta.data called CellType +pbmc$CellType <- Idents(pbmc) +# Next, switch the identity class of all cells to reflect replicate ID +Idents(pbmc) <- 'replicate' +DimPlot(pbmc, reduction = 'umap') +# alternately : DimPlot(pbmc, reduction = 'umap', group.by = "replicate") +# you can pass the shape.by to label points by both replicate and cell type + +# Switch back to cell type labels +Idents(pbmc) <- 'CellType' +``` + +# Tabulate cells by cluster ID, replicate, or both + +```{r counting} +# How many cells are in each cluster +table(Idents(pbmc)) + +# How many cells are in each replicate? +table(pbmc$replicate) + +# What proportion of cells are in each cluster? +prop.table(table(Idents(pbmc))) + +# How does cluster membership vary by replicate? +table(Idents(pbmc), pbmc$replicate) +prop.table(table(Idents(pbmc), pbmc$replicate), margin = 2) +``` + +# Selecting particular cells and subsetting the Seurat object + +```{r subset} +# What are the cell names of all NK cells? +WhichCells(pbmc, idents = "NK") + +# How can I extract expression matrix for all NK cells (perhaps, to load into another package) +nk.raw.data <- as.matrix(GetAssayData(pbmc, slot = 'counts')[, WhichCells(pbmc, ident = "NK")]) + +# Can I create a Seurat object based on expression of a feature or value in object metadata? +subset(pbmc, subset = MS4A1 > 1) +subset(pbmc, subset = replicate == 'rep2') + +# Can I create a Seurat object of just the NK cells and B cells? +subset(pbmc, idents = c('NK', 'B')) + +# Can I create a Seurat object of all cells except the NK cells and B cells? +subset(pbmc, idents = c('NK', 'B'), invert = TRUE) + +# note that if you wish to perform additional rounds of clustering after subsetting +# we recommend re-running FindVariableFeatures() and ScaleData() +``` + +# Calculating the average gene expression within a cluster + +```{r avg.exp, fig.height=8} +# How can I calculate the average expression of all cells within a cluster? +cluster.averages <- AverageExpression(pbmc) +head(cluster.averages[['RNA']][, 1:5]) + +# Return this information as a Seurat object (enables downstream plotting and analysis) +# First, replace spaces with underscores '_' so ggplot2 doesn't fail +orig.levels <- levels(pbmc) +Idents(pbmc) <- gsub(pattern = ' ', replacement = '_', x = Idents(pbmc)) +orig.levels <- gsub(pattern = ' ', replacement = '_', x = orig.levels) +levels(pbmc) <- orig.levels +cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE) +cluster.averages + +# How can I plot the average expression of NK cells vs. CD8 T cells? +# Pass do.hover = T for an interactive plot to identify gene outliers +CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8_T") + +# How can I calculate expression averages separately for each replicate? +cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE, add.ident = "replicate") +CellScatter(cluster.averages, cell1 = "CD8_T_rep1", cell2 = "CD8_T_rep2") + +# You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between replicates +DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/interaction_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_merge_vignette.Rmd b/vignettes/seurat5_merge_vignette.Rmd new file mode 100644 index 000000000..f5a6f585d --- /dev/null +++ b/vignettes/seurat5_merge_vignette.Rmd @@ -0,0 +1,110 @@ +--- +title: "Seurat - Combining Two 10X Runs" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +```{r, include=TRUE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +In this vignette, we will combine two 10X PBMC datasets: one containing 4K cells and one containing 8K cells. The datasets can be found [here](https://support.10xgenomics.com/single-cell-gene-expression/datasets). + +To start, we read in the data and create two `Seurat` objects. + +```{r load_data} +library(Seurat) +pbmc4k.data <- Read10X(data.dir = "../data/pbmc4k/filtered_gene_bc_matrices/GRCh38/") +pbmc4k <- CreateSeuratObject(counts = pbmc4k.data, project = "PBMC4K") +pbmc4k + +pbmc8k.data <- Read10X(data.dir = "../data/pbmc8k/filtered_gene_bc_matrices/GRCh38/") +pbmc8k <- CreateSeuratObject(counts = pbmc8k.data, project = "PBMC8K") +pbmc8k +``` + + +# Merging Two `Seurat` Objects + +`merge()` merges the raw count matrices of two `Seurat` objects and creates a new `Seurat` object with the resulting combined raw count matrix. To easily tell which original object any particular cell came from, you can set the `add.cell.ids` parameter with an `c(x, y)` vector, which will prepend the given identifier to the beginning of each cell name. The original project ID will remain stored in object meta data under `orig.ident` + +```{r merge.objects} +pbmc.combined <- merge(pbmc4k, y = pbmc8k, add.cell.ids = c('4K', '8K'), project = 'PBMC12K') +pbmc.combined +``` + +```{r inspect.merge} +# notice the cell names now have an added identifier +head(colnames(pbmc.combined)) +table(pbmc.combined$orig.ident) +``` + +# Merging More Than Two `Seurat` Objects + +To merge more than two `Seurat` objects, simply pass a vector of multiple `Seurat` objects to the `y` parameter for `merge`; we'll demonstrate this using the 4K and 8K PBMC datasets as well as our previously computed Seurat object from the 2,700 PBMC tutorial (loaded via the [SeuratData](https://github.com/satijalab/seurat-data) package). + +```{r merge_three} +library(SeuratData) +InstallData("pbmc3k") +pbmc3k <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc3k <- UpdateSeuratObject(pbmc3k) +pbmc3k + +pbmc.big <- merge(pbmc3k, y = c(pbmc4k, pbmc8k), add.cell.ids = c('3K', '4K', '8K'), project = 'PBMC15K') +pbmc.big + +head(colnames(pbmc.big)) +tail(colnames(pbmc.big)) +unique(sapply(X = strsplit(colnames(pbmc.big), split = '_'), FUN = '[', 1)) +table(pbmc.big$orig.ident) +``` + +# Merge Based on Normalized Data + +By default, `merge()` will combine the `Seurat` objects based on the raw count matrices, erasing any previously normalized and scaled data matrices. If you want to merge the normalized data matrices as well as the raw count matrices, simply pass `merge.data = TRUE`. This should be done if the same normalization approach was applied to all objects. + +```{r normalize} +pbmc4k <- NormalizeData(pbmc4k) +pbmc8k <- NormalizeData(pbmc8k) +pbmc.normalized <- merge(pbmc4k, y = pbmc8k, add.cell.ids = c('4K', '8K'), project = 'PBMC12K', merge.data = TRUE) +GetAssayData(pbmc.combined)[1:10, 1:15] +GetAssayData(pbmc.normalized)[1:10, 1:15] +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/merge_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_mixscape_vignette.Rmd b/vignettes/seurat5_mixscape_vignette.Rmd new file mode 100644 index 000000000..e59d80e83 --- /dev/null +++ b/vignettes/seurat5_mixscape_vignette.Rmd @@ -0,0 +1,372 @@ +--- +title: "Mixscape Vignette" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now) + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +options(SeuratData.repo.use = 'satijalab04.nygenome.org') +``` + +# Overview + +This tutorial demonstrates how to use mixscape for the analyses of single-cell pooled CRSIPR screens. We introduce new Seurat functions for: + +1. Calculating the perturbation-specific signature of every cell. +2. Identifying and removing cells that have 'escaped' CRISPR perturbation. +3. Visualizing similarities/differences across different perturbations. + +# Loading required packages + +```{r pkgs1} +# Load packages. +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(ggplot2) +library(patchwork) +library(scales) +library(dplyr) +library(reshape2) + +# Download dataset using SeuratData. +InstallData(ds = "thp1.eccite") + +# Setup custom theme for plotting. +custom_theme <- theme( + plot.title = element_text(size=16, hjust = 0.5), + legend.key.size = unit(0.7, "cm"), + legend.text = element_text(size = 14)) +``` + +# Loading Seurat object containing ECCITE-seq dataset + +We use a 111 gRNA ECCITE-seq dataset generated from stimulated THP-1 cells that was recently published from our lab in bioRxiv [Papalexi et al. 2020](https://www.biorxiv.org/content/10.1101/2020.06.28.175596v1). This dataset can be easily downloaded from the [SeuratData](https://github.com/satijalab/seurat-data) package. + +```{r eccite.load} +# Load object. +eccite <- LoadData(ds = "thp1.eccite") +eccite <- UpdateSeuratObject(eccite) +eccite[["RNA"]] <- CreateAssay5Object(eccite[["RNA"]]@counts) +eccite[["ADT"]] <- CreateAssay5Object(eccite[["ADT"]]@counts) +eccite[["HTO"]] <- CreateAssay5Object(eccite[["HTO"]]@counts) +eccite[["GDO"]] <- CreateAssay5Object(eccite[["GDO"]]@counts) + +# Normalize protein. +eccite <- NormalizeData( + object = eccite, + assay = "ADT", + normalization.method = "CLR", + margin = 2) +``` + +# RNA-based clustering is driven by confounding sources of variation + +Here, we follow the standard Seurat workflow to cluster cells based on their gene expression profiles. We expected to obtain perturbation-specific clusters however we saw that clustering is primarily driven by cell cycle phase and replicate ID. We only observed one perturbation-specific cluster containing cells expression IFNgamma pathway gRNAs. + +```{r eccite.pp, fig.height = 10, fig.width = 15} +# Prepare RNA assay for dimensionality reduction: +# Normalize data, find variable features and scale data. +DefaultAssay(object = eccite) <- 'RNA' +eccite <- NormalizeData(object = eccite) %>% FindVariableFeatures() %>% ScaleData() + +# Run Principle Component Analysis (PCA) to reduce the dimensionality of the data. +eccite <- RunPCA(object = eccite) + +# Run Uniform Manifold Approximation and Projection (UMAP) to visualize clustering in 2-D. +eccite <- RunUMAP(object = eccite, dims = 1:40) + +# Generate plots to check if clustering is driven by biological replicate ID, +# cell cycle phase or target gene class. +p1 <- DimPlot( + object = eccite, + group.by = 'replicate', + label = F, + pt.size = 0.2, + reduction = "umap", cols = "Dark2", repel = T) + + scale_color_brewer(palette = "Dark2") + + ggtitle("Biological Replicate") + + xlab("UMAP 1") + + ylab("UMAP 2") + +p2 <- DimPlot( + object = eccite, + group.by = 'Phase', + label = F, pt.size = 0.2, + reduction = "umap", repel = T) + + ggtitle("Cell Cycle Phase") + + ylab("UMAP 2") + + xlab("UMAP 1") + +p3 <- DimPlot( + object = eccite, + group.by = 'crispr', + pt.size = 0.2, + reduction = "umap", + split.by = "crispr", + ncol = 1, + cols = c("grey39","goldenrod3")) + + ggtitle("Perturbation Status") + + ylab("UMAP 2") + + xlab("UMAP 1") + +# Visualize plots. +((p1 / p2 + plot_layout(guides = 'auto')) | p3 ) +``` + +# Calculating local perturbation signatures mitigates confounding effects + +To calculate local perturbation signatures we set the number of non-targeting Nearest Neighbors (NNs) equal to k=20 and we recommend that the user picks a k from the following range: 20 < k < 30. Intuitively, the user does not want to set k to a very small or large number as this will most likely not remove the technical variation from the dataset. Using the PRTB signature to cluster cells removes all technical variation and reveals one additional perturbation-specific cluster. + +```{r eccite.cps, fig.height = 10, fig.width = 15} +# Calculate perturbation signature (PRTB). +eccite<- CalcPerturbSig( + object = eccite, + assay = "RNA", + slot = "data", + gd.class ="gene", + nt.cell.class = "NT", + reduction = "pca", + ndims = 40, + num.neighbors = 20, + split.by = "replicate", + new.assay.name = "PRTB") + +# Prepare PRTB assay for dimensionality reduction: +# Normalize data, find variable features and center data. +DefaultAssay(object = eccite) <- 'PRTB' + +# Use variable features from RNA assay. +VariableFeatures(object = eccite) <- VariableFeatures(object = eccite[["RNA"]]) +eccite <- ScaleData(object = eccite, do.scale = F, do.center = T) + +# Run PCA to reduce the dimensionality of the data. +eccite <- RunPCA(object = eccite, reduction.key = 'prtbpca', reduction.name = 'prtbpca') + +# Run UMAP to visualize clustering in 2-D. +eccite <- RunUMAP( + object = eccite, + dims = 1:40, + reduction = 'prtbpca', + reduction.key = 'prtbumap', + reduction.name = 'prtbumap') + +# Generate plots to check if clustering is driven by biological replicate ID, +# cell cycle phase or target gene class. +q1 <- DimPlot( + object = eccite, + group.by = 'replicate', + reduction = 'prtbumap', + pt.size = 0.2, cols = "Dark2", label = F, repel = T) + + scale_color_brewer(palette = "Dark2") + + ggtitle("Biological Replicate") + + ylab("UMAP 2") + + xlab("UMAP 1") + +q2 <- DimPlot( + object = eccite, + group.by = 'Phase', + reduction = 'prtbumap', + pt.size = 0.2, label = F, repel = T) + + ggtitle("Cell Cycle Phase") + + ylab("UMAP 2") + + xlab("UMAP 1") + +q3 <- DimPlot( + object = eccite, + group.by = 'crispr', + reduction = 'prtbumap', + split.by = "crispr", + ncol = 1, + pt.size = 0.2, + cols = c("grey39","goldenrod3")) + + ggtitle("Perturbation Status") + + ylab("UMAP 2") + + xlab("UMAP 1") + +# Visualize plots. +(q1 / q2 + plot_layout(guides = 'auto') | q3) +``` + +# Mixscape identifies cells with no detectable perturbation + +Here, we are assuming each target gene class is a mixture of two Gaussian distributions one representing the knockout (KO) and the other the non-perturbed (NP) cells. We further assume that the distribution of the NP cells is identical to that of cells expressing non-targeting gRNAs (NT) and we try to estimate the distribution of KO cells using the function `normalmixEM()` from the mixtools package. Next, we calculate the posterior probability that a cell belongs to the KO distribution and classify cells with a probability higher than 0.5 as KOs. Applying this method we identify KOs in 11 target gene classes and detect variation in gRNA targeting efficiency within each class. + +```{r eccite.mixscape, fig.height = 20, fig.width = 20, results="hide"} +# Run mixscape. +eccite <- RunMixscape( + object = eccite, + assay = "PRTB", + slot = "scale.data", + labels = "gene", + nt.class.name = "NT", + min.de.genes = 5, + iter.num = 10, + de.assay = "RNA", + verbose = F, + prtb.type = "KO") + +# Calculate percentage of KO cells for all target gene classes. +df <- prop.table(table(eccite$mixscape_class.global, eccite$NT),2) + +df2 <- reshape2::melt(df) +df2$Var2 <- as.character(df2$Var2) +test <- df2[which(df2$Var1 == "KO"),] +test <- test[order(test$value, decreasing = T),] +new.levels <- test$Var2 +df2$Var2 <- factor(df2$Var2, levels = new.levels ) +df2$Var1 <- factor(df2$Var1, levels = c("NT", "NP", "KO")) +df2$gene <- sapply(as.character(df2$Var2), function(x) strsplit(x, split = "g")[[1]][1]) +df2$guide_number <- sapply(as.character(df2$Var2), + function(x) strsplit(x, split = "g")[[1]][2]) +df3 <- df2[-c(which(df2$gene == "NT")),] + +p1 <- ggplot(df3, aes(x = guide_number, y = value*100, fill= Var1)) + + geom_bar(stat= "identity") + + theme_classic()+ + scale_fill_manual(values = c("grey49", "grey79","coral1")) + + ylab("% of cells") + + xlab("sgRNA") + +p1 + theme(axis.text.x = element_text(size = 18, hjust = 1), + axis.text.y = element_text(size = 18), + axis.title = element_text(size = 16), + strip.text = element_text(size=16, face = "bold")) + + facet_wrap(vars(gene),ncol = 5, scales = "free") + + labs(fill = "mixscape class") +theme(legend.title = element_text(size = 14), + legend.text = element_text(size = 12)) +``` + +# Inspecting mixscape results + +To ensure mixscape is assigning the correct perturbation status to cells we can use the functions below to look at the perturbation score distributions and the posterior probabilities of cells within a target gene class (for example IFNGR2) and compare it to those of the NT cells. In addition, we can perform differential expression (DE) analyses and show that only IFNGR2 KO cells have reduced expression of the IFNG-pathway genes. Finally, as an independent check, we can look at the PD-L1 protein expression values in NP and KO cells for target genes known to be PD-L1 regulators. + +```{r eccite.plots, fig.height = 10, fig.width = 15, results="hide"} +# Explore the perturbation scores of cells. +PlotPerturbScore(object = eccite, + target.gene.ident = "IFNGR2", + mixscape.class = "mixscape_class", + col = "coral2") +labs(fill = "mixscape class") + +# Inspect the posterior probability values in NP and KO cells. +VlnPlot(eccite, "mixscape_class_p_ko", idents = c("NT", "IFNGR2 KO", "IFNGR2 NP")) + + theme(axis.text.x = element_text(angle = 0, hjust = 0.5),axis.text = element_text(size = 16) ,plot.title = element_text(size = 20)) + + NoLegend() + + ggtitle("mixscape posterior probabilities") + +# Run DE analysis and visualize results on a heatmap ordering cells by their posterior +# probability values. +Idents(object = eccite) <- "gene" +MixscapeHeatmap(object = eccite, + ident.1 = "NT", + ident.2 = "IFNGR2", + balanced = F, + assay = "RNA", + max.genes = 20, angle = 0, + group.by = "mixscape_class", + max.cells.group = 300, + size=6.5) + NoLegend() +theme(axis.text.y = element_text(size = 16)) + +# Show that only IFNG pathway KO cells have a reduction in PD-L1 protein expression. +VlnPlot( + object = eccite, + features = "adt_PDL1", + idents = c("NT","JAK2","STAT1","IFNGR1","IFNGR2", "IRF1"), + group.by = "gene", + pt.size = 0.2, + sort = T, + split.by = "mixscape_class.global", + cols = c("coral3","grey79","grey39")) + + ggtitle("PD-L1 protein") + + theme(axis.text.x = element_text(angle = 0, hjust = 0.5), plot.title = element_text(size = 20), axis.text = element_text(size = 16)) +``` + +```{r save.img, include=TRUE} +p <- VlnPlot(object = eccite, features = "adt_PDL1", idents = c("NT","JAK2","STAT1","IFNGR1","IFNGR2", "IRF1"), group.by = "gene", pt.size = 0.2, sort = T, split.by = "mixscape_class.global", cols = c("coral3","grey79","grey39")) +ggtitle("PD-L1 protein") +theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +ggsave(filename = "../output/images/mixscape_vignette.jpg", height = 7, width = 12, plot = p, quality = 50) +``` + +# Visualizing perturbation responses with Linear Discriminant Analysis (LDA) + +We use LDA as a dimensionality reduction method to visualize perturbation-specific clusters. LDA is trying to maximize the separability of known labels (mixscape classes) using both gene expression and the labels as input. + +```{r eccite.lda, fig.height = 7, fig.width = 10, results="hide"} +# Remove non-perturbed cells and run LDA to reduce the dimensionality of the data. +Idents(eccite) <- "mixscape_class.global" +sub <- subset(eccite, idents = c("KO", "NT")) + +# Run LDA. +sub <- MixscapeLDA( + object = sub, + assay = "RNA", + pc.assay = "PRTB", + labels = "gene", + nt.label = "NT", + npcs = 10, + logfc.threshold = 0.25, + verbose = F) + +# Use LDA results to run UMAP and visualize cells on 2-D. +# Here, we note that the number of the dimensions to be used is equal to the number of +# labels minus one (to account for NT cells). +sub <- RunUMAP( + object = sub, + dims = 1:11, + reduction = 'lda', + reduction.key = 'ldaumap', + reduction.name = 'ldaumap') + +# Visualize UMAP clustering results. +Idents(sub) <- "mixscape_class" +sub$mixscape_class <- as.factor(sub$mixscape_class) + +# Set colors for each perturbation. +col = setNames(object = hue_pal()(12),nm = levels(sub$mixscape_class)) +names(col) <- c(names(col)[1:7], "NT", names(col)[9:12]) +col[8] <- "grey39" + +p <- DimPlot(object = sub, + reduction = "ldaumap", + repel = T, + label.size = 5, + label = T, + cols = col) + NoLegend() + +p2 <- p+ + scale_color_manual(values=col, drop=FALSE) + + ylab("UMAP 2") + + xlab("UMAP 1") +p2 +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/mixscape_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd new file mode 100644 index 000000000..bda439eaa --- /dev/null +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -0,0 +1,394 @@ +--- +title: "Multimodal reference mapping" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now) + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +# Intro: Seurat v4 Reference Mapping + +This vignette introduces the process of mapping query datasets to annotated references in Seurat. In this example, we map one of the first scRNA-seq datasets released by 10X Genomics of 2,700 PBMC to our [recently described CITE-seq reference of 162,000 PBMC measured with 228 antibodies](https://doi.org/10.1016/j.cell.2021.04.048). We chose this example to demonstrate how supervised analysis guided by a reference dataset can help to enumerate cell states that would be challenging to find with [unsupervised analysis](pbmc3k_tutorial.html). In a second example, we demonstrate how to serially map Human Cell Atlas datasets of human BMNC profiled from different individuals onto a consistent reference. + +We have [previously demonstrated](integration_mapping.html) how to use reference-mapping approach to annotate cell labels in a query dataset . In Seurat v4, we have substantially improved the speed and memory requirements for integrative tasks including reference mapping, and also include new functionality to project query cells onto a previously computed UMAP visualization. + +In this vignette, we demonstrate how to use a previously established reference to interpret an scRNA-seq query: + +* Annotate each query cell based on a set of reference-defined cell states +* Project each query cell onto a previously computed UMAP visualization +* Impute the predicted levels of surface proteins that were measured in the CITE-seq reference + +To run this vignette please install Seurat v4, available on CRAN. Additionally, you will need to install the `SeuratDisk` package. + +```{r install, eval = FALSE} +install.packages("Seurat") +remotes::install_github("mojaveazure/seurat-disk") +``` + +```{r packages, cache=FALSE} +library(Seurat) +library(SeuratDisk) +library(ggplot2) +library(patchwork) +``` + +```{r, include=TRUE, cache=FALSE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +# Example 1: Mapping human peripheral blood cells + +## A Multimodal PBMC Reference Dataset + +We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048), and visualize the pre-computed UMAP. This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). + +```{r pbmc.ref} +reference <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") +reference <- UpdateSeuratObject(reference) +``` + +```{r ref.dimplot} +DimPlot(object = reference, reduction = "wnn.umap", group.by = "celltype.l2", label = TRUE, label.size = 3, repel = TRUE) + NoLegend() +``` + +## Mapping + +To demonstrate mapping to this multimodal reference, we will use a dataset of 2,700 PBMCs generated by 10x Genomics and available via `SeuratData`. + +```{r 3k.load} +library(SeuratData) +InstallData('pbmc3k') +pbmc3k <- UpdateSeuratObject(pbmc3k) +``` + +The reference was normalized using `SCTransform()`, so we use the same approach to normalize the query here. + +```{r 3k.preprocess, results="hide"} +pbmc3k <- SCTransform(pbmc3k, verbose = FALSE) +``` + +We then find anchors between reference and query. As described in the [manuscript](https://doi.org/10.1016/j.cell.2021.04.048), we used a precomputed supervised PCA (spca) transformation for this example. We recommend the use of supervised PCA for CITE-seq datasets, and demonstrate how to compute this transformation on the next tab of this vignette. However, you can also use a standard PCA transformation. + +```{r transfer.anchors} +anchors <- FindTransferAnchors( + reference = reference, + query = pbmc3k, + normalization.method = "SCT", + reference.reduction = "spca", + dims = 1:50 +) +``` + +We then transfer cell type labels and protein data from the reference to the query. Additionally, we project the query data onto the UMAP structure of the reference. + +```{r transfer} +pbmc3k <- MapQuery( + anchorset = anchors, + query = pbmc3k, + reference = reference, + refdata = list( + celltype.l1 = "celltype.l1", + celltype.l2 = "celltype.l2", + predicted_ADT = "ADT" + ), + reference.reduction = "spca", + reduction.model = "wnn.umap" +) +``` + +
    + **What is `MapQuery` doing?** + + `MapQuery()` is a wrapper around three functions: `TransferData()`, `IntegrateEmbeddings()`, and `ProjectUMAP()`. `TransferData()` is used to transfer cell type labels and impute the ADT values. `IntegrateEmbeddings()` and `ProjectUMAP()` are used to project the query data onto the UMAP structure of the reference. The equivalent code for doing this with the intermediate functions is below: + + +```{r, eval=FALSE} +pbmc3k <- TransferData( + anchorset = anchors, + reference = reference, + query = pbmc3k, + refdata = list( + celltype.l1 = "celltype.l1", + celltype.l2 = "celltype.l2", + predicted_ADT = "ADT") +) +pbmc3k <- IntegrateEmbeddings( + anchorset = anchors, + reference = reference, + query = pbmc3k, + new.reduction.name = "ref.spca" +) +pbmc3k <- ProjectUMAP( + query = pbmc3k, + query.reduction = "ref.spca", + reference = reference, + reference.reduction = "spca", + reduction.model = "wnn.umap" +) +``` +
    + +## Explore the mapping results + +We can now visualize the 2,700 query cells. They have been projected into a UMAP visualization defined by the reference, and each has received annotations at two levels of granularity (level 1, and level 2). + +```{r 3k.refdimplots, fig.width=10} +p1 = DimPlot(pbmc3k, reduction = "ref.umap", group.by = "predicted.celltype.l1", label = TRUE, label.size = 3, repel = TRUE) + NoLegend() +p2 = DimPlot(pbmc3k, reduction = "ref.umap", group.by = "predicted.celltype.l2", label = TRUE, label.size = 3 ,repel = TRUE) + NoLegend() +p1 + p2 +``` + +The reference-mapped dataset helps us identify cell types that were previously blended in an [unsupervised analysis of the query dataset](pbmc3k_tutorial.html). Just a few examples include plasmacytoid dendritic cells (pDC), hematopoietic stem and progenitor cells (HSPC), regulatory T cells (Treg), CD8 Naive T cells, cells, CD56+ NK cells, memory, and naive B cells, and plasmablasts. + +Each prediction is assigned a score between 0 and 1. + +```{r 3k.featureplots1, fig.width = 10, fig.height =4} +FeaturePlot(pbmc3k, features = c("pDC", "CD16 Mono", "Treg"), reduction = "ref.umap", cols = c("lightgrey", "darkred"), ncol = 3) & theme(plot.title = element_text(size = 10)) +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- FeaturePlot(pbmc3k, features = "CD16 Mono", reduction = "ref.umap", cols = c("lightgrey", "darkred")) + ggtitle("CD16 Mono") + theme(plot.title = element_text(hjust = 0.5, size = 30)) + labs(color = "Prediction Score") + xlab("UMAP 1") + ylab("UMAP 2") + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18), legend.title = element_text(size = 25)) +ggsave(filename = "../output/images/multimodal_reference_mapping.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +We can verify our predictions by exploring the expression of canonical marker genes. For example, CLEC4C and LIRA4 have been [reported](https://pubmed.ncbi.nlm.nih.gov/30395816/) as markers of pDC identity, consistent with our predictions. Similarly, if we perform differential expression to identify markers of Tregs, we identify a set of [canonical markers](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4761514/) including RTKN2, CTLA4, FOXP3, and IL2RA. + +```{r 3k.VlnPlots, fig.height=6, fig.width = 10, fig.height =5} +Idents(pbmc3k) <- 'predicted.celltype.l2' +VlnPlot(pbmc3k, features = c("CLEC4C", "LILRA4"), sort = TRUE) + NoLegend() + +treg_markers <- FindMarkers(pbmc3k, ident.1 = "Treg", only.pos = TRUE, logfc.threshold = 0.1) +print(head(treg_markers)) +``` + +Finally, we can visualize the imputed levels of surface protein, which were inferred based on the CITE-seq reference. + +```{r 3k.featureplots2, fig.width=10, fig.height =4} +DefaultAssay(pbmc3k) <- 'predicted_ADT' +# see a list of proteins: rownames(pbmc3k) +FeaturePlot(pbmc3k, features = c("CD3-1", "CD45RA", "IgD"), reduction = "ref.umap", cols = c("lightgrey", "darkgreen"), ncol = 3) +``` + +## Computing a new UMAP visualiztion + +In the previous examples, we visualize the query cells after mapping to the reference-derived UMAP. Keeping a consistent visualization can assist with the interpretation of new datasets. However, if there are cell states that are present in the query dataset that are not represented in the reference, they will project to the most similar cell in the reference. This is the expected behavior and functionality as established by the UMAP package, but can potentially mask the presence of new cell types in the query which may be of interest. + +In our [manuscript](https://doi.org/10.1016/j.cell.2021.04.048), we map a query dataset containing developing and differentiated neutrophils, which are not included in our reference. We find that computing a new UMAP ('de novo visualization') after merging the reference and query can help to identify these populations, as demonstrated in Supplementary Figure 8. In the 'de novo' visualization, unique cell states in the query remain separated. In this example, the 2,700 PBMC does not contain unique cell states, but we demonstrate how to compute this visualization below. + +We emphasize that if users are attempting to map datasets where the underlying samples are not PBMC, or contain cell types that are not present in the reference, computing a 'de novo' visualization is an important step in interpreting their dataset. + +```{r hiddendiet, include=TRUE} +reference <- DietSeurat(reference, counts = FALSE, dimreducs = "spca") +pbmc3k <- DietSeurat(pbmc3k, counts = FALSE, dimreducs = "ref.spca") +``` + +```{r denovoumap} +#merge reference and query +reference$id <- 'reference' +pbmc3k$id <- 'query' +refquery <- merge(reference, pbmc3k) +refquery[["spca"]] <- merge(reference[["spca"]], pbmc3k[["ref.spca"]]) +refquery <- RunUMAP(refquery, reduction = 'spca', dims = 1:50) +DimPlot(refquery, group.by = 'id', shuffle = TRUE) +``` + +# Example 2: Mapping human bone marrow cells + +## A Multimodal BMNC Reference Dataset + +As a second example, we map a dataset of human bone marrow mononuclear (BMNC) cells from eight individual donors, produced by the Human Cell Atlas. As a reference, we use the CITE-seq reference of human BMNC that we analyzed using [weighted-nearest neighbor analysis (WNN)](weighted_nearest_neighbor_analysis.html). + +This vignette exhibits the same reference-mapping functionality as the PBMC example on the previous tab. In addition, we also demonstrate: + +* How to construct a supervised PCA (sPCA) transformation +* How to serially map multiple datasets to the same reference +* Optimization steps to further enhance to speed of mapping + +```{r bmref.seuratdata} +# Both datasets are available through SeuratData +library(SeuratData) +#load reference data +InstallData("bmcite") +bm <- LoadData(ds = "bmcite") +bm <- UpdateSeuratObject(bm) +#load query data +InstallData('hcabm40k') +hcabm40k <- LoadData(ds = "hcabm40k") +hcabm40k <- UpdateSeuratObject(hcabm40k) +``` + +The reference dataset contains a [WNN graph](weighted_nearest_neighbor_analysis.html), reflecting a weighted combination of the RNA and protein data in this CITE-seq experiment. + +We can compute a UMAP visualization based on this graph. We set `return.model = TRUE`, which will enable us to project query datasets onto this visualization. + +```{r bm.refdimplot, fig.width=8} +bm <- RunUMAP(bm, nn.name = "weighted.nn", reduction.name = "wnn.umap", + reduction.key = "wnnUMAP_", return.model = TRUE) +DimPlot(bm, group.by = "celltype.l2", reduction = "wnn.umap") +``` + +## Computing an sPCA transformation + +As described in our [manuscript](https://doi.org/10.1016/j.cell.2021.04.048), we first compute a 'supervised' PCA. This identifies the transformation of the transcriptome data that best encapsulates the structure of the WNN graph. This allows a weighted combination of the protein and RNA measurements to 'supervise' the PCA, and highlight the most relevant sources of variation. After computing this transformation, we can project it onto a query dataset. We can also compute and project a PCA projection, but recommend the use of sPCA when working with multimodal references that have been constructed with WNN analysis. + +The sPCA calculation is performed once, and then can be rapidly projected onto each query dataset. + +```{r bm.spca} +bm <- ScaleData(bm, assay = 'RNA') +bm <- RunSPCA(bm, assay = 'RNA', graph = 'wsnn') +``` + +## Computing a cached neighbor index + +Since we will be mapping multiple query samples to the same reference, we can cache particular steps that only involve the reference. This step is optional but will improve speed when mapping multiple samples. + +We compute the first 50 neighbors in the sPCA space of the reference. We store this information in the `spca.annoy.neighbors` object within the reference Seurat object and also cache the annoy index data structure (via `cache.index = TRUE`). + +```{r bm.nn, cache = FALSE} +bm <- FindNeighbors( + object = bm, + reduction = "spca", + dims = 1:50, + graph.name = "spca.annoy.neighbors", + k.param = 50, + cache.index = TRUE, + return.neighbor = TRUE, + l2.norm = TRUE +) +``` + +
    + **How can I save and load a cached annoy index?** + +If you want to save and load a cached index for a `Neighbor` object generated with `method = "annoy"` and `cache.index = TRUE`, use the `SaveAnnoyIndex()`/`LoadAnnoyIndex()` functions. Importantly, this index cannot be saved normally to an RDS or RDA file, so it will not persist correctly across R session restarts or `saveRDS`/`readRDS` for the Seurat object containing it. Instead, use `LoadAnnoyIndex()` to add the Annoy index to the `Neighbor` object every time R restarts or you load the reference Seurat object from RDS. The file created by `SaveAnnoyIndex()` can be distributed along with a reference Seurat object, and added to the `Neighbor` object in the reference. + +```{r neighbor.demo} +bm[["spca.annoy.neighbors"]] +SaveAnnoyIndex(object = bm[["spca.annoy.neighbors"]], file = "../data/reftmp.idx") +bm[["spca.annoy.neighbors"]] <- LoadAnnoyIndex(object = bm[["spca.annoy.neighbors"]], file = "../data/reftmp.idx") +``` +
    + +## Query dataset preprocessing + +Here we will demonstrate mapping multiple donor bone marrow samples to the multimodal bone marrow reference. These query datasets are derived from the Human Cell Atlas (HCA) Immune Cell Atlas Bone marrow dataset and are available through SeuratData. This dataset is provided as a single merged object with 8 donors. We first split the data back into 8 separate Seurat objects, one for each original donor to map individually. + +```{r bm40k.load} +library(dplyr) +library(SeuratData) +InstallData('hcabm40k') +hcabm40k.batches <- SplitObject(hcabm40k, split.by = "orig.ident") +``` + +We then normalize the query in the same manner as the reference. Here, the reference was normalized using log-normalization via `NormalizeData()`. If the reference had been normalized using `SCTransform()`, the query must be normalized with `SCTransform()` as well. + +```{r 40k.norm} +hcabm40k.batches <- lapply(X = hcabm40k.batches, FUN = NormalizeData, verbose = FALSE) +``` + +## Mapping + +We then find anchors between each donor query dataset and the multimodal reference. This command is optimized to minimize mapping time, by passing in a pre-computed set of reference neighbors, and turning off anchor filtration. + +```{r bm.anchors} +anchors <- list() +for (i in 1:length(hcabm40k.batches)) { + anchors[[i]] <- FindTransferAnchors( + reference = bm, + query = hcabm40k.batches[[i]], + k.filter = NA, + reference.reduction = "spca", + reference.neighbors = "spca.annoy.neighbors", + dims = 1:50 + ) +} +``` + +We then individually map each of the datasets. + +```{r bm.map} +for (i in 1:length(hcabm40k.batches)) { + hcabm40k.batches[[i]] <- MapQuery( + anchorset = anchors[[i]], + query = hcabm40k.batches[[i]], + reference = bm, + refdata = list( + celltype = "celltype.l2", + predicted_ADT = "ADT"), + reference.reduction = "spca", + reduction.model = "wnn.umap" + ) +} +``` + +## Explore the mapping results + +Now that mapping is complete, we can visualize the results for individual objects + +```{r bm.umap.separate, fig.width=10} +p1 <- DimPlot(hcabm40k.batches[[1]], reduction = 'ref.umap', group.by = 'predicted.celltype', label.size = 3) +p2 <- DimPlot(hcabm40k.batches[[2]], reduction = 'ref.umap', group.by = 'predicted.celltype', label.size = 3) +p1 + p2 + plot_layout(guides = "collect") +``` + +We can also merge all the objects into one dataset. Note that they have all been integrated into a common space, defined by the reference. We can then visualize the results together. + +```{r bm.umap.combine} +# Merge the batches +hcabm40k <- merge(hcabm40k.batches[[1]], hcabm40k.batches[2:length(hcabm40k.batches)], merge.dr = "ref.umap") +DimPlot(hcabm40k, reduction = "ref.umap", group.by = "predicted.celltype", label = TRUE, repel = TRUE, label.size = 3) + NoLegend() +``` + +We can visualize gene expression, cluster prediction scores, and (imputed) surface protein levels in the query cells: + +```{r ftplot, fig.height = 10, fig.width=10} +p3 <- FeaturePlot(hcabm40k, features = c("rna_TRDC", "rna_MPO", "rna_AVP"), reduction = 'ref.umap', + max.cutoff = 3, ncol = 3) + +# cell type prediction scores +DefaultAssay(hcabm40k) <- 'prediction.score.celltype' +p4 <- FeaturePlot(hcabm40k, features = c("CD16 Mono", "HSC", "Prog-RBC"), ncol = 3, + cols = c("lightgrey", "darkred")) + +# imputed protein levels +DefaultAssay(hcabm40k) <- 'predicted_ADT' +p5 <- FeaturePlot(hcabm40k, features = c("CD45RA", "CD16", "CD161"), reduction = 'ref.umap', + min.cutoff = 'q10', max.cutoff = 'q99', cols = c("lightgrey", "darkgreen") , + ncol = 3) +p3 / p4 / p5 +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/reference_mapping_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_multimodal_vignette.Rmd b/vignettes/seurat5_multimodal_vignette.Rmd new file mode 100644 index 000000000..ad8b52a12 --- /dev/null +++ b/vignettes/seurat5_multimodal_vignette.Rmd @@ -0,0 +1,240 @@ +--- +title: "Using Seurat with multimodal data" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- +*** + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + fig.width = 10, + error = TRUE +) +``` + +# Load in the data + +The ability to make simultaneous measurements of multiple data types from the same cell, known as multimodal analysis, represents a new and exciting frontier for single-cell genomics. For example, [CITE-seq](http://www.nature.com/nmeth/journal/v14/n9/full/nmeth.4380.html) enables the simultaneous measurements of transcriptomes and cell-surface proteins from the same cell. Other exciting multimodal technologies, such as the [10x multiome kit](https://www.10xgenomics.com/products/single-cell-multiome-atac-plus-gene-expression) allow for the paired measurements of cellular transcriptome and chromatin accessibility (i.e scRNA-seq+scATAC-seq). Other modalities that can be measured alongside cellular transcriptomes include genetic perturbations, cellular methylomes, and hashtag oligos from [Cell Hashing](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-018-1603-1). We have designed Seurat4 to enable for the seamless storage, analysis, and exploration of diverse multimodal single-cell datasets. + +In this vignette, we present an introductory workflow for creating a multimodal Seurat object and performing an initial analysis. For example, we demonstrate how to cluster a CITE-seq dataset on the basis of the measured cellular transcriptomes, and subsequently discover cell surface proteins that are enriched in each cluster. We note that Seurat4 also enables more advanced techniques for the analysis of multimodal data, in particular the application of our [Weighted Nearest Neighbors (WNN) approach](https://doi.org/10.1016/j.cell.2021.04.048) that enables simultaneous clustering of cells based on a weighted combination of both modalities, and you can explore this functionality [here](weighted_nearest_neighbor_analysis.html). + +Here, we analyze a dataset of 8,617 cord blood mononuclear cells (CBMCs), where transcriptomic measurements are paired with abundance estimates for 11 surface proteins, whose levels are quantified with DNA-barcoded antibodies. First, we load in two count matrices : one for the RNA measurements, and one for the antibody-derived tags (ADT). You can download the ADT file [here](ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE100nnn/GSE100866/suppl/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz) and the RNA file [here](ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE100nnn/GSE100866/suppl/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz) + +```{r load_packages} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(ggplot2) +library(patchwork) +``` + +```{r load_data} +# Load in the RNA UMI matrix + +# Note that this dataset also contains ~5% of mouse cells, which we can use as negative controls for the protein measurements. For this reason, the gene expression matrix has HUMAN_ or MOUSE_ appended to the beginning of each gene. +cbmc.rna <- as.sparse(read.csv(file = '/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz', sep = ',', header = TRUE, row.names = 1)) + +# To make life a bit easier going forward, we're going to discard all but the top 100 most highly expressed mouse genes, and remove the "HUMAN_" from the CITE-seq prefix +cbmc.rna <- CollapseSpeciesExpressionMatrix(cbmc.rna) + +# Load in the ADT UMI matrix +cbmc.adt <- as.sparse(read.csv(file = '/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz', sep = ',', header = TRUE, row.names = 1)) + +# Note that since measurements were made in the same cells, the two matrices have identical column names +all.equal(colnames(cbmc.rna),colnames(cbmc.adt)) +``` + +# Setup a Seurat object, add the RNA and protein data + +Now we create a Seurat object, and add the ADT data as a second assay + +```{r basic_de} +# creates a Seurat object based on the scRNA-seq data +cbmc <- CreateSeuratObject(counts = cbmc.rna) + +# We can see that by default, the cbmc object contains an assay storing RNA measurement +Assays(cbmc) + +# create a new assay to store ADT information +adt_assay <- CreateAssay5Object(counts = cbmc.adt) + +# add this assay to the previously created Seurat object +cbmc[["ADT"]] <- adt_assay + +# Validate that the object now contains multiple assays +Assays(cbmc) + +# Extract a list of features measured in the ADT assay +rownames(cbmc[["ADT"]]) + +# Note that we can easily switch back and forth between the two assays to specify the default for visualization and analysis + +# List the current default assay +DefaultAssay(cbmc) + +# Switch the default to ADT +DefaultAssay(cbmc) <- 'ADT' +DefaultAssay(cbmc) +``` + +# Cluster cells on the basis of their scRNA-seq profiles + +The steps below represent a quick clustering of the PBMCs based on the scRNA-seq data. For more detail on individual steps or more advanced options, see our PBMC clustering guided tutorial [here](pbmc3k_tutorial.html) + +```{r cluster1} +# Note that all operations below are performed on the RNA assay +# Set and verify that the default assay is RNA +DefaultAssay(cbmc) <- "RNA" +DefaultAssay(cbmc) + +# perform visualization and clustering steps +cbmc <- NormalizeData(cbmc) +DefaultLayer(cbmc[["RNA"]]) <- "counts" +cbmc <- FindVariableFeatures(cbmc) +DefaultLayer(cbmc[["RNA"]]) <- "data" +cbmc <- ScaleData(cbmc) +DefaultLayer(cbmc[["RNA"]]) <- "scale.data" +cbmc <- RunPCA(cbmc, verbose = FALSE) +cbmc <- FindNeighbors(cbmc, dims = 1:30) +cbmc <- FindClusters(cbmc, resolution = 0.8, verbose = FALSE) +cbmc <- RunUMAP(cbmc, dims = 1:30) +DimPlot(cbmc, label = TRUE) +``` + +# Visualize multiple modalities side-by-side + +Now that we have obtained clusters from scRNA-seq profiles, we can visualize the expression of either protein or RNA molecules in our dataset. Importantly, Seurat provides a couple ways to switch between modalities, and specify which modality you are interested in analyzing or visualizing. This is particularly important as, in some cases, the same feature can be present in multiple modalities - for example this dataset contains independent measurements of the B cell marker CD19 (both protein and RNA levels). + +```{r vis} +# Normalize ADT data, +DefaultAssay(cbmc) <- 'ADT' +DefaultLayer(cbmc[["ADT"]]) <- "counts" +cbmc <- NormalizeData(cbmc, normalization.method = 'CLR', margin = 2) +DefaultAssay(cbmc) <- 'RNA' + +# Note that the following command is an alternative but returns the same result +DefaultLayer(cbmc[["ADT"]]) <- "counts" +cbmc <- NormalizeData(cbmc, normalization.method = 'CLR', margin = 2, assay = 'ADT') + +# Now, we will visualize CD14 levels for RNA and protein +# By setting the default assay, we can visualize one or the other +DefaultAssay(cbmc) <- 'ADT' +p1 <- FeaturePlot(cbmc, "CD19",cols = c("lightgrey","darkgreen")) + ggtitle("CD19 protein") +DefaultAssay(cbmc) <- 'RNA' +p2 <- FeaturePlot(cbmc, "CD19") + ggtitle("CD19 RNA") + +# place plots side-by-side +p1 | p2 + +# Alternately, we can use specific assay keys to specify a specific modality +# Identify the key for the RNA and protein assays +Key(cbmc[["RNA"]]) +Key(cbmc[["ADT"]]) + +# Now, we can include the key in the feature name, which overrides the default assay +p1 <- FeaturePlot(cbmc, "adt_CD19",cols = c("lightgrey","darkgreen")) + ggtitle("CD19 protein") +p2 <- FeaturePlot(cbmc, "rna_CD19") + ggtitle("CD19 RNA") +p1 | p2 +``` + +# Identify cell surface markers for scRNA-seq clusters + +We can leverage our paired CITE-seq measurements to help annotate clusters derived from scRNA-seq, and to identify both protein and RNA markers. + +```{r markers} +# as we know that CD19 is a B cell marker, we can identify cluster 6 as expressing CD19 on the surface +VlnPlot(cbmc, "adt_CD19") + +# we can also identify alternative protein and RNA markers for this cluster through differential expression +adt_markers <- FindMarkers(cbmc,ident.1 = 6, assay = 'ADT') +rna_markers <- FindMarkers(cbmc,ident.1 = 6, assay = 'RNA') + +head(adt_markers) +head(rna_markers) +``` + +# Additional visualizations of multimodal data + +```{r viz.cite.two, fig.height=4.5, fig.width=10} +# Draw ADT scatter plots (like biaxial plots for FACS). Note that you can even 'gate' cells if desired by using HoverLocator and FeatureLocator +FeatureScatter(cbmc, feature1 = 'adt_CD19', feature2 = 'adt_CD3') + +# view relationship between protein and RNA +FeatureScatter(cbmc, feature1 = 'adt_CD3', feature2 = 'rna_CD3E') + +FeatureScatter(cbmc, feature1 = 'adt_CD4', feature2 = 'adt_CD8') + +# Let's look at the raw (non-normalized) ADT counts. You can see the values are quite high, particularly in comparison to RNA values. This is due to the significantly higher protein copy number in cells, which significantly reduces 'drop-out' in ADT data +FeatureScatter(cbmc, feature1 = 'adt_CD4', feature2 = 'adt_CD8', slot = 'counts') +``` + +# Loading data from 10X multi-modal experiments + +Seurat is also able to analyze data from multimodal 10X experiments processed using CellRanger v3; as an example, we recreate the plots above using a dataset of 7,900 peripheral blood mononuclear cells (PBMC), freely available from 10X Genomics [here](https://support.10xgenomics.com/single-cell-gene-expression/datasets/3.0.0/pbmc_10k_protein_v3). + +```{r pbmc10x, fig.height=4.5, fig.width=10} +pbmc10k.data <- Read10X(data.dir = '../data/pbmc10k/filtered_feature_bc_matrix/') +rownames(x = pbmc10k.data[['Antibody Capture']]) <- gsub( + pattern = '_[control_]*TotalSeqB', + replacement = '', + x = rownames(x = pbmc10k.data[['Antibody Capture']]) +) + +pbmc10k <- CreateSeuratObject(counts = pbmc10k.data[['Gene Expression']], min.cells = 3, min.features = 200) +pbmc10k <- NormalizeData(pbmc10k) +pbmc10k[['ADT']] <- CreateAssay5Object(pbmc10k.data[['Antibody Capture']][, colnames(x = pbmc10k)]) +pbmc10k <- NormalizeData(pbmc10k, assay = 'ADT', normalization.method = 'CLR') + +plot1 <- FeatureScatter(pbmc10k, feature1 = 'adt_CD19', feature2 = 'adt_CD3', pt.size = 1) +plot2 <- FeatureScatter(pbmc10k, feature1 = 'adt_CD4', feature2 = 'adt_CD8a', pt.size = 1) +plot3 <- FeatureScatter(pbmc10k, feature1 = 'adt_CD3', feature2 = 'CD3E', pt.size = 1) +(plot1 + plot2 + plot3) & NoLegend() +``` + +```{r save.img, include=TRUE} +plot <- FeatureScatter(cbmc, feature1 = "adt_CD19", feature2 = "adt_CD3") + NoLegend() + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) +ggsave(filename = "../output/images/citeseq_plot.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/multimodal_vignette_times.csv") +``` + +# Additional functionality for multimodal data in Seurat + +Seurat v4 also includes additional functionality for the analysis, visualization, and integration of multimodal datasets. For more information, please explore the resources below: + +* Defining cellular identity from multimodal data using WNN analysis in Seurat v4 [vignette](weighted_nearest_neighbor_analysis.html) +* Mapping scRNA-seq data onto CITE-seq references [[vignette](reference_mapping.html)] +* Introduction to the analysis of spatial transcriptomics analysis [[vignette](spatial_vignette.html)] +* Analysis of 10x multiome (paired scRNA-seq + ATAC) using WNN analysis [[vignette](weighted_nearest_neighbor_analysis.html)] +* Signac: Analysis, interpretation, and exploration of single-cell chromatin datasets [[package](https://satijalab.org/signac/)] +* Mixscape: an analytical toolkit for pooled single-cell genetic screens [[vignette](mixscape_vignette.html)] + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_pbmc3k_tutorial.Rmd b/vignettes/seurat5_pbmc3k_tutorial.Rmd new file mode 100644 index 000000000..78e6f32ff --- /dev/null +++ b/vignettes/seurat5_pbmc3k_tutorial.Rmd @@ -0,0 +1,395 @@ +--- +title: "Seurat - Guided Clustering Tutorial" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +# Setup the Seurat Object + +For this tutorial, we will be analyzing the a dataset of Peripheral Blood Mononuclear Cells (PBMC) freely available from 10X Genomics. There are 2,700 single cells that were sequenced on the Illumina NextSeq 500. The raw data can be found [here](https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz). + +We start by reading in the data. The `Read10X()` function reads in the output of the [cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) pipeline from 10X, returning a unique molecular identified (UMI) count matrix. The values in this matrix represent the number of molecules for each feature (i.e. gene; row) that are detected in each cell (column). + +We next use the count matrix to create a `Seurat` object. The object serves as a container that contains both data (like the count matrix) and analysis (like PCA, or clustering results) for a single-cell dataset. For a technical discussion of the `Seurat` object structure, check out our [GitHub Wiki](https://github.com/satijalab/seurat/wiki). For example, the count matrix is stored in `pbmc[["RNA"]]@counts`. + +```{r init, error=TRUE} +library(dplyr) +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(patchwork) + +# Load the PBMC dataset +pbmc.data <- Read10X(data.dir = "../data/pbmc3k/filtered_gene_bc_matrices/hg19/") +# Initialize the Seurat object with the raw (non-normalized data). +pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200) +pbmc +``` +
    + **What does data in a count matrix look like?** + +```{r, error=TRUE} +# Lets examine a few genes in the first thirty cells +pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30] +``` + +The `.` values in the matrix represent 0s (no molecules detected). Since most values in an scRNA-seq matrix are 0, Seurat uses a sparse-matrix representation whenever possible. This results in significant memory and speed savings for Drop-seq/inDrop/10x data. + +```{r, error=TRUE} +dense.size <- object.size(as.matrix(pbmc.data)) +dense.size +sparse.size <- object.size(pbmc.data) +sparse.size +dense.size / sparse.size +``` +
    +\ + +# Standard pre-processing workflow + +The steps below encompass the standard pre-processing workflow for scRNA-seq data in Seurat. These represent the selection and filtration of cells based on QC metrics, data normalization and scaling, and the detection of highly variable features. + +## QC and selecting cells for further analysis + +Seurat allows you to easily explore QC metrics and filter cells based on any user-defined criteria. A few QC metrics [commonly used](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4758103/) by the community include + +* The number of unique genes detected in each cell. + + Low-quality cells or empty droplets will often have very few genes + + Cell doublets or multiplets may exhibit an aberrantly high gene count +* Similarly, the total number of molecules detected within a cell (correlates strongly with unique genes) +* The percentage of reads that map to the mitochondrial genome + + Low-quality / dying cells often exhibit extensive mitochondrial contamination + + We calculate mitochondrial QC metrics with the `PercentageFeatureSet()` function, which calculates the percentage of counts originating from a set of features + + We use the set of all genes starting with `MT-` as a set of mitochondrial genes + +```{r mito, fig.height=7, fig.width=13, error=TRUE} +# The [[ operator can add columns to object metadata. This is a great place to stash QC stats +pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") +``` + +
    + **Where are QC metrics stored in Seurat?** + +* The number of unique genes and total molecules are automatically calculated during `CreateSeuratObject()` + + You can find them stored in the object meta data +```{r qc, fig.height=7, fig.width=13, error=TRUE} +# Show QC metrics for the first 5 cells +head(pbmc@meta.data, 5) +``` +
    +\ + +In the example below, we visualize QC metrics, and use these to filter cells. + +* We filter cells that have unique feature counts over 2,500 or less than 200 +* We filter cells that have >5% mitochondrial counts + +```{r qc2, fig.height=7, fig.width=13, error=TRUE} + +#Visualize QC metrics as a violin plot +VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) + +# FeatureScatter is typically used to visualize feature-feature relationships, but can be used for anything calculated by the object, i.e. columns in object metadata, PC scores etc. + +plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt") +plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") +plot1 + plot2 + +pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) +``` + +*** + +# Normalizing the data + +After removing unwanted cells from the dataset, the next step is to normalize the data. By default, we employ a global-scaling normalization method "LogNormalize" that normalizes the feature expression measurements for each cell by the total expression, multiplies this by a scale factor (10,000 by default), and log-transforms the result. Normalized values are stored in `pbmc[["RNA"]]@data`. + +```{r normalize, error=TRUE} +pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 1e4) +``` +For clarity, in this previous line of code (and in future commands), we provide the default values for certain parameters in the function call. However, this isn't required and the same behavior can be achieved with: + +```{r normalize.default, eval = FALSE} +pbmc <- NormalizeData(pbmc) +``` + +# Identification of highly variable features (feature selection) + +We next calculate a subset of features that exhibit high cell-to-cell variation in the dataset (i.e, they are highly expressed in some cells, and lowly expressed in others). We and [others](https://www.nature.com/articles/nmeth.2645) have found that focusing on these genes in downstream analysis helps to highlight biological signal in single-cell datasets. + +Our procedure in Seurat is described in detail [here](https://doi.org/10.1016/j.cell.2019.05.031), and improves on previous versions by directly modeling the mean-variance relationship inherent in single-cell data, and is implemented in the `FindVariableFeatures()` function. By default, we return 2,000 features per dataset. These will be used in downstream analysis, like PCA. + +```{r var_features, fig.height=5, fig.width=11} +DefaultLayer(pbmc[["RNA"]]) <- "counts" +pbmc <- FindVariableFeatures(pbmc, selection.method = 'vst', nfeatures = 2000) + +# Identify the 10 most highly variable genes +top10 <- head(VariableFeatures(pbmc), 10) + +# plot variable features with and without labels +plot1 <- VariableFeaturePlot(pbmc) +plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) +plot1 + plot2 +``` + +*** + +# Scaling the data + +Next, we apply a linear transformation ('scaling') that is a standard pre-processing step prior to dimensional reduction techniques like PCA. The `ScaleData()` function: + +* Shifts the expression of each gene, so that the mean expression across cells is 0 +* Scales the expression of each gene, so that the variance across cells is 1 + + This step gives equal weight in downstream analyses, so that highly-expressed genes do not dominate +* The results of this are stored in `pbmc[["RNA"]]@scale.data` + +```{r regress, fig.height=7, fig.width=11, results='hide'} +DefaultLayer(pbmc[["RNA"]]) <- "data" +all.genes <- rownames(pbmc) +pbmc <- ScaleData(pbmc, features = all.genes) +``` +
    + **This step takes too long! Can I make it faster?** + +Scaling is an essential step in the Seurat workflow, but only on genes that will be used as input to PCA. Therefore, the default in `ScaleData()` is only to perform scaling on the previously identified variable features (2,000 by default). To do this, omit the `features` argument in the previous function call, i.e. +```{r regressvar, fig.height=7, fig.width=11, results='hide', eval = FALSE} +pbmc <- ScaleData(pbmc) +``` +Your PCA and clustering results will be unaffected. However, Seurat heatmaps (produced as shown below with `DoHeatmap()`) require genes in the heatmap to be scaled, to make sure highly-expressed genes don't dominate the heatmap. To make sure we don't leave any genes out of the heatmap later, we are scaling all genes in this tutorial. +
    +\ +
    + **How can I remove unwanted sources of variation, as in Seurat v2?** + +In `Seurat v2` we also use the `ScaleData()` function to remove unwanted sources of variation from a single-cell dataset. For example, we could 'regress out' heterogeneity associated with (for example) cell cycle stage, or mitochondrial contamination. These features are still supported in `ScaleData()` in `Seurat v3`, i.e.: +```{r regressvarmt, fig.height=7, fig.width=11, results='hide', eval = FALSE} +pbmc <- ScaleData(pbmc, vars.to.regress = 'percent.mt') +``` +However, particularly for advanced users who would like to use this functionality, we strongly recommend the use of our new normalization workflow, `SCTransform()`. The method is described in our [paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), with a separate vignette using Seurat v3 [here](sctransform_vignette.html). As with `ScaleData()`, the function `SCTransform()` also includes a `vars.to.regress` parameter. +
    +\ + +*** + +# Perform linear dimensional reduction + +Next we perform PCA on the scaled data. By default, only the previously determined variable features are used as input, but can be defined using `features` argument if you wish to choose a different subset. + +```{r pca} +pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) +``` + +Seurat provides several useful ways of visualizing both cells and features that define the PCA, including `VizDimReduction()`, `DimPlot()`, and `DimHeatmap()` + +```{r pca_viz, message=TRUE} +# Examine and visualize PCA results a few different ways +print(pbmc[['pca']], dims = 1:5, nfeatures = 5) +VizDimLoadings(pbmc, dims = 1:2, reduction = 'pca') +DimPlot(pbmc, reduction = 'pca') +``` + +In particular `DimHeatmap()` allows for easy exploration of the primary sources of heterogeneity in a dataset, and can be useful when trying to decide which PCs to include for further downstream analyses. Both cells and features are ordered according to their PCA scores. Setting `cells` to a number plots the 'extreme' cells on both ends of the spectrum, which dramatically speeds plotting for large datasets. Though clearly a supervised analysis, we find this to be a valuable tool for exploring correlated feature sets. + +```{r single-heatmap} +DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE) +``` + +```{r multi-heatmap, fig.height=15, fig.width=9} +DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE) +``` + +# Determine the 'dimensionality' of the dataset + +To overcome the extensive technical noise in any single feature for scRNA-seq data, Seurat clusters cells based on their PCA scores, with each PC essentially representing a 'metafeature' that combines information across a correlated feature set. The top principal components therefore represent a robust compression of the dataset. However, how many components should we choose to include? 10? 20? 100? + +In [Macosko *et al*](http://www.cell.com/abstract/S0092-8674(15)00549-8), we implemented a resampling test inspired by the JackStraw procedure. We randomly permute a subset of the data (1% by default) and rerun PCA, constructing a 'null distribution' of feature scores, and repeat this procedure. We identify 'significant' PCs as those who have a strong enrichment of low p-value features. + +```{r jackstraw, fig.height=6, fig.width=10} +# NOTE: This process can take a long time for big datasets, comment out for expediency. More approximate techniques such as those implemented in ElbowPlot() can be used to reduce computation time +pbmc <- JackStraw(pbmc, num.replicate = 100) +pbmc <- ScoreJackStraw(pbmc, dims = 1:20) +``` + +The `JackStrawPlot()` function provides a visualization tool for comparing the distribution of p-values for each PC with a uniform distribution (dashed line). 'Significant' PCs will show a strong enrichment of features with low p-values (solid curve above the dashed line). In this case it appears that there is a sharp drop-off in significance after the first 10-12 PCs. + +```{r jsplots, fig.height=6, fig.width=10} +JackStrawPlot(pbmc, dims = 1:15) +``` + +An alternative heuristic method generates an 'Elbow plot': a ranking of principle components based on the percentage of variance explained by each one (`ElbowPlot()` function). In this example, we can observe an 'elbow' around PC9-10, suggesting that the majority of true signal is captured in the first 10 PCs. + +```{r elbow_plot, fig.height=6, fig.width=10} +ElbowPlot(pbmc) +``` + +Identifying the true dimensionality of a dataset -- can be challenging/uncertain for the user. We therefore suggest these three approaches to consider. The first is more supervised, exploring PCs to determine relevant sources of heterogeneity, and could be used in conjunction with GSEA for example. The second implements a statistical test based on a random null model, but is time-consuming for large datasets, and may not return a clear PC cutoff. The third is a heuristic that is commonly used, and can be calculated instantly. In this example, all three approaches yielded similar results, but we might have been justified in choosing anything between PC 7-12 as a cutoff. + +We chose 10 here, but encourage users to consider the following: + +* Dendritic cell and NK aficionados may recognize that genes strongly associated with PCs 12 and 13 define rare immune subsets (i.e. MZB1 is a marker for plasmacytoid DCs). However, these groups are so rare, they are difficult to distinguish from background noise for a dataset of this size without prior knowledge. +* We encourage users to repeat downstream analyses with a different number of PCs (10, 15, or even 50!). As you will observe, the results often do not differ dramatically. +* We advise users to err on the higher side when choosing this parameter. For example, performing downstream analyses with only 5 PCs does significantly and adversely affect results. + +*** + +# Cluster the cells + +Seurat v3 applies a graph-based clustering approach, building upon initial strategies in ([Macosko *et al*](http://www.cell.com/abstract/S0092-8674(15)00549-8)). Importantly, the *distance metric* which drives the clustering analysis (based on previously identified PCs) remains the same. However, our approach to partitioning the cellular distance matrix into clusters has dramatically improved. Our approach was heavily inspired by recent manuscripts which applied graph-based clustering approaches to scRNA-seq data [[SNN-Cliq, Xu and Su, Bioinformatics, 2015]](http://bioinformatics.oxfordjournals.org/content/early/2015/02/10/bioinformatics.btv088.abstract) and CyTOF data [[PhenoGraph, Levine *et al*., Cell, 2015]](http://www.ncbi.nlm.nih.gov/pubmed/26095251). Briefly, these methods embed cells in a graph structure - for example a K-nearest neighbor (KNN) graph, with edges drawn between cells with similar feature expression patterns, and then attempt to partition this graph into highly interconnected 'quasi-cliques' or 'communities'. + +As in PhenoGraph, we first construct a KNN graph based on the euclidean distance in PCA space, and refine the edge weights between any two cells based on the shared overlap in their local neighborhoods (Jaccard similarity). This step is performed using the `FindNeighbors()` function, and takes as input the previously defined dimensionality of the dataset (first 10 PCs). + +To cluster the cells, we next apply modularity optimization techniques such as the Louvain algorithm (default) or SLM [[SLM, Blondel *et al*., Journal of Statistical Mechanics]](http://dx.doi.org/10.1088/1742-5468/2008/10/P10008), to iteratively group cells together, with the goal of optimizing the standard modularity function. The `FindClusters()` function implements this procedure, and contains a resolution parameter that sets the 'granularity' of the downstream clustering, with increased values leading to a greater number of clusters. We find that setting this parameter between 0.4-1.2 typically returns good results for single-cell datasets of around 3K cells. Optimal resolution often increases for larger datasets. The clusters can be found using the `Idents()` function. + + +```{r cluster, fig.height=5, fig.width=7} +pbmc <- FindNeighbors(pbmc, dims = 1:10) +pbmc <- FindClusters(pbmc, resolution = 0.5) + +# Look at cluster IDs of the first 5 cells +head(Idents(pbmc), 5) +``` + +*** + +# Run non-linear dimensional reduction (UMAP/tSNE) + +Seurat offers several non-linear dimensional reduction techniques, such as tSNE and UMAP, to visualize and explore these datasets. The goal of these algorithms is to learn the underlying manifold of the data in order to place similar cells together in low-dimensional space. Cells within the graph-based clusters determined above should co-localize on these dimension reduction plots. As input to the UMAP and tSNE, we suggest using the same PCs as input to the clustering analysis. + +```{r tsne, fig.height=5, fig.width=7} +# If you haven't installed UMAP, you can do so via reticulate::py_install(packages = "umap-learn") +pbmc <- RunUMAP(pbmc, dims = 1:10) +``` + +```{r tsneplot, fig.height=5, fig.width=7} +# note that you can set `label = TRUE` or use the LabelClusters function to help label individual clusters +DimPlot(pbmc, reduction = 'umap') +``` + +You can save the object at this point so that it can easily be loaded back in without having to rerun the computationally intensive steps performed above, or easily shared with collaborators. + +```{r saveobject, eval=FALSE} +saveRDS(pbmc, file = "../output/pbmc_tutorial.rds") +``` + +*** + +# Finding differentially expressed features (cluster biomarkers) + +Seurat can help you find markers that define clusters via differential expression. By default, it identifies positive and negative markers of a single cluster (specified in `ident.1`), compared to all other cells. `FindAllMarkers()` automates this process for all clusters, but you can also test groups of clusters vs. each other, or against all cells. + +The `min.pct` argument requires a feature to be detected at a minimum percentage in either of the two groups of cells, and the thresh.test argument requires a feature to be differentially expressed (on average) by some amount between the two groups. You can set both of these to 0, but with a dramatic increase in time - since this will test a large number of features that are unlikely to be highly discriminatory. As another option to speed up these computations, `max.cells.per.ident` can be set. This will downsample each identity class to have no more cells than whatever this is set to. While there is generally going to be a loss in power, the speed increases can be significant and the most highly differentially expressed features will likely still rise to the top. + +```{r markers1, fig.height=8, fig.width=15} +# find all markers of cluster 2 +cluster2.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25) +head(cluster2.markers, n = 5) +# find all markers distinguishing cluster 5 from clusters 0 and 3 +cluster5.markers <- FindMarkers(pbmc, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25) +head(cluster5.markers, n = 5) +# find markers for every cluster compared to all remaining cells, report only the positive ones +pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) +pbmc.markers %>% group_by(cluster) %>% slice_max(n = 2, order_by = avg_log2FC) +``` + +Seurat has several tests for differential expression which can be set with the test.use parameter (see our [DE vignette](de_vignette.html) for details). For example, the ROC test returns the 'classification power' for any individual marker (ranging from 0 - random, to 1 - perfect). + +```{r markersroc, fig.height=8, fig.width=15} +cluster0.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE) +``` + +We include several tools for visualizing marker expression. `VlnPlot()` (shows expression probability distributions across clusters), and `FeaturePlot()` (visualizes feature expression on a tSNE or PCA plot) are our most commonly used visualizations. We also suggest exploring `RidgePlot()`, `CellScatter()`, and `DotPlot()` as additional methods to view your dataset. + +```{r markerplots, fig.height=10, fig.width=15} +VlnPlot(pbmc, features = c("MS4A1", "CD79A")) +# you can plot raw counts as well +VlnPlot(pbmc, features = c("NKG7", "PF4"), slot = 'counts', log = TRUE) +FeaturePlot(pbmc, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A")) +``` + +`DoHeatmap()` generates an expression heatmap for given cells and features. In this case, we are plotting the top 20 markers (or all markers if less than 20) for each cluster. + +```{r clusterHeatmap, fig.height=8, fig.width=15} +pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 + +# DoHeatmap doesn't automatically use the correct slot/layer anymore. need to figure out to make this occur automatically. +DefaultLayer(pbmc[["RNA"]]) <- "scale.data" +DoHeatmap(pbmc, features = top10$gene) + NoLegend() +``` + +*** +# Assigning cell type identity to clusters + +Fortunately in the case of this dataset, we can use canonical markers to easily match the unbiased clustering to known cell types: + +Cluster ID | Markers | Cell Type +-----------|---------------|---------- +0 | IL7R, CCR7 | Naive CD4+ T +1 | CD14, LYZ | CD14+ Mono +2 | IL7R, S100A4 | Memory CD4+ +3 | MS4A1 | B +4 | CD8A | CD8+ T +5 | FCGR3A, MS4A7 | FCGR3A+ Mono +6 | GNLY, NKG7 | NK +7 | FCER1A, CST3 | DC +8 | PPBP | Platelet + + +```{r labelplot, fig.height=5, fig.width=9} +new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC", "Platelet") +names(new.cluster.ids) <- levels(pbmc) +pbmc <- RenameIdents(pbmc, new.cluster.ids) +DimPlot(pbmc, reduction = 'umap', label = TRUE, pt.size = 0.5) + NoLegend() +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DimPlot(pbmc, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + guides(colour = guide_legend(override.aes = list(size = 10))) +ggsave(filename = "../output/images/pbmc3k_umap.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +```{r save.rds, eval=FALSE} +saveRDS(pbmc, file = "../output/pbmc3k_final.rds") +``` + +```{r save2, include=TRUE} +saveRDS(pbmc, file = "../data/pbmc3k_final.rds") +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/pbmc3k_tutorial_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_sctransform_v2_vignette.Rmd b/vignettes/seurat5_sctransform_v2_vignette.Rmd new file mode 100644 index 000000000..5e8795cf8 --- /dev/null +++ b/vignettes/seurat5_sctransform_v2_vignette.Rmd @@ -0,0 +1,233 @@ +--- +title: 'Introduction to SCTransform, v2 regularization' +output: + html_document: + theme: united + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) + +``` + +## TL;DR + +We recently introduced [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) to perform normalization and variance stabilization of scRNA-seq datasets. We now release an updated version ('v2'), based on [our broad analysis](https://www.biorxiv.org/content/10.1101/2021.07.07.451498v1) of 59 scRNA-seq datasets spanning a range of technologies, systems, and sequencing depths. This update improves speed and memory consumption, the stability of parameter estimates, the identification of variable features, and the the ability to perform downstream differential expression analyses. + +Users can install sctransform v2 from CRAN (sctransform v0.3.3) and invoke the use of the updated method via the `vst.flavor` argument. + +```{r tldr, eval=FALSE} +# install sctransform >= 0.3.3 +install.packages("sctransform") +# invoke sctransform - requires Seurat>=4.1 +object <- SCTransform(object, vst.flavor = "v2") +``` + +## Introduction + +Heterogeneity in single-cell RNA-seq (scRNA-seq) data is driven by multiple sources, including biological variation in cellular state as well as technical variation introduced during experimental processing. In [Choudhary and Satija, 2021](https://www.biorxiv.org/content/10.1101/2021.07.07.451498v1) we provide a set of recommendations for modeling variation in scRNA-seq data, particularly when using generalized linear models or likelihood-based approaches for preprocessing and downstream analysis. + +In this vignette, we use [sctransform v2](https://github.com/satijalab/sctransform/) based workflow to perform a comparative analysis of human immune cells (PBMC) in either a [resting or interferon-stimulated state](https://www.nature.com/articles/nbt.4042). In this vignette we apply sctransform-v2 based normalization to perform the following tasks: + +* Create an 'integrated' data assay for downstream analysis +* Compare the datasets to find cell-type specific responses to stimulation +* Obtain cell type markers that are conserved in both control and stimulated cells + +## Install sctransform + +We will install sctransform v2 from CRAN (v0.3.3). We will also install the [glmGamPoi](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html) package which substantially improves the speed of the learning procedure. + +```{r results='hide', message=FALSE, warning=FALSE} +# install glmGamPoi +if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") +BiocManager::install("glmGamPoi") +# install sctransform from Github +install.packages("sctransform") +``` + +## Setup the Seurat objects + + +```{r data} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(patchwork) +library(dplyr) +library(ggplot2) +``` +The dataset is available through our [SeuratData](https://github.com/satijalab/seurat-data) package. + +```{r installdata, eval=FALSE} +# install dataset +InstallData("ifnb") +``` + +```{r init, results='hide', message=FALSE, fig.keep='none'} +# load dataset +LoadData("ifnb") +ifnb <- UpdateSeuratObject(ifnb) +# ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) + +# split the dataset into a list of two seurat objects (stim and CTRL) +ifnb.list <- SplitObject(ifnb, split.by = "stim") + +ctrl <- ifnb.list[["CTRL"]] +stim <- ifnb.list[["STIM"]] +``` + +## Perform normalization and dimensionality reduction + +To perform normalization, we invoke `SCTransform` with an additional flag `vst.flavor="v2"` to invoke +the v2 regularization. This provides some improvements over our original approach first introduced in [Hafemeister and Satija, 2019](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1). + +* We fix the slope parameter of the GLM to $\ln(10)$ with $\log_{10}(\text{total UMI})$ used as the predictor as proposed by [Lause et al.](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02451-7) +* We utilize an improved parameter estimation procedure that alleviates uncertainty and bias that result from fitting GLM models for very lowly expressed genes. +* We place a lower bound on gene-level standard deviation when calculating Pearson residuals. This prevents genes with extremely low expression (only 1-2 detected UMIs) from having a high pearson residual. + + +```{r ctrldimreduc, fig.width=10, fig.height=4} +# normalize and run dimensionality reduction on control dataset +ctrl <- SCTransform(ctrl, vst.flavor = "v2", verbose = FALSE) %>% + RunPCA(npcs = 30, verbose = FALSE) %>% + RunUMAP(reduction = "pca", dims = 1:30, verbose = FALSE) %>% + FindNeighbors(reduction = "pca", dims = 1:30, verbose = FALSE) %>% + FindClusters(resolution = 0.7, verbose = FALSE) + +p1 <- DimPlot(ctrl, label = T, repel = T) + ggtitle("Unsupervised clustering") +p2 <- DimPlot(ctrl, label = T, repel = T, group.by = "seurat_annotations") + ggtitle("Annotated celltypes") + +p1 | p2 +``` + +## Perform integration using pearson residuals + +To perform integration using the pearson residuals calculated above, we use the `PrepSCTIntegration()` function after selecting a list of informative features using `SelectIntegrationFeatures()`: + +```{r prepinteg} +stim <- SCTransform(stim, vst.flavor = "v2", verbose = FALSE) %>% RunPCA(npcs = 30, verbose = FALSE) +ifnb.list <- list(ctrl = ctrl, stim = stim) +features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) +ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) +``` + +To integrate the two datasets, we use the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`. + +```{r ifnb.cca.sct.anchors} +immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, + normalization.method = "SCT", anchor.features = features) +immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = "SCT") +``` + +## Perform an integrated analysis + +Now we can run a single integrated analysis on all cells: + +```{r ifnb.cca.sct.clustering, results='hide', message=FALSE} +immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) +immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30, verbose = FALSE) +immune.combined.sct <- FindNeighbors(immune.combined.sct, reduction = "pca", dims = 1:30) +immune.combined.sct <- FindClusters(immune.combined.sct, resolution = 0.3) +``` + +To visualize the two conditions side-by-side, we can use the `split.by` argument to show each condition colored by cluster. + +```{r split.dim} +DimPlot(immune.combined.sct, reduction = "umap", split.by = "stim") +``` + +We can also visualize the distribution of annotated celltypes across control and stimulated datasets: + +```{r immunesca.cca.sct.split.dims, fig.width=13, fig.height=4} +p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") +p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "seurat_clusters", label = TRUE, repel = TRUE) +p3 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "seurat_annotations", label = TRUE, repel = TRUE) +p1 | p2 | p3 +``` + + +## Identify differential expressed genes across conditions + +Using the normalized datasets with known celltype annotation, we can ask what genes change in different conditions for cells of the same type. First, we create a column in the meta.data slot to hold both the cell type and stimulation information and switch the current ident to that column. + +```{r de.genes} +immune.combined.sct$celltype.stim <- paste(immune.combined.sct$seurat_annotations, + immune.combined.sct$stim, sep = "_") +Idents(immune.combined.sct) <- "celltype.stim" +``` + +To run differential expression, we make use of 'corrected counts' that are stored in the `data` slot of the the `SCT` assay. Corrected counts are obtained by setting the sequencing depth for all the cells to a fixed value and reversing the learned regularized negative-binomial regression model. Prior to performing differential expression, we first run `PrepSCTFindMarkers`, which ensures that the fixed value is set properly. Then we use `FindMarkers(assay="SCT")` to find differentially expressed genes. Here, we aim to identify genes that are differently expressed between stimulated and control B cells. + +```{r runde} +immune.combined.sct <- PrepSCTFindMarkers(immune.combined.sct) + +b.interferon.response <- FindMarkers(immune.combined.sct, assay = "SCT", + ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE) +head(b.interferon.response, n = 15) +``` + +If running on a subset of the original object after running `PrepSCTFindMarkers()`, `FindMarkers()` should be invoked with `recorrect_umi = FALSE` to use the existing corrected counts: + +```{r runde2} +immune.combined.sct.subset <- subset(immune.combined.sct, idents = c("B_STIM", "B_CTRL")) +b.interferon.response.subset <- FindMarkers(immune.combined.sct.subset, assay = "SCT", + ident.1 = "B_STIM", ident.2 = "B_CTRL", + verbose = FALSE, recorrect_umi = FALSE) +``` + +We can also use the corrected counts for visualization: + +```{r feature.heatmaps, fig.height = 14} +Idents(immune.combined.sct) <- "seurat_annotations" +DefaultAssay(immune.combined.sct) <- "SCT" +FeaturePlot(immune.combined.sct, features = c("CD3D", "GNLY", "IFI6"), + split.by = "stim", max.cutoff = 3, cols = c("grey", "red")) +``` + +```{r splitvln, fig.height = 12} +plots <- VlnPlot(immune.combined.sct, features = c("LYZ", "ISG15", "CXCL10"), + split.by = "stim", group.by = "seurat_annotations", pt.size = 0, combine = FALSE) +wrap_plots(plots = plots, ncol = 1) +``` + +### Identify conserved cell type markers + +To identify canonical cell type marker genes that are conserved across conditions, we provide the `FindConservedMarkers()` function. This function performs differential gene expression testing for each dataset/group and combines the p-values using meta-analysis methods from the MetaDE R package. For example, we can identify genes that are conserved markers irrespective of stimulation condition in NK cells. Note that the `PrepSCTFindMarkers` command does not to be rerun here. + +```{r conserved.markers, warning=FALSE} +nk.markers <- FindConservedMarkers(immune.combined.sct, assay = "SCT", ident.1 = "NK", grouping.var = "stim", verbose = FALSE) +head(nk.markers) +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/sctransform2.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_sctransform_vignette.Rmd b/vignettes/seurat5_sctransform_vignette.Rmd new file mode 100644 index 000000000..86a5ae17a --- /dev/null +++ b/vignettes/seurat5_sctransform_vignette.Rmd @@ -0,0 +1,156 @@ +--- +title: "Using sctransform in Seurat" +author: Christoph Hafemeister & Rahul Satija +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` + +Biological heterogeneity in single-cell RNA-seq data is often confounded by technical factors including sequencing depth. The number of molecules detected in each cell can vary significantly between cells, even within the same celltype. +Interpretation of scRNA-seq data requires effective pre-processing and normalization to remove this technical variability. +In [Hafemeister and Satija, 2019](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) we introduce a modeling framework for the normalization and variance stabilization of molecular count data from scRNA-seq experiment. +This procedure omits the need for heuristic steps including pseudocount addition or log-transformation and improves common downstream analytical tasks such as variable gene selection, dimensional reduction, and differential expression. + +In this vignette, we demonstrate how using [sctransform](https://github.com/ChristophH/sctransform/) based normalization enables recovering sharper biological distinction compared to log-normalization. + +```{r packages} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(ggplot2) +library(sctransform) +``` + +Load data and create Seurat object + +```{r load_data, warning=FALSE, message=FALSE} +pbmc_data <- Read10X(data.dir = "../data/pbmc3k/filtered_gene_bc_matrices/hg19/") +pbmc <- CreateSeuratObject(counts = pbmc_data) +``` + +Apply sctransform normalization + + * Note that this single command replaces `NormalizeData()`, `ScaleData()`, and `FindVariableFeatures()`. + * Transformed data will be available in the SCT assay, which is set as the default after running sctransform + * During normalization, we can also remove confounding sources of variation, for example, mitochondrial mapping percentage + +```{r apply_sct, warning=FALSE, message=FALSE} +# store mitochondrial percentage in object meta data +pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") + +# run sctransform +pbmc <- SCTransform(pbmc, vars.to.regress = "percent.mt", verbose = FALSE) +``` + +The latest version of `sctransform` also supports using [glmGamPoi](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html) +package which substantially improves the speed of the learning procedure. It can be invoked by specifying +`method="glmGamPoi"`. + +```{r eval=FALSE} +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("glmGamPoi") +pbmc <- SCTransform(pbmc, method="glmGamPoi", vars.to.regress = "percent.mt", verbose = FALSE) +``` + +Perform dimensionality reduction by PCA and UMAP embedding +```{r pca, fig.width=5, fig.height=5} +# These are now standard steps in the Seurat workflow for visualization and clustering +pbmc <- RunPCA(pbmc, verbose = FALSE) +pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE) + +pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE) +pbmc <- FindClusters(pbmc, verbose = FALSE) +DimPlot(pbmc, label = TRUE) + NoLegend() +``` + +
    + **Why can we choose more PCs when using sctransform?** + +In the [standard Seurat workflow](pbmc3k_tutorial.html) we focus on 10 PCs for this dataset, though we highlight that the results are similar with higher settings for this parameter. Interestingly, we've found that when using sctransform, we often benefit by pushing this parameter even higher. We believe this is because the sctransform workflow performs more effective normalization, strongly removing technical effects from the data. + +Even after standard log-normalization, variation in sequencing depth is still a confounding factor (see [Figure 1](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1)), and this effect can subtly influence higher PCs. In sctransform, this effect is substantially mitigated (see [Figure 3](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1)). This means that higher PCs are more likely to represent subtle, but biologically relevant, sources of heterogeneity -- so including them may improve downstream analysis. + +In addition, sctransform returns 3,000 variable features by default, instead of 2,000. The rationale is similar, the additional variable features are less likely to be driven by technical differences across cells, and instead may represent more subtle biological fluctuations. In general, we find that results produced with sctransform are less dependent on these parameters (indeed, we achieve nearly identical results when using all genes in the transcriptome, though this does reduce computational efficiency). This can help users generate more robust results, and in addition, enables the application of standard analysis pipelines with identical parameter settings that can quickly be applied to new datasets: + +For example, the following code replicates the full end-to-end workflow, in a single command: + +```{r oneliner, eval=FALSE} +pbmc <- CreateSeuratObject(pbmc_data) %>% PercentageFeatureSet(pattern = "^MT-",col.name = 'percent.mt') %>% SCTransform(vars.to.regress = 'percent.mt') %>% + RunPCA() %>% FindNeighbors(dims = 1:30) %>% RunUMAP(dims = 1:30) %>% FindClusters() + +``` + +
    + +
    + **Where are normalized values stored for sctransform?** + +As described in our [paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), sctransform calculates a model of technical noise in scRNA-seq data using 'regularized negative binomial regression'. The residuals for this model are normalized values, and can be positive or negative. Positive residuals for a given gene in a given cell indicate that we observed more UMIs than expected given the gene’s average expression in the population and cellular sequencing depth, while negative residuals indicate the converse. + +The results of sctransfrom are stored in the "SCT" assay. You can learn more about multi-assay data and commands in Seurat in our [vignette](multimodal_vignette.html), [command cheat sheet](essential_commands.html#multi-assay-features), or [developer guide](https://github.com/satijalab/seurat/wiki/Assay). + +* `pbmc[["SCT"]]@scale.data` contains the residuals (normalized values), and is used directly as input to PCA. Please note that this matrix is non-sparse, and can therefore take up a lot of memory if stored for all genes. To save memory, we store these values only for variable genes, by setting the return.only.var.genes = TRUE by default in the `SCTransform()` function call. +* To assist with visualization and interpretation. we also convert Pearson residuals back to ‘corrected’ UMI counts. You can interpret these as the UMI counts we would expect to observe if all cells were sequenced to the same depth. If you want to see exactly how we do this, please look at the correct function [here](https://github.com/ChristophH/sctransform/blob/master/R/denoise.R). +* The 'corrected' UMI counts are stored in `pbmc[["SCT"]]@counts`. We store log-normalized versions of these corrected counts in `pbmc[["SCT"]]@data`, which are very helpful for visualization. +* You can use the corrected log-normalized counts for differential expression and integration. However, in principle, it would be most optimal to perform these calculations directly on the residuals (stored in the `scale.data` slot) themselves. This is not currently supported in Seurat v3, but will be soon. + +------ +
    +\ + + +Users can individually annotate clusters based on canonical markers. However, the sctransform normalization reveals sharper biological distinctions compared to the [standard Seurat workflow](pbmc3k_tutorial.html), in a few ways: + + * Clear separation of at least 3 CD8 T cell populations (naive, memory, effector), based on CD8A, GZMK, CCL5, GZMK expression + * Clear separation of three CD4 T cell populations (naive, memory, IFN-activated) based on S100A4, CCR7, IL32, and ISG15 + * Additional developmental sub-structure in B cell cluster, based on TCL1A, FCER2 + * Additional separation of NK cells into CD56dim vs. bright clusters, based on XCL1 and FCGR3A + + +```{r fplot, fig.width = 10, fig.height=6} +# These are now standard steps in the Seurat workflow for visualization and clustering +# Visualize canonical marker genes as violin plots. +VlnPlot(pbmc, features = c("CD8A", "GZMK", "CCL5", "S100A4", "ANXA1", "CCR7", "ISG15", "CD3D"), pt.size = 0.2, ncol = 4) + +# Visualize canonical marker genes on the sctransform embedding. +FeaturePlot(pbmc, features = c("CD8A", "GZMK", "CCL5", "S100A4", "ANXA1", "CCR7"), pt.size = 0.2, ncol = 3) +FeaturePlot(pbmc, features = c("CD3D", "ISG15", "TCL1A", "FCER2", "XCL1", "FCGR3A"), pt.size = 0.2, ncol = 3) +``` + + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/sctransform_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd new file mode 100644 index 000000000..7070b8115 --- /dev/null +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -0,0 +1,474 @@ +--- +title: "Analysis, visualization, and integration of spatial datasets with Seurat" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + fig.width = 10, + time_it = TRUE, + error = TRUE +) +``` + +# Overview + +This tutorial demonstrates how to use Seurat (>=3.2) to analyze spatially-resolved RNA-seq data. While the analytical pipelines are similar to the Seurat workflow for [single-cell RNA-seq analysis](pbmc3k_tutorial.html), we introduce updated interaction and visualization tools, with a particular emphasis on the integration of spatial and molecular information. This tutorial will cover the following tasks, which we believe will be common for many spatial analyses: + +* Normalization +* Dimensional reduction and clustering +* Detecting spatially-variable features +* Interactive visualization +* Integration with single-cell RNA-seq data +* Working with multiple slices + +For our first vignette, we analyze a dataset generated with the [Visium technology](https://www.10xgenomics.com/spatial-transcriptomics/) from 10x Genomics. We will be extending Seurat to work with additional data types in the near-future, including [SLIDE-Seq](https://science.sciencemag.org/content/363/6434/1463), [STARmap](https://science.sciencemag.org/content/361/6400/eaat5691), and [MERFISH](https://science.sciencemag.org/content/362/6416/eaau5324). + +First, we load Seurat and the other packages necessary for this vignette. + +```{r install} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(ggplot2) +library(patchwork) +library(dplyr) +``` + +```{r libraries.for.rmd, echo = FALSE} +library("htmltools") +#library("vembedr") +``` + + +# 10x Visium + +## Dataset + +Here, we will be using a recently released dataset of sagital mouse brain slices generated using the Visium v1 chemistry. There are two serial anterior sections, and two (matched) serial posterior sections. + +You can download the data [here](https://support.10xgenomics.com/spatial-gene-expression/datasets), and load it into Seurat using the `Load10X_Spatial()` function. This reads in the output of the [spaceranger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger) pipeline, and returns a Seurat object that contains both the spot-level expression data along with the associated image of the tissue slice. You can also use our [SeuratData package](https://github.com/satijalab/seurat-data) for easy data access, as demonstrated below. After installing the dataset, you can type `?stxBrain` to learn more. + +```{r data.install, eval = FALSE} +InstallData("stxBrain") +``` + +```{r data} +brain <- LoadData('stxBrain', type = 'anterior1') +brain <- UpdateSeuratObject(brain) +brain[["Spatial"]] <- CreateAssay5Object(brain[["Spatial"]]@counts) +``` + +
    + **How is the spatial data stored within Seurat? ** +The visium data from 10x consists of the following data types: + +* A spot by gene expression matrix +* An image of the tissue slice (obtained from H&E staining during data acquisition) +* Scaling factors that relate the original high resolution image to the lower resolution image used here for visualization. + +In the Seurat object, the spot by gene expression matrix is similar to a typical "RNA" `Assay` but contains spot level, not single-cell level data. The image itself is stored in a new `images` slot in the Seurat object. The `images` slot also stores the information necessary to associate spots with their physical position on the tissue image. +
    + +## Data preprocessing + +The initial preprocessing steps that we perform on the spot by gene expression data are similar to a typical scRNA-seq experiment. We first need to normalize the data in order to account for variance in sequencing depth across data points. We note that the variance in molecular counts / spot can be substantial for spatial datasets, particularly if there are differences in cell density across the tissue. We see substantial heterogeneity here, which requires effective normalization. + +```{r qc, fig.height=5} +plot1 <- VlnPlot(brain, features = 'nCount_Spatial', pt.size = 0.1) + NoLegend() +plot2 <- SpatialFeaturePlot(brain, features = 'nCount_Spatial') + theme(legend.position = "right") +wrap_plots(plot1, plot2) +``` + +These plots demonstrate that the variance in molecular counts across spots is not just technical in nature, but also is dependent on the tissue anatomy. For example, regions of the tissue that are depleted for neurons (such as the cortical white matter), reproducibly exhibit lower molecular counts. As a result, standard approaches (such as the `LogNormalize()` function), which force each data point to have the same underlying 'size' after normalization, can be problematic. + +As an alternative, we recommend using sctransform (Hafemeister and Satija, Genome Biology 2019), which which builds regularized negative binomial models of gene expression in order to account for technical artifacts while preserving biological variance. For more details on sctransform, please see the paper [here](https://doi.org/10.1186/s13059-019-1874-1) and the Seurat vignette [here](sctransform_vignette.html). sctransform normalizes the data, detects high-variance features, and stores the data in the `SCT` assay. + +```{r preprocess} +brain <- SCTransform(brain, assay = "Spatial", verbose = FALSE) +``` + +
    + **How do results compare to log-normalization?** +To explore the differences in normalization methods, we examine how both the sctransform and log normalization results correlate with the number of UMIs. For this comparison, we first rerun sctransform to store values for all genes and run a log-normalization procedure via `NormalizeData()`. + +```{r norm.test} +# rerun normalization to store sctransform residuals for all genes +brain <- SCTransform(brain, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE) +# also run standard log normalization for comparison +brain <- NormalizeData(brain, verbose = FALSE, assay = "Spatial") +``` + +```{r norm.test2} +# Computes the correlation of the log normalized data and sctransform residuals with the number of UMIs +brain <- GroupCorrelation(brain, group.assay = "Spatial", assay = "Spatial", slot = "data", do.plot = FALSE) +brain <- GroupCorrelation(brain, group.assay = "Spatial", assay = "SCT", slot = "scale.data", do.plot = FALSE) +``` + +```{r norm.test3} +p1 <- GroupCorrelationPlot(brain, assay = "Spatial", cor = "nCount_Spatial_cor") + ggtitle("Log Normalization") + theme(plot.title = element_text(hjust = 0.5)) +p2 <- GroupCorrelationPlot(brain, assay = "SCT", cor = "nCount_Spatial_cor") + ggtitle("SCTransform Normalization") + theme(plot.title = element_text(hjust = 0.5)) +p1 + p2 +``` + +For the boxplots above, we calculate the correlation of each feature (gene) with the number of UMIs (the `nCount_Spatial` variable here). We then place genes into groups based on their mean expression, and generate boxplots of these correlations. You can see that log-normalization fails to adequately normalize genes in the first three groups, suggesting that technical factors continue to influence normalized expression estimates for highly expressed genes. In contrast, sctransform normalization substantially mitigates this effect. +
    + +## Gene expression visualization + +In Seurat, we have functionality to explore and interact with the inherently visual nature of spatial data. The `SpatialFeaturePlot()` function in Seurat extends `FeaturePlot()`, and can overlay molecular data on top of tissue histology. For example, in this data set of the mouse brain, the gene Hpca is a strong hippocampus marker and Ttr is a marker of the choroid plexus. + +```{r featureplot} +SpatialFeaturePlot(brain, features = c("Hpca", "Ttr")) +``` + + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- SpatialFeaturePlot(brain, features = c("Ttr")) + + theme(legend.text = element_text(size = 0), legend.title = element_text(size = 20), legend.key.size = unit(1, "cm")) +jpeg(filename = "../output/images/spatial_vignette_ttr.jpg", height = 700, width = 1200, quality = 50) +print(plot) +dev.off() +``` + +The default parameters in Seurat emphasize the visualization of molecular data. However, you can also adjust the size of the spots (and their transparency) to improve the visualization of the histology image, by changing the following parameters: + +* `pt.size.factor`- This will scale the size of the spots. Default is 1.6 +* `alpha` - minimum and maximum transparency. Default is c(1, 1). +* Try setting to `alpha` c(0.1, 1), to downweight the transparency of points with lower expression + +```{r fpe1} +p1 <- SpatialFeaturePlot(brain, features = "Ttr", pt.size.factor = 1) +p2 <- SpatialFeaturePlot(brain, features = "Ttr", alpha = c(0.1, 1)) +p1 + p2 +``` + +## Dimensionality reduction, clustering, and visualization + +We can then proceed to run dimensionality reduction and clustering on the RNA expression data, using the same workflow as we use for scRNA-seq analysis. + +```{r dim.cluster} +brain <- RunPCA(brain, assay = "SCT", verbose = FALSE) +brain <- FindNeighbors(brain, reduction = "pca", dims = 1:30) +brain <- FindClusters(brain, verbose = FALSE) +brain <- RunUMAP(brain, reduction = "pca", dims = 1:30) +``` + +We can then visualize the results of the clustering either in UMAP space (with `DimPlot()`) or overlaid on the image with `SpatialDimPlot()`. + +```{r dim.plots,fig.height=5} +p1 <- DimPlot(brain, reduction = "umap", label = TRUE) +p2 <- SpatialDimPlot(brain, label = TRUE, label.size = 3) +p1 + p2 +``` +As there are many colors, it can be challenging to visualize which voxel belongs to which cluster. We have a few strategies to help with this. Setting the `label` parameter places a colored box at the median of each cluster (see the plot above). + +You can also use the `cells.highlight` parameter to demarcate particular cells of interest on a `SpatialDimPlot()`. This can be very useful for distinguishing the spatial localization of individual clusters, as we show below: + +```{r facetdim} +SpatialDimPlot(brain, cells.highlight = CellsByIdentities(object = brain,idents = c(2, 1, 4, 3, 5, 8)), facet.highlight = TRUE, ncol = 3) +``` + +## Interactive plotting + +We have also built in a number of interactive plotting capabilities. Both `SpatialDimPlot()` and `SpatialFeaturePlot()` now have an `interactive` parameter, that when set to `TRUE`, will open up the Rstudio viewer pane with an interactive Shiny plot. The example below demonstrates an interactive `SpatialDimPlot()` in which you can hover over spots and view the cell name and current identity class (analogous to the previous `do.hover` behavior). + +```{r ispatialdimplot, eval = FALSE} +SpatialDimPlot(brain, interactive = TRUE) +``` + +```{r, echo = FALSE} +embed_url("https://youtu.be/E1aZjmG1neQ") +``` + +For `SpatialFeaturePlot()`, setting interactive to `TRUE` brings up an interactive pane in which you can adjust the transparency of the spots, the point size, as well as the `Assay` and feature being plotted. After exploring the data, selecting the done button will return the last active plot as a ggplot object. + +```{r ispatialfeatureplot, eval = FALSE} +SpatialFeaturePlot(brain, features = "Ttr", interactive = TRUE) +``` + +```{r, echo = FALSE} +embed_url("https://youtu.be/ILmb8XNlgEM") +``` + +The `LinkedDimPlot()` function links the UMAP representation to the tissue image representation and allows for interactive selection. For example, you can select a region in the UMAP plot and the corresponding spots in the image representation will be highlighted. + +```{r linkedplot, eval=FALSE} +LinkedDimPlot(brain) +``` + +```{r, echo = FALSE} +embed_url("https://youtu.be/10PZqjcSKrg") +``` + +## Identification of Spatially Variable Features + +Seurat offers two workflows to identify molecular features that correlate with spatial location within a tissue. The first is to perform differential expression based on pre-annotated anatomical regions within the tissue, which may be determined either from unsupervised clustering or prior knowledge. This strategy works will in this case, as the clusters above exhibit clear spatial restriction. + +```{r de, fig.height = 4} +de_markers <- FindMarkers(brain, ident.1 = 5, ident.2 = 6) +SpatialFeaturePlot(object = brain, features = rownames(de_markers)[1:3], alpha = c(0.1, 1), ncol = 3) +``` + +An alternative approach, implemented in `FindSpatiallyVariables()`, is to search for features exhibiting spatial patterning in the absence of pre-annotation. The default method (`method = 'markvariogram`), is inspired by the [Trendsceek](https://www.nature.com/articles/nmeth.4634), which models spatial transcriptomics data as a mark point process and computes a 'variogram', which identifies genes whose expression level is dependent on their spatial location. More specifically, this process calculates gamma(r) values measuring the dependence between two spots a certain "r" distance apart. By default, we use an r-value of '5' in these analyses, and only compute these values for variable genes (where variation is calculated independently of spatial location) to save time. + +We note that there are multiple methods in the literature to accomplish this task, including [SpatialDE](https://www.nature.com/articles/nmeth.4636), and [Splotch](https://www.biorxiv.org/content/10.1101/757096v1.article-metrics). We encourage interested users to explore these methods, and hope to add support for them in the near future. + +```{r spatial.vf} +brain <- FindSpatiallyVariableFeatures(brain, assay = 'SCT', features = VariableFeatures(brain)[1:1000], selection.method = 'markvariogram') +``` + +Now we visualize the expression of the top 6 features identified by this measure. +```{r spatial.vf.plot, fig.height=8} +top.features <- head(SpatiallyVariableFeatures(brain, selection.method = 'markvariogram'),6) +SpatialFeaturePlot(brain, features = top.features, ncol = 3, alpha = c(0.1, 1)) +``` + +## Subset out anatomical regions + +As with single-cell objects, you can subset the object to focus on a subset of data. Here, we approximately subset the frontal cortex. This process also facilitates the integration of these data with a cortical scRNA-seq dataset in the next section. First, we take a subset of clusters, and then further segment based on exact positions. After subsetting, we can visualize the cortical cells either on the full image, or a cropped image. + +```{r subset1} +cortex <- subset(brain, idents = c(1, 2, 3, 4, 6, 7)) +# now remove additional cells, use SpatialDimPlots to visualize what to remove +# SpatialDimPlot(cortex,cells.highlight = WhichCells(cortex, expression = image_imagerow > 400 | image_imagecol < 150)) +cortex <- subset(cortex, anterior1_imagerow > 400 | anterior1_imagecol < 150, invert = TRUE) +cortex <- subset(cortex, anterior1_imagerow > 275 & anterior1_imagecol > 370, invert = TRUE) +cortex <- subset(cortex, anterior1_imagerow > 250 & anterior1_imagecol > 440, invert = TRUE) +``` + +```{r subset1.plot, fig.height = 4} +p1 <- SpatialDimPlot(cortex, crop = TRUE, label = TRUE) +p2 <- SpatialDimPlot(cortex, crop = FALSE, label = TRUE, pt.size.factor = 1, label.size = 3) +p1 + p2 +``` + +## Integration with single-cell data + +At ~50um, spots from the visium assay will encompass the expression profiles of multiple cells. For the growing list of systems where scRNA-seq data is available, users may be interested to 'deconvolute' each of the spatial voxels to predict the underlying composition of cell types. In preparing this vignette, we tested a wide variety of decovonlution and integration methods, using a [reference scRNA-seq dataset](https://www.nature.com/articles/nn.4216) of ~14,000 adult mouse cortical cell taxonomy from the Allen Institute, generated with the SMART-Seq2 protocol. +We consistently found superior performance using integration methods (as opposed to deconvolution methods), likely because of substantially different noise models that characterize spatial and single-cell datasets, and integration methods are specifiically designed to be robust to these differences. We therefore apply the 'anchor'-based integration workflow introduced in [Seurat v3](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8), that enables the probabilistic transfer of annotations from a reference to a query set. We therefore follow the label transfer workflow introduced [here](reference_mapping.html), taking advantage of sctransform normalization, but anticipate new methods to be developed to accomplish this task. + +We first load the data (download available [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)), pre-process the scRNA-seq reference, and then perform label transfer. The procedure outputs, for each spot, a probabilistic classification for each of the scRNA-seq derived classes. We add these predictions as a new assay in the Seurat object. + +```{r sc.data} +allen_reference <- readRDS("../data/allen_cortex.rds") +allen_reference <- UpdateSeuratObject(allen_reference) +``` + +```{r sc.data2} +# note that setting ncells=3000 normalizes the full dataset but learns noise models on 3k cells +# this speeds up SCTransform dramatically with no loss in performance +library(dplyr) +allen_reference <- SCTransform(allen_reference, ncells = 3000, verbose = FALSE) %>% RunPCA(verbose = FALSE) %>% RunUMAP(dims = 1:30) +``` + +```{r sc.data3, fig.width=8, fig.align="center"} +# After subsetting, we renormalize cortex +cortex <- SCTransform(cortex, assay = 'Spatial', verbose = FALSE) %>% RunPCA(verbose = FALSE) +# the annotation is stored in the 'subclass' column of object metadata +DimPlot(allen_reference, group.by = 'subclass', label = TRUE) +``` + +```{r sc.data5} +anchors <- FindTransferAnchors(reference = allen_reference, query = cortex, normalization.method = "SCT") +predictions.assay <- TransferData(anchorset = anchors, refdata = allen_reference$subclass, prediction.assay = TRUE, weight.reduction = cortex[["pca"]], dims = 1:30) +cortex[["predictions"]] <- predictions.assay +``` + +Now we get prediction scores for each spot for each class. Of particular interest in the frontal cortex region are the laminar excitatory neurons. Here we can distinguish between distinct sequential layers of these neuronal subtypes, for example: + +```{r sc.data7} +DefaultAssay(cortex) <- "predictions" +SpatialFeaturePlot(cortex, features = c("L2/3 IT", "L4"), pt.size.factor = 1.6, ncol = 2, crop = TRUE) +``` + +Based on these prediction scores, we can also predict *cell types* whose location is spatially restricted. We use the same methods based on marked point processes to define spatially variable features, but use the cell type prediction scores as the "marks" rather than gene expression. + +```{r sc.data8, fig.height = 10} +cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "markvariogram", features = rownames(cortex), r.metric = 5, slot = "data") +top.clusters <- head(SpatiallyVariableFeatures(cortex), 4) +SpatialPlot(object = cortex, features = top.clusters, ncol = 2) +``` + +Finally, we show that our integrative procedure is capable of recovering the known spatial localization patterns of both neuronal and non-neuronal subsets, including laminar excitatory, layer-1 astrocytes, and the cortical grey matter. + +```{r sc.data9,fig.height=20,fig.width=10} +SpatialFeaturePlot(cortex, features = c("Astro", "L2/3 IT", "L4", "L5 PT", "L5 IT", "L6 CT", "L6 IT", "L6b", "Oligo"), pt.size.factor = 1, ncol = 2, crop = FALSE, alpha = c(0.1, 1)) +``` + + +## Working with multiple slices in Seurat + +This dataset of the mouse brain contains another slice corresponding to the other half of the brain. Here we read it in and perform the same initial normalization. + +```{r brain2data} +brain2 <- LoadData('stxBrain', type = 'posterior1') +brain2 <- UpdateSeuratObject(brain2) +brain2[["Spatial"]] <- CreateAssay5Object(counts = brain2[["Spatial"]]@counts) +brain2 <- SCTransform(brain2, assay = "Spatial", verbose = FALSE) +``` + +In order to work with multiple slices in the same Seurat object, we provide the `merge` function. + +```{r merge} +brain.merge <- merge(brain, brain2) +``` + +This then enables joint dimensional reduction and clustering on the underlying RNA expression data. + +```{r joint.analysis} +DefaultAssay(brain.merge) <- "SCT" +VariableFeatures(brain.merge) <- c(VariableFeatures(brain), VariableFeatures(brain2)) +brain.merge <- RunPCA(brain.merge, verbose = FALSE) +brain.merge <- FindNeighbors(brain.merge, dims = 1:30) +brain.merge <- FindClusters(brain.merge, verbose = FALSE) +brain.merge <- RunUMAP(brain.merge, dims = 1:30) +``` + +Finally, the data can be jointly visualized in a single UMAP plot. `SpatialDimPlot()` and `SpatialFeaturePlot()` will by default plot all slices as columns and groupings/features as rows. + +```{r joint.viz, fig.height = 4} +DimPlot(brain.merge, reduction = "umap", group.by = c("ident", "orig.ident")) +``` + +```{r joint.viz2} +SpatialDimPlot(brain.merge) +``` + +```{r joint.viz3, fig.height = 10} +SpatialFeaturePlot(brain.merge, features = c('Hpca', 'Plp1')) +``` + +## Acknowledgments + +We would like to thank Nigel Delaney and Stephen Williams for their helpful feedback and contributions to the new spatial Seurat code. + +# Slide-seq + +## Dataset + +Here, we will be analyzing a dataset generated using [Slide-seq v2](https://www.biorxiv.org/content/10.1101/2020.03.12.989806v1) of the mouse hippocampus. This tutorial will follow much of the same structure as the spatial vignette for 10x Visium data but is tailored to give a demonstration specific to Slide-seq data. + +You can use our [SeuratData package](https://github.com/satijalab/seurat-data) for easy data access, as demonstrated below. After installing the dataset, you can type `?ssHippo` to see the commands used to create the Seurat object. + +```{r data.ss.install, eval = FALSE} +InstallData("ssHippo") +``` + +```{r data.ss} +slide.seq <- LoadData('ssHippo') +slide.seq <- UpdateSeuratObject(slide.seq) +slide.seq[["Spatial"]] <- CreateAssay5Object(counts = slide.seq[["Spatial"]]@counts) +``` + +## Data preprocessing + +The initial preprocessing steps for the bead by gene expression data are similar to other spatial Seurat analyses and to typical scRNA-seq experiments. Here, we note that many beads contain particularly low UMI counts but choose to keep all detected beads for downstream analysis. + +```{r qc.ss, fig.height=5} +plot1 <- VlnPlot(slide.seq, features = 'nCount_Spatial', pt.size = 0, log = TRUE) + NoLegend() +slide.seq$log_nCount_Spatial <- log(slide.seq$nCount_Spatial) +plot2 <- SpatialFeaturePlot(slide.seq, features = 'log_nCount_Spatial') + theme(legend.position = "right") +wrap_plots(plot1, plot2) +``` + +We then normalize the data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) and perform a standard scRNA-seq dimensionality reduction and clustering workflow. + +```{r preprocess.ss} +slide.seq <- SCTransform(slide.seq, assay = "Spatial", ncells = 3000, verbose = FALSE) +slide.seq <- RunPCA(slide.seq) +slide.seq <- RunUMAP(slide.seq, dims = 1:30) +slide.seq <- FindNeighbors(slide.seq, dims = 1:30) +slide.seq <- FindClusters(slide.seq, resolution = 0.3, verbose = FALSE) +``` + +We can then visualize the results of the clustering either in UMAP space (with `DimPlot()`) or in the bead coordinate space with `SpatialDimPlot()`. + +```{r dim.plots.ss,fig.height=5} +plot1 <- DimPlot(slide.seq, reduction = "umap", label = TRUE) +plot2 <- SpatialDimPlot(slide.seq, stroke = 0) +plot1 + plot2 +SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq, idents = c(1, 6, 13)), facet.highlight = TRUE) +``` + +## Integration with a scRNA-seq reference + +To facilitate cell-type annotation of the Slide-seq dataset, we are leveraging an existing mouse single-cell RNA-seq hippocampus dataset, produced in [Saunders\*, Macosko\*, et al. 2018](https://doi.org/10.1016/j.cell.2018.07.028). The data is available for download as a processed Seurat object [here](https://www.dropbox.com/s/cs6pii5my4p3ke3/mouse_hippocampus_reference.rds?dl=0), with the raw count matrices available on the [DropViz website](http://dropviz.org/). + +```{r ref.saunders} +ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- UpdateSeuratObject(ref) +``` + +The original annotations from the paper are provided in the cell metadata of the Seurat object. These annotations are provided at several "resolutions", from broad classes (`ref$class`) to subclusters within celltypes (`ref$subcluster`). For the purposes of this vignette, we'll work off of a modification of the celltype annotations (`ref$celltype`) which we felt struck a good balance. + +We'll start by running the Seurat label transfer method to predict the major celltype for each bead. + +```{r ref.preprocessing.ss} +anchors <- FindTransferAnchors(reference = ref, query = slide.seq, normalization.method = "SCT", npcs = 50) +predictions.assay <- TransferData(anchorset = anchors, refdata = ref$celltype, prediction.assay = TRUE, weight.reduction = slide.seq[["pca"]], dims = 1:50) +slide.seq[["predictions"]] <- predictions.assay +``` + +We can then visualize the prediction scores for some of the major expected classes. + +```{r transfer.viz.ss, fig.height = 8} +DefaultAssay(slide.seq) <- 'predictions' +SpatialFeaturePlot(slide.seq, features = c("Dentate Principal cells", "CA3 Principal cells", "Entorhinal cortex", "Endothelial tip", "Ependymal", "Oligodendrocyte"), alpha = c(0.1, 1)) +``` + +```{r max.idents.ss} +slide.seq$predicted.id <- GetTransferPredictions(slide.seq) +Idents(slide.seq) <- "predicted.id" +SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq, idents = c("CA3 Principal cells", "Dentate Principal cells", "Endothelial tip")), facet.highlight = TRUE) +``` + +## Identification of Spatially Variable Features + +As mentioned in the Visium vignette, we can identify spatially variable features in two general ways: differential expression testing between pre-annotated anatomical regions or statistics that measure the dependence of a feature on spatial location. + +Here, we demonstrate the latter with an implementation of Moran's I available via `FindSpatiallyVariableFeatures()` by setting `method = 'moransi'`. Moran's I computes an overall spatial autocorrelation and gives a statistic (similar to a correlation coefficient) that measures the dependence of a feature on spatial location. This allows us to rank features based on how spatially variable their expression is. In order to facilitate quick estimation of this statistic, we implemented a basic binning strategy that will draw a rectangular grid over Slide-seq puck and average the feature and location within each bin. The number of bins in the x and y direction are controlled by the `x.cuts` and `y.cuts` parameters respectively. Additionally, while not required, installing the optional `Rfast2` package(`install.packages('Rfast2')`), will significantly decrease the runtime via a more efficient implementation. + +```{r spatial.vf.ss} +DefaultAssay(slide.seq) <- "SCT" +slide.seq <- FindSpatiallyVariableFeatures(slide.seq, assay = 'SCT', slot = "scale.data", features = VariableFeatures(slide.seq)[1:1000], selection.method = 'moransi', x.cuts = 100, y.cuts = 100) +``` + +Now we visualize the expression of the top 6 features identified by Moran's I. + +```{r spatial.vf.plot.ss, fig.height = 8} +SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/spatial_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd new file mode 100644 index 000000000..86779b7d3 --- /dev/null +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -0,0 +1,339 @@ +--- +title: "Analysis of Image-based Spatial Data in Seurat" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +# Overview + +In this vignette, we introduce a Seurat extension to analyze new types of spatially-resolved data. We have [previously introduced a spatial framework](https://satijalab.org/seurat/articles/spatial_vignette.html) which is compatible with sequencing-based technologies, like the 10x Genomics Visium system, or SLIDE-seq. Here, we extend this framework to analyze new data types that are captured via highly multiplexed imaging. In contrast to sequencing-based technologies, these datasets are often targeted (i.e. they profile a pre-selected set of genes). However they can resolve individual molecules - retaining single-cell (and subcellular) resolution. These approaches also often capture cellular boundaries (segmentations). + +We update the Seurat infrastructure to enable the analysis, visualization, and exploration of these exciting datasets. In this vignette, we focus on three datasets produced by different multiplexed imaging technologies, each of which is publicly available. We will be adding support for additional imaging-based technologies in the coming months. + +* Vizgen MERSCOPE (Mouse Brain) +* Nanostring CosMx Spatial Molecular Imager (FFPE Human Lung) +* Akoya CODEX (Human Lymph Node) + +First, we load the packages necessary for this vignette. + +```{r init, message=FALSE, warning=FALSE} +library(Seurat) +library(future) +plan("multisession", workers = 10) +``` + +# Mouse Brain: Vizgen MERSCOPE + +This dataset was produced using the Vizgen MERSCOPE system, which utilizes the MERFISH technology. The total dataset is available for [public download](https://info.vizgen.com/mouse-brain-data), and contains nine samples (three full coronal slices of the mouse brain, with three biological replicates per slice). The gene panel consists of 483 gene targets, representing known anonical cell type markers, nonsensory G-Protein coupled receptors (GPCRs), and Receptor Tyrosine Kinases (RTKs). In this vignette, we analyze one of the samples - slice 2, replicate 1. The median number of transcripts detected in each cell is 206. + +First, we read in the dataset and create a Seurat object. + +We use the `LoadVizgen()` function, which we have written to read in the output of the Vizgen analysis pipeline. The resulting Seurat object contains the following information: + +* A count matrix, indicating the number of observed molecules for each of the 483 transcripts in each cell. This matrix is analogous to a count matrix in scRNA-seq, and is stored by default in the RNA assay of the Seurat object + +```{r, message=FALSE, warning=FALSE} +# Loading segmentations is a slow process and multi processing with the future pacakge is recommended +vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/spatial_vignette_data/vizgen/s2r1/", fov = "s2r1") +``` + +The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: + +
    + **Cell Centroids: The spatial coordinates marking the centroid for each cell being profiled** + +```{r} +# Get the center position of each centroid. There is one row per cell in this dataframe. +head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["centroids"]])) +``` +
    +
    + **Cell Segmentation Boundaries: The spatial coordinates that describe the polygon segmentation of each single cell** + +```{r} +# Get the coordinates for each segmentation vertice. Each cell will have a variable number of vertices describing its shape. +head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["segmentation"]])) +``` +
    +
    + **Molecule positions: The spatial coordinates for each individual molecule that was detected during the multiplexed smFISH experiment.** + +```{r} +# Fetch molecules positions for Chrm1 +head(FetchData(vizgen.obj[["s2r1"]][["molecules"]], vars="Chrm1")) +``` +
    +\ + +## Preprocessing and unsupervised analysis +We start by performing a standard unsupervised clustering analysis, essentially first treating the dataset as an scRNA-seq experiment. We use SCTransform-based normalization, though we slightly modify the default clipping parameters to mitigate the effect of outliers that we occasionally observe in smFISH experiments. After normalization, we can run dimensional reduction and clustering. + +```{r analysis, results='hide'} +vizgen.obj <- SCTransform(vizgen.obj, assay = "Vizgen", clip.range = c(-10,10),) +vizgen.obj <- RunPCA(vizgen.obj, npcs = 30, features = rownames(vizgen.obj)) +vizgen.obj <- RunUMAP(vizgen.obj, dims = 1:30) +vizgen.obj <- FindNeighbors(vizgen.obj, reduction = "pca", dims = 1:30) +vizgen.obj <- FindClusters(vizgen.obj, resolution = 0.3) +``` + +We can then visualize the results of the clustering either in UMAP space (with `DimPlot()`) or overlaid on the image with `ImageDimPlot()`. + +```{r umap} +DimPlot(vizgen.obj, reduction = "umap") +``` + +```{r spatial.plot, fig.height=6, fig.width=6} +ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "polychrome", axes = TRUE) +``` + +You can also customize multiple aspect of the plot, including the color scheme, cell border widths, and size (see below). + +
    + **Customizing spatial plots in Seurat** + +The `ImageDimPlot()` and `ImageFeaturePlot()` functions have a few parameters which you can customize individual visualizations. These include: + +* alpha: Ranges from 0 to 1. Sets the transparency of within-cell coloring. +* size: determines the size of points representing cells, if centroids are being plotted +* cols: Sets the color scheme for the internal shading of each cell. Examples settings are `polychrome`, `glasbey`, `Paired`, `Set3`, and `parade`. Default is the ggplot2 color palette +* shuffle.cols: In some cases the selection of `cols` is more effective when the same colors are assigned to different clusters. Set `shuffle.cols = TRUE` to randomly shuffle the colors in the palette. +* border.size: Sets the width of the cell segmentation borders. By default, segmentations are plotted with a border size of 0.3 and centroids are plotted without border. +* border.color: Sets the color of the cell segmentation borders +* dark.background: Sets a black background color (TRUE by default) +* axes: Display +
    + +Since it can be difficult to visualize the spatial localization patterns of an individual cluster when viewing them all together, we can highlight all cells that belong to a particular cluster: + +```{r, fig.height=8, fig.width=12} +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 14)) +p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 15)) +p1 + p2 +``` + +We can find markers of individual clusters and visualize their spatial expression pattern. We can color cells based on their quantified expression of an individual gene, using `ImageFeaturePlot()`, which is analagous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. Since MERFISH images individual molecules, we can also visualize the location of individual *molecules*. + +```{r, fig.height=7, fig.width=12} +p1 <- ImageFeaturePlot(vizgen.obj, features = "Slc17a7") +p2 <- ImageDimPlot(vizgen.obj, molecules = "Slc17a7", nmols = 10000, alpha = 0.3, mols.cols = "red") +p1 + p2 +``` + +Note that the `nmols` parameter can be used to reduce the total number of molecules shown to reduce overplotting. You can also use the `mols.size`, `mols.cols`, and `mols.alpha` parameter to further optimize. + +Plotting molecules is especially useful for visualizing co-expression of multiple genes on the same plot. + +```{r, fig.height=7, fig.width=12} +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", alpha = 0.3, molecules = c("Slc17a7", "Olig1"), nmols = 10000) +markers.14 <- FindMarkers(vizgen.obj, ident.1 = "14") +p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", alpha = 0.3, molecules = rownames(markers.14)[1:4], nmols = 10000) +p1 + p2 +``` + +The updated Seurat spatial framework has the option to treat cells as individual points, or also to visualize cell boundaries (segmentations). By default, Seurat ignores cell segmentations and treats each cell as a point ('centroids'). This speeds up plotting, especially when looking at large areas, where cell boundaries are too small to visualize. + +We can zoom into a region of tissue, creating a new field of view. For example, we can zoom into a region that contains the hippocampus. Once zoomed-in, we can set `DefaultBoundary()` to show cell segmentations. You can also 'simplify' the cell segmentations, reducing the number of edges in each polygon to speed up plotting. + +```{r, fig.height=5, fig.width=14} +# create a Crop +cropped.coords <- Crop(vizgen.obj[["s2r1"]], x = c(1750, 3000), y = c(3750, 5250), coords = "plot") +# set a new field of view (fov) +vizgen.obj[["hippo"]] <- cropped.coords + +# visualize FOV using default settings (no cell boundaries) +p1 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, size = 0.7, border.color = "white", cols = "polychrome", coord.fixed = FALSE) + +# visualize FOV with full cell segmentations +DefaultBoundary(vizgen.obj[["hippo"]]) <- "segmentation" +p2 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome", coord.fixed = FALSE) + +# simplify cell segmentations +vizgen.obj[["hippo"]][["simplified.segmentations"]] <- Simplify(coords = vizgen.obj[["hippo"]][["segmentation"]], tol = 3) +DefaultBoundary(vizgen.obj[["hippo"]]) <- "simplified.segmentations" + +# visualize FOV with simplified cell segmentations +DefaultBoundary(vizgen.obj[["hippo"]]) <- "simplified.segmentations" +p3 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome", coord.fixed = FALSE) + +p1 + p2 + p3 +``` + +
    + **What is the tol parameter?** + +The tol parameter determines how simplified the resulting segmentations are. A higher value of tol will reduce the number of vertices more drastically which will speed up plotting, but some segmentation detail will be lost. See https://rgeos.r-forge.r-project.org/reference/topo-unary-gSimplify.html for examples using different values for tol. + +
    + +We can visualize individual molecules plotted at higher resolution after zooming-in +```{r, fig.height=8, fig.width=8} +# Since there is nothing behind the segmentations, alpha will slightly mute colors +ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], cols = "polychrome", mols.size = 1, alpha = 0.5, mols.cols = c("red", "blue", "yellow", "green")) +``` + +# Human Lung: Nanostring CosMx Spatial Molecular Imager + +This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. + +In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `LoadNanostring()` function, which parses the outputs available on the public download site. Note that the coordinates for the cell boundaries were provided by Nanostring by request, and are available for download [here](https://www.dropbox.com/s/hl3peavrx92bluy/Lung5_Rep1-polygons.csv?dl=0). + +For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. + +```{r load} +nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") +``` + +```{r integration} +# add in precomputed Azimuth annotations +azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") +nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) +nano.obj[["proj.umap"]] <- azimuth.data$umap +Idents(nano.obj) <- nano.obj$predicted.annotation.l1 + +# set to avoid error exceeding max allowed size of globals +options(future.globals.maxSize = 8000 * 1024^2) +nano.obj <- SCTransform(nano.obj, assay = "Nanostring", clip.range = c(-10, 10), verbose = FALSE) + +# text display of annotations and prediction scores +head(slot(object = nano.obj, name = "meta.data")[2:5]) +``` + +We can visualize the Nanostring cells and annotations, projected onto the reference-defined UMAP. Note that for this NSCLC sample, tumor samples are annotated as 'basal', which is the closest cell type match in the healthy reference. + +```{r, fig.width=9, fig.height=4} +DimPlot(nano.obj) +``` + +## Visualization of cell type and expression localization patterns + +As in the previous example, `ImageDimPlot()` plots c ells based on their spatial locations, and colors them based on their assigned cell type. Notice that the basal cell population (tumor cells) is tightly spatially organized, as expected. + +```{r, fig.width=11, fig.height=7} +ImageDimPlot(nano.obj, fov = "lung5.rep1", axes = TRUE, cols = "glasbey") +``` + +Since there are many cell types present, we can highlight the localization of a few select groups. + +```{r, fig.width=10, fig.height=7} +ImageDimPlot(nano.obj, fov = "lung5.rep1", cells = WhichCells(nano.obj, idents=c("Basal", "Macrophage", "Smooth Muscle", "CD4 T")), cols=c("red", "green", "blue", "orange"), size = 0.6) +``` + +We can also visualize gene expression markers a few different ways: + +```{r, fig.width=10, fig.height=5} +VlnPlot(nano.obj, features = "KRT17", slot = "counts", pt.size = 0.1, y.max = 30) + NoLegend() +``` + +```{r, fig.width=5, fig.height=4} +FeaturePlot(nano.obj, features = "KRT17") +``` + +```{r, fig.height=4, fig.width=8} +p1 <- ImageFeaturePlot(nano.obj, fov = "lung5.rep1", features = "KRT17", max.cutoff = "q95") +p2 <- ImageDimPlot(nano.obj, fov = "lung5.rep1", alpha = 0.3, molecules = "KRT17", nmols = 10000) + NoLegend() +p1 + p2 +``` + +We can plot molecules in order to co-visualize the expression of multiple markers, including KRT17 (basal cells), C1QA (macrophages), IL7R (T cells), and TAGLN (Smooth muscle cells). + +```{r, fig.width=10, fig.height=7} +# Plot some of the molecules which seem to display spatial correlation with each other +ImageDimPlot(nano.obj, fov = "lung5.rep1", group.by = NA, alpha = 0.3, molecules = c("KRT17", "C1QA", "IL7R", "TAGLN"), nmols = 20000) +``` + +We zoom in on one basal-rich region using the `Crop()` function. Once zoomed-in, we can visualize individual cell boundaries as well in all visualizations. + +```{r} +basal.crop <- Crop(nano.obj[["lung5.rep1"]], x = c(159500, 164000), y = c(8700, 10500)) +nano.obj[["zoom1"]] <- basal.crop +DefaultBoundary(nano.obj[["zoom1"]]) <- "segmentation" +``` + +```{r, fig.width=11, fig.height=7} +ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", coord.fixed = FALSE) +``` + +```{r, fig.width=11, fig.height=7} +# note the clouds of TPSAB1 molecules denoting mast cells +ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", alpha = 0.3, molecules = c("KRT17", "IL7R", "TPSAB1"), mols.size = 0.3, nmols = 20000, border.color = "black", coord.fixed = FALSE) +``` + +# Human Lymph Node: Akoya CODEX system + +This dataset was produced using Akoya CODEX system. The CODEX system performs multiplexed spatially-resolved protein profiling, iteratively visualizing antibody-binding events. The dataset here represents a tissue section from a human lymph node, and was generated by the University of Florida as part of the Human Biomolecular Atlas Program (HuBMAP). More information about the sample and experiment is available [here](https://portal.hubmapconsortium.org/browse/dataset/c95d9373d698faf60a66ffdc27499fe1). The protein panel in this dataset consists of 28 markers, and protein intensities were quantified as part of the Akoya processor pipeline, which outputs a CSV file providing the intensity of each marker in each cell, as well as the cell coordinates. The file is available for public download via Globus [here](https://app.globus.org/file-manager?origin_id=af603d86-eab9-4eec-bb1d-9d26556741bb&origin_path=%2Fc95d9373d698faf60a66ffdc27499fe1%2Fdrv_CX_20-008_lymphnode_n10_reg001%2Fprocessed_2020-12-2320-008LNn10r001%2Fsegm%2Fsegm-1%2Ffcs%2Fcompensated%2F). + + +First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function in Seurat: + +```{r} +codex.obj <- LoadAkoya( + filename = "/brahms/hartmana/spatial_vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", + type = "processor", + fov = "HBM754.WKLP.262" +) +``` + +We can now run unsupervised analysis to identify cell clusters. To normalize the protein data, we use centered log-ratio based normalization, as we typically apply to the protein modality of CITE-seq data. We then run dimensional reduction and graph-based clustering. + +```{r} +codex.obj <- NormalizeData(object = codex.obj, normalization.method = "CLR", margin = 2) +codex.obj <- ScaleData(codex.obj) +VariableFeatures(codex.obj) <- rownames(codex.obj) # since the panel is small, treat all features as variable. +codex.obj <- RunPCA(object = codex.obj, npcs = 20, verbose = FALSE) +codex.obj <- RunUMAP(object = codex.obj, dims = 1:20, verbose = FALSE) +codex.obj <- FindNeighbors(object = codex.obj, dims = 1:20, verbose = FALSE) +codex.obj <- FindClusters(object = codex.obj, verbose = FALSE, resolution = 0.4, n.start = 1) +``` + +We can visualize the cell clusters based on a protein intensity-based UMAP embedding, or also based on their spatial location. + +```{r} +DimPlot(codex.obj, label = TRUE, label.box = TRUE) + NoLegend() +``` + +```{r, fig.width=6, fig.height=5} +ImageDimPlot(codex.obj, cols = "parade") +``` + +The expression patters of individual markers clearly denote different cell types and spatial structures, including Lyve1 (lymphatic endothelial cells), CD34 (blood endothelial cells), and CD21 (B cells). As expected, endothelial cells group together into vessels, and B cells are key components of specialized microstructures known as germinal zones. You can read more about protein markers in this dataset, as well as cellular networks in human lynmphatic tissues, in this [preprint](https://www.biorxiv.org/content/10.1101/2021.10.20.465151v1.full). + +```{r, fig.width=9, fig.height=8} +p1 <- ImageFeaturePlot(codex.obj, fov = "HBM754.WKLP.262", features = c("CD34", "CD21", "Lyve1"), min.cutoff = "q10", max.cutoff = "q90") +p2 <- ImageDimPlot(codex.obj, fov = "HBM754.WKLP.262", cols = "parade") +p1 + p2 +``` + +Each of these datasets represents an opportunity to learn organizing principles that govern the spatial localization of different cell types. Stay tuned for future updates to Seurat enabling further exploration and characterization of the relationship between spatial position and molecular state. + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_v4_changes.Rmd b/vignettes/seurat5_v4_changes.Rmd new file mode 100644 index 000000000..d7020c930 --- /dev/null +++ b/vignettes/seurat5_v4_changes.Rmd @@ -0,0 +1,38 @@ +--- +title: "Changes in Seurat v4" +output: + html_document: + theme: united + df_print: kable +--- + +# Changes in Seurat v4 + +We have made minor changes in v4, primarily to improve the performance of Seurat v4 on large datasets. This includes minor changes to default parameter settings, and the use of newly available packages for tasks such as the identification of k-nearest neighbors, and graph-based clustering. These changes do not adversely impact downstream results, and we provide a detailed description of key changes below. Users who wish to continue using Seurat v3, or those interested in reproducing results produced by previous versions, may continue to install previous versions [here](articles/install.html#previous). + +# Changes to parameter defaults + +## FindNeighbors + +* The default method for identifying k-nearest neighbors has been set to [annoy](https://github.com/spotify/annoy). This is an approximate nearest-neighbor approach that is widely used for high-dimensional analysis in many fields, including single-cell analysis. Extensive [community benchmarking](http://ann-benchmarks.com/) has shown that annoy substantially improves the speed and memory requirements of neighbor discovery, with negligible impact to downstream results, and is consistent with our analysis and testing. Users may switch back to using the previous default setting using `nn.method="rann"`. + +## FindMarkers + +* We have restructured the code of the `FindMarkers()` function to be easier to understand, interpret, and debug. The results of differential expression are unchanged. However, by default we now prefilter genes and report fold change using base 2, as is commonly done in other differential expression packages, instead of natural log. If the default option is set, the output of `FindMarkers()` will include the column avg_log2FC, instead of avg_logFC. Users can restore the previous behavior (natural log), by specifying `base = exp(1)`. + +## IntegrateData/TransferData + +* We have made minor changes to the exact calculation of the anchor weight matrix, for example, in cases where a query cell participates in multiple anchor pairs. These changes reflect an improved workflow, but do not result in meaningful differences for downstream analysis (for example, see you can compare the results of our integration vignettes using [Seurat v3]() and [Seurat v4](articles/immune_alignment.html). + +## SCTransform + +* In `SCTransform()`, we slightly modify default parameters to improve scalability of parameter estimation for large datasets. For example, when estimating the regularized relationship between mu and theta, we compute this on a subset of the data by setting the `ncells` parameter to 5,000. The `vst()` function in sctransform v0.3 (available on CRAN [here](https://cran.r-project.org/web/packages/sctransform/index.html)) also introduces minor changes to the process of regularization. We have tested these changes extensively and found a substantial improvement in speed and memory, particularly for large dataset, with no adverse impact to performance. User can compare the results of the SCTransform vignette computed using [Seurat v3]() and [Seurat v4](articles/sctransform_vignette.html), or set `ncells=NULL` on larger datasets to compare results. + +## Removed functions + +The following functions have been removed in Seurat v4: + +- `CreateGeneActivityMatrix` replaced by `GeneActivity` in [Signac](https://satijalab.org/signac/reference/GeneActivity.html) +- `RunLSI` replaced by `RunTFIDF` and `RunSVD` in [Signac](https://satijalab.org/signac/reference/RunTFIDF.html) +- `ReadAlevin` and `ReadAlevinCsv` moved to [SeuratWrappers](https://github.com/satijalab/seurat-wrappers), see details [here](https://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/alevin.html) +- `ExportToCellbrowser` and `StopCellbrowser` moved to [SeuratWrappers](https://github.com/satijalab/seurat-wrappers), see details [here](https://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/cellbrowser.html) diff --git a/vignettes/seurat5_visualization_vignette.Rmd b/vignettes/seurat5_visualization_vignette.Rmd new file mode 100644 index 000000000..2b8e8bdf6 --- /dev/null +++ b/vignettes/seurat5_visualization_vignette.Rmd @@ -0,0 +1,251 @@ +--- +title: 'Data visualization methods in Seurat' +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + fig.cap = '', + fig.width = 9, + fig.height = 7, + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +options(SeuratData.repo.use = 'satijalab04.nygenome.org') +``` + +We'll demonstrate visualization techniques in Seurat using our previously computed Seurat object from the 2,700 PBMC tutorial. You can download this dataset from [SeuratData](https://github.com/satijalab/seurat-data) + +```{r data, eval = FALSE} +SeuratData::InstallData('pbmc3k') +``` + +```{r seed, include=TRUE} +set.seed(seed = 42) +``` + +```{r initialize_object} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(ggplot2) +library(patchwork) +data("pbmc3k.final") +pbmc3k.final <- UpdateSeuratObject(pbmc3k.final) +pbmc3k.final[["RNA"]] <- CreateAssay5Object(counts=pbmc3k.final[["RNA"]]@counts) +pbmc3k.final <- NormalizeData(pbmc3k.final) +DefaultLayer(pbmc3k.final[["RNA"]]) <- "counts" +pbmc3k.final <- FindVariableFeatures(pbmc3k.final) +DefaultLayer(pbmc3k.final[["RNA"]]) <- "data" +pbmc3k.final <- ScaleData(pbmc3k.final) +pbmc3k.final$groups <- sample(c('group1', 'group2'), size = ncol(pbmc3k.final), replace = TRUE) +features <- c("LYZ", "CCL5", "IL32", "PTPRCAP", "FCGR3A", "PF4") +pbmc3k.final +``` + + +# Five visualizations of marker feature expression + +```{r visualization_smorgasbord, fig.height=11} +# Ridge plots - from ggridges. Visualize single cell expression distributions in each cluster +RidgePlot(pbmc3k.final, features = features, ncol = 2) + +# Violin plot - Visualize single cell expression distributions in each cluster +VlnPlot(pbmc3k.final, features = features) + +# Feature plot - visualize feature expression in low-dimensional space +FeaturePlot(pbmc3k.final, features = features) +``` + +```{r visualization_smorgasbord2, fig.height = 5} +# Dot plots - the size of the dot corresponds to the percentage of cells expressing the feature in each cluster. The color represents the average expression level +DotPlot(pbmc3k.final, features = features) + RotatedAxis() +# Single cell heatmap of feature expression +DoHeatmap(subset(pbmc3k.final, downsample = 100), features = features, size = 3) +``` + +# New additions to `FeaturePlot` + +```{r featureplot} +# Plot a legend to map colors to expression levels +FeaturePlot(pbmc3k.final, features = 'MS4A1') + +# Adjust the contrast in the plot +FeaturePlot(pbmc3k.final, features = 'MS4A1', min.cutoff = 1, max.cutoff = 3) +``` + +```{r featureplot2, fig.height = 4} +# Calculate feature-specific contrast levels based on quantiles of non-zero expression. Particularly useful when plotting multiple markers +FeaturePlot(pbmc3k.final, features = c('MS4A1', "PTPRCAP"), min.cutoff = "q10", max.cutoff = "q90") + +# Visualize co-expression of two features simultaneously +FeaturePlot(pbmc3k.final, features = c('MS4A1', 'CD79A'), blend = TRUE) +``` + +```{r featureplot.split} +# Split visualization to view expression by groups (replaces FeatureHeatmap) +FeaturePlot(pbmc3k.final, features = c('MS4A1', 'CD79A'), split.by = 'groups') +``` + +# Updated and expanded visualization functions + +In addition to changes to `FeaturePlot()`, several other plotting functions have been updated and expanded with new features and taking over the role of now-deprecated functions + +```{r new_functions} +# Violin plots can also be split on some variable. Simply add the splitting variable to object metadata and pass it to the split.by argument +VlnPlot(pbmc3k.final, features = 'percent.mt', split.by = 'groups') + +# SplitDotPlotGG has been replaced with the `split.by` parameter for DotPlot +DotPlot(pbmc3k.final, features = features, split.by = 'groups') + RotatedAxis() + +# DimPlot replaces TSNEPlot, PCAPlot, etc. In addition, it will plot either "umap", "tsne", or "pca" by default, in that order +DimPlot(pbmc3k.final) +pbmc3k.final.no.umap <- pbmc3k.final +pbmc3k.final.no.umap[['umap']] <- NULL +DimPlot(pbmc3k.final.no.umap) + RotatedAxis() +``` + +```{r new2, fig.width=11, fig.height = 15} +# DoHeatmap now shows a grouping bar, splitting the heatmap into groups or clusters. This can be changed with the `group.by` parameter +DoHeatmap(pbmc3k.final, features = VariableFeatures(pbmc3k.final)[1:100], cells = 1:500, size = 4, angle = 90) + NoLegend() +``` + +# Applying themes to plots + +With Seurat, all plotting functions return ggplot2-based plots by default, allowing one to easily capture and manipulate plots just like any other ggplot2-based plot. + +```{r themeing, fig.height=6} +baseplot <- DimPlot(pbmc3k.final, reduction = 'umap') +# Add custom labels and titles +baseplot + labs(title = 'Clustering of 2,700 PBMCs') +# Use community-created themes, overwriting the default Seurat-applied theme +# Install ggmin with remotes::install_github("sjessa/ggmin") +baseplot + ggmin::theme_powerpoint() +# Seurat also provides several built-in themes, such as DarkTheme; for more details see ?SeuratTheme +baseplot + DarkTheme() +# Chain themes together +baseplot + FontSize(x.title = 20, y.title = 20) + NoLegend() +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- baseplot + DarkTheme() + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + guides(colour = guide_legend(override.aes = list(size = 10))) +ggsave(filename = "../output/images/visualization_vignette.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +# Interactive plotting features + +Seurat utilizes R's plotly graphing library to create interactive plots. This interactive plotting feature works with any ggplot2-based scatter plots (requires a `geom_point` layer). To use, simply make a ggplot2-based scatter plot (such as `DimPlot()` or `FeaturePlot()`) and pass the resulting plot to `HoverLocator()` + +```{r hover} +# Include additional data to display alongside cell names by passing in a data frame of information +# Works well when using FetchData +plot <- FeaturePlot(pbmc3k.final, features = 'MS4A1') +HoverLocator(plot = plot, information = FetchData(pbmc3k.final, vars = c('ident', 'PC_1', 'nFeature_RNA'))) +``` + +Another interactive feature provided by Seurat is being able to manually select cells for further investigation. We have found this particularly useful for small clusters that do not always separate using unbiased clustering, but which look tantalizingly distinct. You can now select these cells by creating a ggplot2-based scatter plot (such as with `DimPlot()` or `FeaturePlot()`, and passing the returned plot to `CellSelector()`. `CellSelector()` will return a vector with the names of the points selected, so that you can then set them to a new identity class and perform differential expression. + +For example, lets pretend that DCs had merged with monocytes in the clustering, but we wanted to see what was unique about them based on their position in the tSNE plot. + +```{r identify, eval=FALSE} +pbmc3k.final <- RenameIdents(pbmc3k.final, "DC" = "CD14+ Mono") +plot <- DimPlot(pbmc3k.final, reduction = 'umap') +select.cells <- CellSelector(plot = plot) +``` + +![](./assets/pbmc_select.gif) + +```{r load_cells, echo=FALSE} +select.cells <- readLines(con = './assets/pbmc_dcs_cells.txt') +# select.cells <- paste0(select.cells, "-1") +``` + +We can then change the identity of these cells to turn them into their own mini-cluster. + +```{r ident} +head(select.cells) +Idents(pbmc3k.final, cells = select.cells) <- 'NewCells' + +# Now, we find markers that are specific to the new cells, and find clear DC markers +newcells.markers <- FindMarkers(pbmc3k.final, ident.1 = "NewCells", ident.2 = "CD14+ Mono", min.diff.pct = 0.3, only.pos = TRUE) +head(newcells.markers) +``` + +
    + Using `CellSelector` to Automatically Assign Cell Identities + + In addition to returning a vector of cell names, `CellSelector()` can also take the selected cells and assign a new identity to them, returning a Seurat object with the identity classes already set. This is done by passing the Seurat object used to make the plot into `CellSelector()`, as well as an identity class. As an example, we're going to select the same set of cells as before, and set their identity class to "selected" + +```{r ident2, eval=FALSE} +pbmc3k.final <- CellSelector(plot = plot, object = pbmc3k.final, ident = 'selected') +``` + +![](./assets/pbmc_select.gif) + +```{r ident2_hidden, echo=FALSE} +pbmc3k.final <- RenameIdents(pbmc3k.final, 'NewCells' = 'selected') +``` + +```{r ident2_levels} +levels(pbmc3k.final) +``` + +
    + +# Plotting Accessories + +Along with new functions add interactive functionality to plots, Seurat provides new accessory functions for manipulating and combining plots. + +```{r labelling} +# LabelClusters and LabelPoints will label clusters (a coloring variable) or individual points on a ggplot2-based scatter plot +plot <- DimPlot(pbmc3k.final, reduction = 'pca') + NoLegend() +LabelClusters(plot = plot, id = 'ident') +# Both functions support `repel`, which will intelligently stagger labels and draw connecting lines from the labels to the points or clusters +LabelPoints(plot = plot, points = TopCells(object = pbmc3k.final[['pca']]), repel = TRUE) +``` + +Plotting multiple plots was previously achieved with the `CombinePlot()` function. We are deprecating this functionality in favor of the [patchwork](https://patchwork.data-imaginist.com/) system. Below is a brief demonstration but please see the patchwork package website [here](https://patchwork.data-imaginist.com/) for more details and examples. + +```{r combining_plots, fig.height = 5} +plot1 <- DimPlot(pbmc3k.final) +plot2 <- FeatureScatter(pbmc3k.final, feature1 = 'LYZ', feature2 = 'CCL5') +# Combine two plots +plot1 + plot2 +# Remove the legend from all plots +(plot1 + plot2) & NoLegend() +``` + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/visualization_vignette_times.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd new file mode 100644 index 000000000..c6b6b08bf --- /dev/null +++ b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd @@ -0,0 +1,443 @@ +--- +title: "Weighted Nearest Neighbor Analysis" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now) + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` +The simultaneous measurement of multiple modalities, known as multimodal analysis, represents an exciting frontier for single-cell genomics and necessitates new computational methods that can define cellular states based on multiple data types. The varying information content of each modality, even across cells in the same dataset, represents a pressing challenge for the analysis and integration of multimodal datasets. In ([Hao\*, Hao\* et al, Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)), we introduce 'weighted-nearest neighbor' (WNN) analysis, an unsupervised framework to learn the relative utility of each data type in each cell, enabling an integrative analysis of multiple modalities. + +This vignette introduces the WNN workflow for the analysis of multimodal single-cell datasets. The workflow consists of three steps + +* Independent preprocessing and dimensional reduction of each modality individually +* Learning cell-specific modality 'weights', and constructing a WNN graph that integrates the modalities +* Downstream analysis (i.e. visualization, clustering, etc.) of the WNN graph + +We demonstrate the use of WNN analysis to two single-cell multimodal technologies: CITE-seq and 10x multiome. We define the cellular states based on both modalities, instead of either individual modality. + + +# WNN analysis of CITE-seq, RNA + ADT +We use the CITE-seq dataset from ([Stuart\*, Butler\* et al, Cell 2019](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8)), which consists of 30,672 scRNA-seq profiles measured alongside a panel of 25 antibodies from bone marrow. The object contains two assays, RNA and antibody-derived tags (ADT). + +To run this vignette please install Seurat v4, available on [CRAN](https://cran.r-project.org/web/packages/Seurat/index.html), and SeuratData, available on [GitHub](https://github.com/satijalab/seurat-data). + +```{r install, eval = FALSE} +install.packages("Seurat") +``` + +```{r, include=TRUE, cache=FALSE} +options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +``` + +```{r packages, cache=FALSE} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(cowplot) +library(dplyr) +``` + +```{r} +InstallData("bmcite") +bm <- LoadData(ds = "bmcite") +bm <- UpdateSeuratObject(bm) +bm[["ADT"]] <- CreateAssay5Object(bm[["ADT"]]@counts) +bm[["RNA"]] <- CreateAssay5Object(bm[["RNA"]]@counts) +``` + +We first perform pre-processing and dimensional reduction on both assays independently. We use standard normalization, but you can also use SCTransform or any alternative method. + +```{r pp.rna} +DefaultAssay(bm) <- 'RNA' +bm <- NormalizeData(bm) +DefaultLayer(bm[["RNA"]]) <- "counts" +bm <- FindVariableFeatures(bm) +DefaultLayer(bm[["RNA"]]) <- "data" +bm <- ScaleData(bm) +bm <- RunPCA(bm) + +DefaultAssay(bm) <- 'ADT' +# we will use all ADT features for dimensional reduction +# we set a dimensional reduction name to avoid overwriting the +VariableFeatures(bm) <- rownames(bm[["ADT"]]) +bm <- NormalizeData(bm, normalization.method = 'CLR', margin = 2) %>% + ScaleData() %>% RunPCA(reduction.name = 'apca') +``` +For each cell, we calculate its closest neighbors in the dataset based on a weighted combination of RNA and protein similarities. The cell-specific modality weights and multimodal neighbors are calculated in a single function, which takes ~2 minutes to run on this dataset. We specify the dimensionality of each modality (similar to specifying the number of PCs to include in scRNA-seq clustering), but you can vary these settings to see that small changes have minimal effect on the overall results. + +```{r jc} +# Identify multimodal neighbors. These will be stored in the neighbors slot, +# and can be accessed using bm[['weighted.nn']] +# The WNN graph can be accessed at bm[["wknn"]], +# and the SNN graph used for clustering at bm[["wsnn"]] +# Cell-specific modality weights can be accessed at bm$RNA.weight +bm <- FindMultiModalNeighbors( + bm, reduction.list = list("pca", "apca"), + dims.list = list(1:30, 1:18), modality.weight.name = "RNA.weight" +) +``` + +We can now use these results for downstream analysis, such as visualization and clustering. For example, we can create a UMAP visualization of the data based on a weighted combination of RNA and protein data We can also perform graph-based clustering and visualize these results on the UMAP, alongside a set of cell annotations. + +```{r wumap, fig.width=10} +bm <- RunUMAP(bm, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_") +bm <- FindClusters(bm, graph.name = "wsnn", algorithm = 3, resolution = 2, verbose = FALSE) +``` + +```{r wumap.plot, fig.width = 10} +p1 <- DimPlot(bm, reduction = 'wnn.umap', label = TRUE, repel = TRUE, label.size = 2.5) + NoLegend() +p2 <- DimPlot(bm, reduction = 'wnn.umap', group.by = 'celltype.l2', label = TRUE, repel = TRUE, label.size = 2.5) + NoLegend() +p1 + p2 +``` + +We can also compute UMAP visualization based on only the RNA and protein data and compare. We find that the RNA analysis is more informative than the ADT analysis in identifying progenitor states (the ADT panel contains markers for differentiated cells), while the converse is true of T cell states (where the ADT analysis outperforms RNA). + +```{r compumap} +bm <- RunUMAP(bm, reduction = 'pca', dims = 1:30, assay = 'RNA', + reduction.name = 'rna.umap', reduction.key = 'rnaUMAP_') +bm <- RunUMAP(bm, reduction = 'apca', dims = 1:18, assay = 'ADT', + reduction.name = 'adt.umap', reduction.key = 'adtUMAP_') +``` + +```{r umapplot2, fig.width=10} +p3 <- DimPlot(bm, reduction = 'rna.umap', group.by = 'celltype.l2', label = TRUE, + repel = TRUE, label.size = 2.5) + NoLegend() +p4 <- DimPlot(bm, reduction = 'adt.umap', group.by = 'celltype.l2', label = TRUE, + repel = TRUE, label.size = 2.5) + NoLegend() +p3 + p4 +``` + +We can visualize the expression of canonical marker genes and proteins on the multimodal UMAP, which can assist in verifying the provided annotations: +```{r ftplot, fig.width = 10, fig.height = 7} +p5 <- FeaturePlot(bm, features = c("adt_CD45RA","adt_CD16","adt_CD161"), + reduction = 'wnn.umap', max.cutoff = 2, + cols = c("lightgrey","darkgreen"), ncol = 3) +p6 <- FeaturePlot(bm, features = c("rna_TRDC","rna_MPO","rna_AVP"), + reduction = 'wnn.umap', max.cutoff = 3, ncol = 3) +p5 / p6 +``` + +Finally, we can visualize the modality weights that were learned for each cell. Each of the populations with the highest RNA weights represent progenitor cells, while the populations with the highest protein weights represent T cells. This is in line with our biological expectations, as the antibody panel does not contain markers that can distinguish between different progenitor populations. + +```{r plotwts, fig.width=10} + VlnPlot(bm, features = "RNA.weight", group.by = 'celltype.l2', sort = TRUE, pt.size = 0.1) + + NoLegend() +``` + +```{r save.img, include=TRUE} +library(ggplot2) +plot <- VlnPlot(bm, features = "RNA.weight", group.by = 'celltype.l2', sort = TRUE, pt.size = 0.1) + + NoLegend() + xlab("") + ggtitle("RNA Modality Weights") + theme(plot.title = element_text(hjust = 0.5, size = 30), axis.text = element_text(size = 20)) + +ggsave(filename = "../output/images/weighted_nearest_neighbor_analysis.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +# WNN analysis of 10x Multiome, RNA + ATAC + +Here, we demonstrate the use of WNN analysis to a second multimodal technology, the 10x multiome RNA+ATAC kit. We use a dataset that is publicly available on the 10x website, where paired transcriptomes and ATAC-seq profiles are measured in 10,412 PBMCs. + +We use the same WNN methods as we use in the previous tab, where we apply integrated multimodal analysis to a CITE-seq dataset. In this example we will demonstrate how to: + +* Create a multimodal Seurat object with paired transcriptome and ATAC-seq profiles +* Perform weighted neighbor clustering on RNA+ATAC data in single cells +* Leverage both modalities to identify putative regulators of different cell types and states + +You can download the dataset from the 10x Genomics website [here](https://support.10xgenomics.com/single-cell-multiome-atac-gex/datasets/1.0.0/pbmc_granulocyte_sorted_10k). Please make sure to download the following files: + +* Filtered feature barcode matrix (HDF5) +* ATAC Per fragment information file (TSV.GZ) +* ATAC Per fragment information index (TSV.GZ index) + +Finally, in order to run the vignette, make sure the following packages are installed: + +* [Seurat v4](install.html) +* [Signac](https://satijalab.org/signac/) for the analysis of single-cell chromatin datasets +* [EnsDb.Hsapiens.v86](https://bioconductor.org/packages/release/data/annotation/html/EnsDb.Hsapiens.v86.html) for a set of annotations for hg38 +* [dplyr](https://cran.r-project.org/web/packages/dplyr/index.html) to help manipulate data tables + +```{r pkgs} +library(Seurat) +library(Signac) +library(EnsDb.Hsapiens.v86) +library(dplyr) +library(ggplot2) +``` + + + +We'll create a Seurat object based on the gene expression data, and then add in the ATAC-seq data as a second assay. You can explore the [Signac getting started vignette](https://satijalab.org/signac/articles/pbmc_vignette.html) for more information on the creation and processing of a ChromatinAssay object. + +```{r CreateObject} +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") + +# extract RNA and ATAC data +rna_counts <- inputdata.10x$`Gene Expression` +atac_counts <- inputdata.10x$Peaks + +# Create Seurat object +pbmc <- CreateSeuratObject(counts = rna_counts) +pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")$nCount_RNA +pbmc@meta.data[["percent.mt"]] <- as.numeric(pbmc@meta.data[["percent.mt"]]) + +# Now add in the ATAC-seq data +# we'll only use peaks in standard chromosomes +grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) +grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) +atac_counts <- atac_counts[as.vector(grange.use), ] +annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) +seqlevelsStyle(annotations) <- 'UCSC' +genome(annotations) <- "hg38" + +frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +chrom_assay <- CreateChromatinAssay( + counts = atac_counts, + sep = c(":", "-"), + genome = 'hg38', + fragments = frag.file, + min.cells = 10, + annotation = annotations + ) +pbmc[["ATAC"]] <- chrom_assay +``` + +We perform basic QC based on the number of detected molecules for each modality as well as mitochondrial percentage. + +```{r QCObject, fig.width=10} +VlnPlot(pbmc, features = c("nCount_ATAC", "nCount_RNA","percent.mt"), ncol = 3, + log = TRUE, pt.size = 0) + NoLegend() + +pbmc <- subset( + x = pbmc, + subset = nCount_ATAC < 7e4 & + nCount_ATAC > 5e3 & + nCount_RNA < 25000 & + nCount_RNA > 1000 & + percent.mt < 20 +) +``` + +We next perform pre-processing and dimensional reduction on both assays independently, using standard approaches for RNA and ATAC-seq data. + +```{r IndependentAnalysis} +# RNA analysis +DefaultAssay(pbmc) <- "RNA" +pbmc <- SCTransform(pbmc, verbose = FALSE) %>% RunPCA() %>% RunUMAP(dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_') + +# ATAC analysis +# We exclude the first dimension as this is typically correlated with sequencing depth +DefaultAssay(pbmc) <- "ATAC" +pbmc <- RunTFIDF(pbmc) +pbmc <- FindTopFeatures(pbmc, min.cutoff = 'q0') +pbmc <- RunSVD(pbmc) +pbmc <- RunUMAP(pbmc, reduction = 'lsi', dims = 2:50, reduction.name = "umap.atac", reduction.key = "atacUMAP_") +``` + +We calculate a WNN graph, representing a weighted combination of RNA and ATAC-seq modalities. We use this graph for UMAP visualization and clustering + +```{r MultiModalAnalysis} +pbmc <- FindMultiModalNeighbors(pbmc, reduction.list = list("pca", "lsi"), dims.list = list(1:50, 2:50)) +pbmc <- RunUMAP(pbmc, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_") +pbmc <- FindClusters(pbmc, graph.name = "wsnn", algorithm = 3, verbose = FALSE) +``` + +We annotate the clusters below. Note that you could also annotate the dataset using our supervised mapping pipelines, using either our [vignette](multimodal_reference_mapping.html), or [automated web tool, Azimuth](www.satijalab.org/azimuth). + +```{r Annotate, results = 'hide'} +# perform sub-clustering on cluster 6 to find additional structure +pbmc <- FindSubCluster(pbmc, cluster = 6, graph.name = "wsnn", algorithm = 3) +Idents(pbmc) <- "sub.cluster" +``` + +```{r Annotate2} +# add annotations +pbmc <- RenameIdents(pbmc, '19' = 'pDC','20' = 'HSPC','15' = 'cDC') +pbmc <- RenameIdents(pbmc, '0' = 'CD14 Mono', '9' ='CD14 Mono', '5' = 'CD16 Mono') +pbmc <- RenameIdents(pbmc, '10' = 'Naive B', '11' = 'Intermediate B', '17' = 'Memory B', '21' = 'Plasma') +pbmc <- RenameIdents(pbmc, '7' = 'NK') +pbmc <- RenameIdents(pbmc, '4' = 'CD4 TCM', '13'= "CD4 TEM", '3' = "CD4 TCM", '16' ="Treg", '1' ="CD4 Naive", '14' = "CD4 Naive") +pbmc <- RenameIdents(pbmc, '2' = 'CD8 Naive', '8'= "CD8 Naive", '12' = 'CD8 TEM_1', '6_0' = 'CD8 TEM_2', '6_1' ='CD8 TEM_2', '6_4' ='CD8 TEM_2') +pbmc <- RenameIdents(pbmc, '18' = 'MAIT') +pbmc <- RenameIdents(pbmc, '6_2' ='gdT', '6_3' = 'gdT') +pbmc$celltype <- Idents(pbmc) +``` + +We can visualize clustering based on gene expression, ATAC-seq, or WNN analysis. The differences are more subtle than in the previous analysis (you can explore the weights, which are more evenly split than in our CITE-seq example), but we find that WNN provides the clearest separation of cell states. + +```{r UMAPs, fig.width=10} +p1 <- DimPlot(pbmc, reduction = "umap.rna", group.by = "celltype", label = TRUE, label.size = 2.5, repel = TRUE) + ggtitle("RNA") +p2 <- DimPlot(pbmc, reduction = "umap.atac", group.by = "celltype", label = TRUE, label.size = 2.5, repel = TRUE) + ggtitle("ATAC") +p3 <- DimPlot(pbmc, reduction = "wnn.umap", group.by = "celltype", label = TRUE, label.size = 2.5, repel = TRUE) + ggtitle("WNN") +p1 + p2 + p3 & NoLegend() & theme(plot.title = element_text(hjust = 0.5)) +``` + +For example, the ATAC-seq data assists in the separation of CD4 and CD8 T cell states. This is due to the presence of multiple loci that exhibit differential accessibility between different T cell subtypes. For example, we can visualize 'pseudobulk' tracks of the CD8A locus alongside violin plots of gene expression levels, using tools in the [Signac visualization vignette](https://satijalab.org/signac/articles/visualization.html). + +```{r coverageplotcd8, fig.width=10} +## to make the visualization easier, subset T cell clusters +celltype.names <- levels(pbmc) +tcell.names <- grep("CD4|CD8|Treg", celltype.names,value = TRUE) +tcells <- subset(pbmc, idents = tcell.names) +CoveragePlot(tcells, region = 'CD8A', features = 'CD8A', assay = 'ATAC', expression.assay = 'SCT', peaks = FALSE) +``` + +Next, we will examine the accessible regions of each cell to determine enriched motifs. As described in the [Signac motifs vignette](https://satijalab.org/signac/articles/motif_vignette.html), there are a few ways to do this, but we will use the [chromVAR](https://www.nature.com/articles/nmeth.4401) package from the Greenleaf lab. This calculates a per-cell accessibility score for known motifs, and adds these scores as a third assay (`chromvar`) in the Seurat object. + +To continue, please make sure you have the following packages installed. + +* [chromVAR](https://bioconductor.org/packages/release/bioc/html/chromVAR.html) for the analysis of motif accessibility in scATAC-seq +* [presto](https://github.com/immunogenomics/presto) for fast differential expression analyses. +* [TFBSTools](http://www.bioconductor.org/packages/release/bioc/html/TFBSTools.html) for TFBS analysis +* [JASPAR2020](https://bioconductor.org/packages/release/data/annotation/html/JASPAR2020.html) for JASPAR motif models +* [motifmatchr](https://www.bioconductor.org/packages/release/bioc/html/motifmatchr.html) for motif matching +* [BSgenome.Hsapiens.UCSC.hg38](https://bioconductor.org/packages/release/data/annotation/html/BSgenome.Hsapiens.UCSC.hg38.html) for chromVAR + +
    + **Install command for all dependencies** + +```{r install.deps, eval=FALSE} +remotes::install_github("immunogenomics/presto") +BiocManager::install(c("chromVAR", "TFBSTools", "JASPAR2020", "motifmatchr", "BSgenome.Hsapiens.UCSC.hg38")) +``` + +
    + +```{r chromVar} +library(chromVAR) +library(JASPAR2020) +library(TFBSTools) +library(motifmatchr) +library(BSgenome.Hsapiens.UCSC.hg38) + +# Scan the DNA sequence of each peak for the presence of each motif, and create a Motif object +DefaultAssay(pbmc) <- "ATAC" +pwm_set <- getMatrixSet(x = JASPAR2020, opts = list(species = 9606, all_versions = FALSE)) +motif.matrix <- CreateMotifMatrix(features = granges(pbmc), pwm = pwm_set, genome = 'hg38', use.counts = FALSE) +motif.object <- CreateMotifObject(data = motif.matrix, pwm = pwm_set) +pbmc <- SetAssayData(pbmc, assay = 'ATAC', slot = 'motifs', new.data = motif.object) + +# Note that this step can take 30-60 minutes +pbmc <- RunChromVAR( + object = pbmc, + genome = BSgenome.Hsapiens.UCSC.hg38 +) +``` + +Finally, we explore the multimodal dataset to identify key regulators of each cell state. Paired data provides a unique opportunity to identify transcription factors (TFs) that satisfy multiple criteria, helping to narrow down the list of putative regulators to the most likely candidates. We aim to identify TFs whose expression is enriched in multiple cell types in the RNA measurements, but *also* have enriched accessibility for their motifs in the ATAC measurements. + +As an example and positive control, the CCAAT Enhancer Binding Protein (CEBP) family of proteins, including the TF CEBPB, have been repeatedly shown to play important roles in the differentiation and function of myeloid cells including monocytes and dendritic cells. We can see that both the expression of the CEBPB, and the accessibility of the MA0466.2.4 motif (which encodes the binding site for CEBPB), are both enriched in monocytes. + +```{r CEBPB, fig.width=10} +#returns MA0466.2 +motif.name <- ConvertMotifID(pbmc, name = 'CEBPB') +gene_plot <- FeaturePlot(pbmc, features = "sct_CEBPB", reduction = 'wnn.umap') +motif_plot <- FeaturePlot(pbmc, features = motif.name, min.cutoff = 0, cols = c("lightgrey", "darkred"), reduction = 'wnn.umap') +gene_plot | motif_plot +``` + +We'd like to quantify this relationship, and search across all cell types to find similar examples. To do so, we will use the `presto` package to perform fast differential expression. We run two tests: one using gene expression data, and the other using chromVAR motif accessibilities. `presto` calculates a p-value based on the Wilcox rank sum test, which is also the default test in Seurat, and we restrict our search to TFs that return significant results in both tests. + +`presto` also calculates an "AUC" statistic, which reflects the power of each gene (or motif) to serve as a marker of cell type. A maximum AUC value of 1 indicates a perfect marker. Since the AUC statistic is on the same scale for both genes and motifs, we take the average of the AUC values from the two tests, and use this to rank TFs for each cell type: + +```{r presto} +markers_rna <- presto:::wilcoxauc.Seurat(X = pbmc, group_by = 'celltype', assay = 'data', seurat_assay = 'SCT') +markers_motifs <- presto:::wilcoxauc.Seurat(X = pbmc, group_by = 'celltype', assay = 'data', seurat_assay = 'chromvar') +motif.names <- markers_motifs$feature +colnames(markers_rna) <- paste0("RNA.", colnames(markers_rna)) +colnames(markers_motifs) <- paste0("motif.", colnames(markers_motifs)) +markers_rna$gene <- markers_rna$RNA.feature +markers_motifs$gene <- ConvertMotifID(pbmc, id = motif.names) +``` + +```{r defineTests} +# a simple function to implement the procedure above +topTFs <- function(celltype, padj.cutoff = 1e-2) { + ctmarkers_rna <- dplyr::filter( + markers_rna, RNA.group == celltype, RNA.padj < padj.cutoff, RNA.logFC > 0) %>% + arrange(-RNA.auc) + ctmarkers_motif <- dplyr::filter( + markers_motifs, motif.group == celltype, motif.padj < padj.cutoff, motif.logFC > 0) %>% + arrange(-motif.auc) + top_tfs <- inner_join( + x = ctmarkers_rna[, c(2, 11, 6, 7)], + y = ctmarkers_motif[, c(2, 1, 11, 6, 7)], by = "gene" + ) + top_tfs$avg_auc <- (top_tfs$RNA.auc + top_tfs$motif.auc) / 2 + top_tfs <- arrange(top_tfs, -avg_auc) + return(top_tfs) +} +``` + +We can now compute, and visualize, putative regulators for any cell type. We recover well-established regulators, including [TBX21 for NK cells](https://www.sciencedirect.com/science/article/pii/S1074761304000767), [IRF4 for plasma cells](https://pubmed.ncbi.nlm.nih.gov/16767092/), [SOX4 for hematopoietic progenitors](https://ashpublications.org/blood/article/124/21/1577/88774/Sox4-Is-Required-for-the-Formation-and-Maintenance), [EBF1 and PAX5 for B cells](https://www.nature.com/articles/ni.2641), [IRF8 and TCF4 for pDC](https://www.nature.com/articles/s41590-018-0136-9). We believe that similar strategies can be used to help focus on a set of putative regulators in diverse systems. + +```{r NK, fig.width=10} +# identify top markers in NK and visualize +head(topTFs("NK"), 3) + +motif.name <- ConvertMotifID(pbmc, name = 'TBX21') +gene_plot <- FeaturePlot(pbmc, features = "sct_TBX21", reduction = 'wnn.umap') +motif_plot <- FeaturePlot(pbmc, features = motif.name, min.cutoff = 0, cols = c("lightgrey", "darkred"), reduction = 'wnn.umap') +gene_plot | motif_plot +``` + +```{r pDC, fig.width=10} +# identify top markers in pDC and visualize +head(topTFs("pDC"), 3) + +motif.name <- ConvertMotifID(pbmc, name = 'TCF4') +gene_plot <- FeaturePlot(pbmc, features = "sct_TCF4", reduction = 'wnn.umap') +motif_plot <- FeaturePlot(pbmc, features = motif.name, min.cutoff = 0, cols = c("lightgrey", "darkred"), reduction = 'wnn.umap') +gene_plot | motif_plot +``` + +```{r CD16Mono, fig.width=10} +# identify top markers in HSPC and visualize +head(topTFs("CD16 Mono"),3) + +motif.name <- ConvertMotifID(pbmc, name = 'SPI1') +gene_plot <- FeaturePlot(pbmc, features = "sct_SPI1", reduction = 'wnn.umap') +motif_plot <- FeaturePlot(pbmc, features = motif.name, min.cutoff = 0, cols = c("lightgrey", "darkred"), reduction = 'wnn.umap') +gene_plot | motif_plot +``` + +```{r moreTFS, fig.width=10} +# identify top markers in other cell types +head(topTFs("Naive B"), 3) +head(topTFs("HSPC"), 3) +head(topTFs("Plasma"), 3) +``` + + +```{r save.times, include=TRUE} +# write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/weighted_nearest_neighbor_analysis_times.csv") +print("done") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd new file mode 100644 index 000000000..86779b7d3 --- /dev/null +++ b/vignettes/spatial_vignette_2.Rmd @@ -0,0 +1,339 @@ +--- +title: "Analysis of Image-based Spatial Data in Seurat" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +# Overview + +In this vignette, we introduce a Seurat extension to analyze new types of spatially-resolved data. We have [previously introduced a spatial framework](https://satijalab.org/seurat/articles/spatial_vignette.html) which is compatible with sequencing-based technologies, like the 10x Genomics Visium system, or SLIDE-seq. Here, we extend this framework to analyze new data types that are captured via highly multiplexed imaging. In contrast to sequencing-based technologies, these datasets are often targeted (i.e. they profile a pre-selected set of genes). However they can resolve individual molecules - retaining single-cell (and subcellular) resolution. These approaches also often capture cellular boundaries (segmentations). + +We update the Seurat infrastructure to enable the analysis, visualization, and exploration of these exciting datasets. In this vignette, we focus on three datasets produced by different multiplexed imaging technologies, each of which is publicly available. We will be adding support for additional imaging-based technologies in the coming months. + +* Vizgen MERSCOPE (Mouse Brain) +* Nanostring CosMx Spatial Molecular Imager (FFPE Human Lung) +* Akoya CODEX (Human Lymph Node) + +First, we load the packages necessary for this vignette. + +```{r init, message=FALSE, warning=FALSE} +library(Seurat) +library(future) +plan("multisession", workers = 10) +``` + +# Mouse Brain: Vizgen MERSCOPE + +This dataset was produced using the Vizgen MERSCOPE system, which utilizes the MERFISH technology. The total dataset is available for [public download](https://info.vizgen.com/mouse-brain-data), and contains nine samples (three full coronal slices of the mouse brain, with three biological replicates per slice). The gene panel consists of 483 gene targets, representing known anonical cell type markers, nonsensory G-Protein coupled receptors (GPCRs), and Receptor Tyrosine Kinases (RTKs). In this vignette, we analyze one of the samples - slice 2, replicate 1. The median number of transcripts detected in each cell is 206. + +First, we read in the dataset and create a Seurat object. + +We use the `LoadVizgen()` function, which we have written to read in the output of the Vizgen analysis pipeline. The resulting Seurat object contains the following information: + +* A count matrix, indicating the number of observed molecules for each of the 483 transcripts in each cell. This matrix is analogous to a count matrix in scRNA-seq, and is stored by default in the RNA assay of the Seurat object + +```{r, message=FALSE, warning=FALSE} +# Loading segmentations is a slow process and multi processing with the future pacakge is recommended +vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/spatial_vignette_data/vizgen/s2r1/", fov = "s2r1") +``` + +The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: + +
    + **Cell Centroids: The spatial coordinates marking the centroid for each cell being profiled** + +```{r} +# Get the center position of each centroid. There is one row per cell in this dataframe. +head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["centroids"]])) +``` +
    +
    + **Cell Segmentation Boundaries: The spatial coordinates that describe the polygon segmentation of each single cell** + +```{r} +# Get the coordinates for each segmentation vertice. Each cell will have a variable number of vertices describing its shape. +head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["segmentation"]])) +``` +
    +
    + **Molecule positions: The spatial coordinates for each individual molecule that was detected during the multiplexed smFISH experiment.** + +```{r} +# Fetch molecules positions for Chrm1 +head(FetchData(vizgen.obj[["s2r1"]][["molecules"]], vars="Chrm1")) +``` +
    +\ + +## Preprocessing and unsupervised analysis +We start by performing a standard unsupervised clustering analysis, essentially first treating the dataset as an scRNA-seq experiment. We use SCTransform-based normalization, though we slightly modify the default clipping parameters to mitigate the effect of outliers that we occasionally observe in smFISH experiments. After normalization, we can run dimensional reduction and clustering. + +```{r analysis, results='hide'} +vizgen.obj <- SCTransform(vizgen.obj, assay = "Vizgen", clip.range = c(-10,10),) +vizgen.obj <- RunPCA(vizgen.obj, npcs = 30, features = rownames(vizgen.obj)) +vizgen.obj <- RunUMAP(vizgen.obj, dims = 1:30) +vizgen.obj <- FindNeighbors(vizgen.obj, reduction = "pca", dims = 1:30) +vizgen.obj <- FindClusters(vizgen.obj, resolution = 0.3) +``` + +We can then visualize the results of the clustering either in UMAP space (with `DimPlot()`) or overlaid on the image with `ImageDimPlot()`. + +```{r umap} +DimPlot(vizgen.obj, reduction = "umap") +``` + +```{r spatial.plot, fig.height=6, fig.width=6} +ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "polychrome", axes = TRUE) +``` + +You can also customize multiple aspect of the plot, including the color scheme, cell border widths, and size (see below). + +
    + **Customizing spatial plots in Seurat** + +The `ImageDimPlot()` and `ImageFeaturePlot()` functions have a few parameters which you can customize individual visualizations. These include: + +* alpha: Ranges from 0 to 1. Sets the transparency of within-cell coloring. +* size: determines the size of points representing cells, if centroids are being plotted +* cols: Sets the color scheme for the internal shading of each cell. Examples settings are `polychrome`, `glasbey`, `Paired`, `Set3`, and `parade`. Default is the ggplot2 color palette +* shuffle.cols: In some cases the selection of `cols` is more effective when the same colors are assigned to different clusters. Set `shuffle.cols = TRUE` to randomly shuffle the colors in the palette. +* border.size: Sets the width of the cell segmentation borders. By default, segmentations are plotted with a border size of 0.3 and centroids are plotted without border. +* border.color: Sets the color of the cell segmentation borders +* dark.background: Sets a black background color (TRUE by default) +* axes: Display +
    + +Since it can be difficult to visualize the spatial localization patterns of an individual cluster when viewing them all together, we can highlight all cells that belong to a particular cluster: + +```{r, fig.height=8, fig.width=12} +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 14)) +p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 15)) +p1 + p2 +``` + +We can find markers of individual clusters and visualize their spatial expression pattern. We can color cells based on their quantified expression of an individual gene, using `ImageFeaturePlot()`, which is analagous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. Since MERFISH images individual molecules, we can also visualize the location of individual *molecules*. + +```{r, fig.height=7, fig.width=12} +p1 <- ImageFeaturePlot(vizgen.obj, features = "Slc17a7") +p2 <- ImageDimPlot(vizgen.obj, molecules = "Slc17a7", nmols = 10000, alpha = 0.3, mols.cols = "red") +p1 + p2 +``` + +Note that the `nmols` parameter can be used to reduce the total number of molecules shown to reduce overplotting. You can also use the `mols.size`, `mols.cols`, and `mols.alpha` parameter to further optimize. + +Plotting molecules is especially useful for visualizing co-expression of multiple genes on the same plot. + +```{r, fig.height=7, fig.width=12} +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", alpha = 0.3, molecules = c("Slc17a7", "Olig1"), nmols = 10000) +markers.14 <- FindMarkers(vizgen.obj, ident.1 = "14") +p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", alpha = 0.3, molecules = rownames(markers.14)[1:4], nmols = 10000) +p1 + p2 +``` + +The updated Seurat spatial framework has the option to treat cells as individual points, or also to visualize cell boundaries (segmentations). By default, Seurat ignores cell segmentations and treats each cell as a point ('centroids'). This speeds up plotting, especially when looking at large areas, where cell boundaries are too small to visualize. + +We can zoom into a region of tissue, creating a new field of view. For example, we can zoom into a region that contains the hippocampus. Once zoomed-in, we can set `DefaultBoundary()` to show cell segmentations. You can also 'simplify' the cell segmentations, reducing the number of edges in each polygon to speed up plotting. + +```{r, fig.height=5, fig.width=14} +# create a Crop +cropped.coords <- Crop(vizgen.obj[["s2r1"]], x = c(1750, 3000), y = c(3750, 5250), coords = "plot") +# set a new field of view (fov) +vizgen.obj[["hippo"]] <- cropped.coords + +# visualize FOV using default settings (no cell boundaries) +p1 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, size = 0.7, border.color = "white", cols = "polychrome", coord.fixed = FALSE) + +# visualize FOV with full cell segmentations +DefaultBoundary(vizgen.obj[["hippo"]]) <- "segmentation" +p2 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome", coord.fixed = FALSE) + +# simplify cell segmentations +vizgen.obj[["hippo"]][["simplified.segmentations"]] <- Simplify(coords = vizgen.obj[["hippo"]][["segmentation"]], tol = 3) +DefaultBoundary(vizgen.obj[["hippo"]]) <- "simplified.segmentations" + +# visualize FOV with simplified cell segmentations +DefaultBoundary(vizgen.obj[["hippo"]]) <- "simplified.segmentations" +p3 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome", coord.fixed = FALSE) + +p1 + p2 + p3 +``` + +
    + **What is the tol parameter?** + +The tol parameter determines how simplified the resulting segmentations are. A higher value of tol will reduce the number of vertices more drastically which will speed up plotting, but some segmentation detail will be lost. See https://rgeos.r-forge.r-project.org/reference/topo-unary-gSimplify.html for examples using different values for tol. + +
    + +We can visualize individual molecules plotted at higher resolution after zooming-in +```{r, fig.height=8, fig.width=8} +# Since there is nothing behind the segmentations, alpha will slightly mute colors +ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], cols = "polychrome", mols.size = 1, alpha = 0.5, mols.cols = c("red", "blue", "yellow", "green")) +``` + +# Human Lung: Nanostring CosMx Spatial Molecular Imager + +This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. + +In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `LoadNanostring()` function, which parses the outputs available on the public download site. Note that the coordinates for the cell boundaries were provided by Nanostring by request, and are available for download [here](https://www.dropbox.com/s/hl3peavrx92bluy/Lung5_Rep1-polygons.csv?dl=0). + +For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. + +```{r load} +nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") +``` + +```{r integration} +# add in precomputed Azimuth annotations +azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") +nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) +nano.obj[["proj.umap"]] <- azimuth.data$umap +Idents(nano.obj) <- nano.obj$predicted.annotation.l1 + +# set to avoid error exceeding max allowed size of globals +options(future.globals.maxSize = 8000 * 1024^2) +nano.obj <- SCTransform(nano.obj, assay = "Nanostring", clip.range = c(-10, 10), verbose = FALSE) + +# text display of annotations and prediction scores +head(slot(object = nano.obj, name = "meta.data")[2:5]) +``` + +We can visualize the Nanostring cells and annotations, projected onto the reference-defined UMAP. Note that for this NSCLC sample, tumor samples are annotated as 'basal', which is the closest cell type match in the healthy reference. + +```{r, fig.width=9, fig.height=4} +DimPlot(nano.obj) +``` + +## Visualization of cell type and expression localization patterns + +As in the previous example, `ImageDimPlot()` plots c ells based on their spatial locations, and colors them based on their assigned cell type. Notice that the basal cell population (tumor cells) is tightly spatially organized, as expected. + +```{r, fig.width=11, fig.height=7} +ImageDimPlot(nano.obj, fov = "lung5.rep1", axes = TRUE, cols = "glasbey") +``` + +Since there are many cell types present, we can highlight the localization of a few select groups. + +```{r, fig.width=10, fig.height=7} +ImageDimPlot(nano.obj, fov = "lung5.rep1", cells = WhichCells(nano.obj, idents=c("Basal", "Macrophage", "Smooth Muscle", "CD4 T")), cols=c("red", "green", "blue", "orange"), size = 0.6) +``` + +We can also visualize gene expression markers a few different ways: + +```{r, fig.width=10, fig.height=5} +VlnPlot(nano.obj, features = "KRT17", slot = "counts", pt.size = 0.1, y.max = 30) + NoLegend() +``` + +```{r, fig.width=5, fig.height=4} +FeaturePlot(nano.obj, features = "KRT17") +``` + +```{r, fig.height=4, fig.width=8} +p1 <- ImageFeaturePlot(nano.obj, fov = "lung5.rep1", features = "KRT17", max.cutoff = "q95") +p2 <- ImageDimPlot(nano.obj, fov = "lung5.rep1", alpha = 0.3, molecules = "KRT17", nmols = 10000) + NoLegend() +p1 + p2 +``` + +We can plot molecules in order to co-visualize the expression of multiple markers, including KRT17 (basal cells), C1QA (macrophages), IL7R (T cells), and TAGLN (Smooth muscle cells). + +```{r, fig.width=10, fig.height=7} +# Plot some of the molecules which seem to display spatial correlation with each other +ImageDimPlot(nano.obj, fov = "lung5.rep1", group.by = NA, alpha = 0.3, molecules = c("KRT17", "C1QA", "IL7R", "TAGLN"), nmols = 20000) +``` + +We zoom in on one basal-rich region using the `Crop()` function. Once zoomed-in, we can visualize individual cell boundaries as well in all visualizations. + +```{r} +basal.crop <- Crop(nano.obj[["lung5.rep1"]], x = c(159500, 164000), y = c(8700, 10500)) +nano.obj[["zoom1"]] <- basal.crop +DefaultBoundary(nano.obj[["zoom1"]]) <- "segmentation" +``` + +```{r, fig.width=11, fig.height=7} +ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", coord.fixed = FALSE) +``` + +```{r, fig.width=11, fig.height=7} +# note the clouds of TPSAB1 molecules denoting mast cells +ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", alpha = 0.3, molecules = c("KRT17", "IL7R", "TPSAB1"), mols.size = 0.3, nmols = 20000, border.color = "black", coord.fixed = FALSE) +``` + +# Human Lymph Node: Akoya CODEX system + +This dataset was produced using Akoya CODEX system. The CODEX system performs multiplexed spatially-resolved protein profiling, iteratively visualizing antibody-binding events. The dataset here represents a tissue section from a human lymph node, and was generated by the University of Florida as part of the Human Biomolecular Atlas Program (HuBMAP). More information about the sample and experiment is available [here](https://portal.hubmapconsortium.org/browse/dataset/c95d9373d698faf60a66ffdc27499fe1). The protein panel in this dataset consists of 28 markers, and protein intensities were quantified as part of the Akoya processor pipeline, which outputs a CSV file providing the intensity of each marker in each cell, as well as the cell coordinates. The file is available for public download via Globus [here](https://app.globus.org/file-manager?origin_id=af603d86-eab9-4eec-bb1d-9d26556741bb&origin_path=%2Fc95d9373d698faf60a66ffdc27499fe1%2Fdrv_CX_20-008_lymphnode_n10_reg001%2Fprocessed_2020-12-2320-008LNn10r001%2Fsegm%2Fsegm-1%2Ffcs%2Fcompensated%2F). + + +First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function in Seurat: + +```{r} +codex.obj <- LoadAkoya( + filename = "/brahms/hartmana/spatial_vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", + type = "processor", + fov = "HBM754.WKLP.262" +) +``` + +We can now run unsupervised analysis to identify cell clusters. To normalize the protein data, we use centered log-ratio based normalization, as we typically apply to the protein modality of CITE-seq data. We then run dimensional reduction and graph-based clustering. + +```{r} +codex.obj <- NormalizeData(object = codex.obj, normalization.method = "CLR", margin = 2) +codex.obj <- ScaleData(codex.obj) +VariableFeatures(codex.obj) <- rownames(codex.obj) # since the panel is small, treat all features as variable. +codex.obj <- RunPCA(object = codex.obj, npcs = 20, verbose = FALSE) +codex.obj <- RunUMAP(object = codex.obj, dims = 1:20, verbose = FALSE) +codex.obj <- FindNeighbors(object = codex.obj, dims = 1:20, verbose = FALSE) +codex.obj <- FindClusters(object = codex.obj, verbose = FALSE, resolution = 0.4, n.start = 1) +``` + +We can visualize the cell clusters based on a protein intensity-based UMAP embedding, or also based on their spatial location. + +```{r} +DimPlot(codex.obj, label = TRUE, label.box = TRUE) + NoLegend() +``` + +```{r, fig.width=6, fig.height=5} +ImageDimPlot(codex.obj, cols = "parade") +``` + +The expression patters of individual markers clearly denote different cell types and spatial structures, including Lyve1 (lymphatic endothelial cells), CD34 (blood endothelial cells), and CD21 (B cells). As expected, endothelial cells group together into vessels, and B cells are key components of specialized microstructures known as germinal zones. You can read more about protein markers in this dataset, as well as cellular networks in human lynmphatic tissues, in this [preprint](https://www.biorxiv.org/content/10.1101/2021.10.20.465151v1.full). + +```{r, fig.width=9, fig.height=8} +p1 <- ImageFeaturePlot(codex.obj, fov = "HBM754.WKLP.262", features = c("CD34", "CD21", "Lyve1"), min.cutoff = "q10", max.cutoff = "q90") +p2 <- ImageDimPlot(codex.obj, fov = "HBM754.WKLP.262", cols = "parade") +p1 + p2 +``` + +Each of these datasets represents an opportunity to learn organizing principles that govern the spatial localization of different cell types. Stay tuned for future updates to Seurat enabling further exploration and characterization of the relationship between spatial position and molecular state. + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    From ac2873bc09ab4e908bc661849dc3891d62fd6125 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 4 Jan 2023 14:27:45 -0500 Subject: [PATCH 340/979] v3 pseudo bulk --- R/utilities.R | 84 +++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 46 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 1d6a061c5..115f26d8c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -367,9 +367,9 @@ AggregateExpression <- function( ... ) { return( - PseudobulkExpression( + AverageExpression( object = object, - pb.method = 'aggregate', + method = 'aggregate', assays = assays, features = features, return.seurat = return.seurat, @@ -478,26 +478,31 @@ AverageExpression <- function( category.matrix <- CreateCategoryMatrix(labels = data, method = method) data.return <- list() for (i in 1:length(x = assays)) { - data.return[[i]] <- PseudobulkExpression( - object = object, - method = 'average', - assays = assays[i], - features = features, - group.by = group.by, - add.ident = add.ident, - slot = slot, + if (inherits(x = features, what = "list")) { + features.i <- features[[i]] + } else { + features.i <- features + } + data.return[[assays[i]]] <- PseudobulkExpression.Assay( + object = object[[assays[i]]], + assay = assays[i], + category.matrix = category.matrix, + features = features.i, + slot = slot[i], verbose = verbose, ... ) } if (return.seurat) { + op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) if (slot[1] == 'scale.data') { - na.matrix <- as.matrix(x = as.madata.return[[1]]) + na.matrix <- as.matrix(x = data.return[[1]]) na.matrix[1:length(x = na.matrix)] <- NA toRet <- CreateSeuratObject( counts = na.matrix, - project = if (pb.method == "average") "Average" else "Aggregate", + project = if (method == "average") "Average" else "Aggregate", assay = names(x = data.return)[1], check.matrix = FALSE, ... @@ -523,7 +528,7 @@ AverageExpression <- function( } else { toRet <- CreateSeuratObject( counts = data.return[[1]], - project = if (pb.method == "average") "Average" else "Aggregate", + project = if (method == "average") "Average" else "Aggregate", assay = names(x = data.return)[1], check.matrix = FALSE, ... @@ -584,7 +589,9 @@ AverageExpression <- function( return(category.matrix[,x, drop = FALSE ]@i[1] + 1) } ) - Idents(object = toRet) <- Idents(object = object)[first.cells] + Idents(object = toRet, + cells = colnames(x = toRet) + ) <- Idents(object = object)[first.cells] } return(toRet) } else { @@ -1326,7 +1333,7 @@ PercentageFeatureSet <- function( # Returns a representative expression value for each identity class # # @param object Seurat object -# @param pb.method Whether to 'average' (default) or 'aggregate' expression levels +# @param method Whether to 'average' (default) or 'aggregate' expression levels # @param assays Which assays to use. Default is all assays # @param features Features to analyze. Default is all features in the assay # @param return.seurat Whether to return the data as a Seurat object. Default is FALSE @@ -1349,69 +1356,54 @@ PercentageFeatureSet <- function( # data("pbmc_small") # head(PseudobulkExpression(object = pbmc_small)) # -PseudobulkExpression <- function( +PseudobulkExpression.Assay <- function( object, - pb.method = 'average', - assays = NULL, + assay, + category.matrix, features = NULL, - return.seurat = FALSE, - group.by = 'ident', - add.ident = NULL, slot = 'data', verbose = TRUE, ... ) { - - - - for (i in 1:length(x = assays)) { data.use <- GetAssayData( - object = object, - assay = assays[i], - slot = slot[i] + object = object, + slot = slot ) features.to.avg <- features %||% rownames(x = data.use) - if (inherits(x = features, what = "list")) { - features.to.avg <- features[i] - } if (IsMatrixEmpty(x = data.use)) { warning( - "The ", slot[i], " slot for the ", assays[i], + "The ", slot, " slot for the ", assay, " assay is empty. Skipping assay.", immediate. = TRUE, call. = FALSE) - next + return(NULL) } bad.features <- setdiff(x = features.to.avg, y = rownames(x = data.use)) if (length(x = bad.features) > 0) { warning( "The following ", length(x = bad.features), - " features were not found in the ", assays[i], " assay: ", + " features were not found in the ", assay, " assay: ", paste(bad.features, collapse = ", "), call. = FALSE, immediate. = TRUE) } features.assay <- intersect(x = features.to.avg, y = rownames(x = data.use)) if (length(x = features.assay) > 0) { data.use <- data.use[features.assay, ] } else { - warning("None of the features specified were found in the ", assays[i], + warning("None of the features specified were found in the ", assay, " assay.", call. = FALSE, immediate. = TRUE) - next + return(NULL) } - if (slot[i] == 'data') { + if (slot == 'data') { data.use <- expm1(x = data.use) if (any(data.use == Inf)) { warning("Exponentiation yielded infinite values. `data` may not be log-normed.") } } - if (inherits(x = data.use, what = 'DelayedArray')) { - data.return[[i]] <- tcrossprod_DelayedAssay(x = data.use, y = t(category.matrix)) - } else { - browser() - data.return[[i]] <- data.use %*% category.matrix - } - names(x = data.return)[i] <- assays[[i]] - } + data.return <- data.use %*% category.matrix + return(data.return) + } + #' Regroup idents based on meta.data info #' #' For cells in each ident, set a new identity based on the most common value @@ -2617,7 +2609,7 @@ SweepNonzero <- function( CreateCategoryMatrix <- function( labels, - method = c('sum', 'average'), + method = c('aggregate', 'average'), cells.name = NULL ) { method <- match.arg(arg = method) From 6c08356a081e98b1e45e3f35785e5f924d032419 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 4 Jan 2023 14:55:57 -0500 Subject: [PATCH 341/979] generic PseudobulkExpression --- DESCRIPTION | 2 +- NAMESPACE | 3 ++ R/generics.R | 16 ++++++++++ R/utilities.R | 64 ++++++++++++++++++++++++++++++++------ man/AverageExpression.Rd | 3 +- man/FeaturePlot.Rd | 8 +++-- man/IntegrateData.Rd | 6 ++-- man/IntegrateEmbeddings.Rd | 6 ++-- man/PolyFeaturePlot.Rd | 5 +-- man/Seurat-package.Rd | 2 +- man/reexports.Rd | 2 +- 11 files changed, 93 insertions(+), 24 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 933328420..e02c8655b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -105,7 +105,7 @@ Collate: 'sketching.R' 'tree.R' 'utilities.R' -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 Encoding: UTF-8 Suggests: ape, diff --git a/NAMESPACE b/NAMESPACE index 58a33cda3..0a4089276 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -85,6 +85,8 @@ S3method(ProjectCellEmbeddings,default) S3method(ProjectUMAP,DimReduc) S3method(ProjectUMAP,Seurat) S3method(ProjectUMAP,default) +S3method(PseudobulkExpression,Assay) +S3method(PseudobulkExpression,StdAssay) S3method(Radius,STARmap) S3method(Radius,SlideSeq) S3method(Radius,VisiumV1) @@ -341,6 +343,7 @@ export(ProjectCellEmbeddings) export(ProjectDim) export(ProjectDimReduc) export(ProjectUMAP) +export(PseudobulkExpression) export(PurpleAndYellow) export(RPCAIntegration) export(Radius) diff --git a/R/generics.R b/R/generics.R index 6d0c54072..b02c3de6c 100644 --- a/R/generics.R +++ b/R/generics.R @@ -424,6 +424,22 @@ ProjectUMAP <- function(query, ...) { UseMethod(generic = "ProjectUMAP", object = query) } +#' Pseudobulk Expression +#' +#' Normalize the count data present in a given assay. +#' +#' @param object An assay +#' @param ... Arguments passed to other methods +#' +#' @return Returns object after normalization +#' +#' @rdname PseudobulkExpression +#' @export PseudobulkExpression +#' +PseudobulkExpression <- function(object, ...) { + UseMethod(generic = "PseudobulkExpression", object = object) +} + #' Perform Canonical Correlation Analysis #' #' Runs a canonical correlation analysis using a diagonal implementation of CCA. diff --git a/R/utilities.R b/R/utilities.R index 115f26d8c..9911bd5f0 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -483,7 +483,7 @@ AverageExpression <- function( } else { features.i <- features } - data.return[[assays[i]]] <- PseudobulkExpression.Assay( + data.return[[assays[i]]] <- PseudobulkExpression( object = object[[assays[i]]], assay = assays[i], category.matrix = category.matrix, @@ -493,7 +493,6 @@ AverageExpression <- function( ... ) } - if (return.seurat) { op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) on.exit(expr = options(op), add = TRUE) @@ -1347,14 +1346,9 @@ PercentageFeatureSet <- function( # # @return Returns a matrix with genes as rows, identity classes as columns. # If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. +#' @method PseudobulkExpression Assay +#' @export # -#' @importFrom Matrix rowMeans sparse.model.matrix -#' @importFrom stats as.formula -# @export -# -# @examples -# data("pbmc_small") -# head(PseudobulkExpression(object = pbmc_small)) # PseudobulkExpression.Assay <- function( object, @@ -1403,6 +1397,54 @@ PseudobulkExpression.Assay <- function( } +#' @method PseudobulkExpression StdAssay +#' @export +# +# +PseudobulkExpression.StdAssay <- function( + object, + assay, + category.matrix, + features = NULL, + slot = 'data', + verbose = TRUE, + ... +) { + if (slot == 'data') { + message("Assay5 will use arithmetic mean for data slot.") + } + browser() + layers.set <- Layers(object = object, search = slot) + data.use <- GetAssayData( + object = object, + slot = slot + ) + features.to.avg <- features %||% rownames(x = data.use) + if (IsMatrixEmpty(x = data.use)) { + warning( + "The ", slot, " slot for the ", assay, + " assay is empty. Skipping assay.", immediate. = TRUE, call. = FALSE) + return(NULL) + } + bad.features <- setdiff(x = features.to.avg, y = rownames(x = data.use)) + if (length(x = bad.features) > 0) { + warning( + "The following ", length(x = bad.features), + " features were not found in the ", assay, " assay: ", + paste(bad.features, collapse = ", "), call. = FALSE, immediate. = TRUE) + } + features.assay <- intersect(x = features.to.avg, y = rownames(x = data.use)) + if (length(x = features.assay) > 0) { + data.use <- data.use[features.assay, ] + } else { + warning("None of the features specified were found in the ", assay, + " assay.", call. = FALSE, immediate. = TRUE) + return(NULL) + } + + data.return <- data.use %*% category.matrix + return(data.return) +} #' Regroup idents based on meta.data info #' @@ -2605,6 +2647,8 @@ SweepNonzero <- function( #' Create one hot matrix for a given label +#' @importFrom Matrix colSums sparse.model.matrix +#' @importFrom stats as.formula #' @export CreateCategoryMatrix <- function( @@ -2655,7 +2699,7 @@ CreateCategoryMatrix <- function( colsums <- colsums[colsums > 0] if (method =='average') { - category.matrix <- Seurat:::SweepNonzero( + category.matrix <- SweepNonzero( x = category.matrix, MARGIN = 2, STATS = colsums, diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index ca481cb3d..f4b2f8ff9 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -11,7 +11,8 @@ AverageExpression( return.seurat = FALSE, group.by = "ident", add.ident = NULL, - slot = "data", + slot = "counts", + method = "average", verbose = TRUE, ... ) diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 8df3185d9..6778ee488 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,8 +10,12 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { - c("lightgrey", "blue") }, + cols = if (blend) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "blue") + }, pt.size = NULL, alpha = 1, order = FALSE, diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index c02543005..e08bd682e 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,10 +64,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
    }} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index c3f96ffa5..dc0469132 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,10 +75,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
    }} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 1eacd0ecd..a2b2fc588 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,10 +33,7 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 9b3fc3749..351af75c9 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. } \section{Package options}{ diff --git a/man/reexports.Rd b/man/reexports.Rd index 6320f6d47..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -73,6 +73,6 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{components}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 2a5784c293278d64ecc145402442228f3fc9186c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 4 Jan 2023 15:29:05 -0500 Subject: [PATCH 342/979] pseudo bulk for assay5 --- R/utilities.R | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 9911bd5f0..d0f81f225 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1413,36 +1413,49 @@ PseudobulkExpression.StdAssay <- function( if (slot == 'data') { message("Assay5 will use arithmetic mean for data slot.") } - browser() layers.set <- Layers(object = object, search = slot) - data.use <- GetAssayData( - object = object, - slot = slot - ) - features.to.avg <- features %||% rownames(x = data.use) - if (IsMatrixEmpty(x = data.use)) { - warning( - "The ", slot, " slot for the ", assay, - " assay is empty. Skipping assay.", immediate. = TRUE, call. = FALSE) - return(NULL) - } - bad.features <- setdiff(x = features.to.avg, y = rownames(x = data.use)) + features.to.avg <- features %||% rownames(x = object) + bad.features <- setdiff(x = features.to.avg, y = rownames(x = object)) if (length(x = bad.features) > 0) { warning( "The following ", length(x = bad.features), " features were not found in the ", assay, " assay: ", paste(bad.features, collapse = ", "), call. = FALSE, immediate. = TRUE) } - features.assay <- intersect(x = features.to.avg, y = rownames(x = data.use)) - if (length(x = features.assay) > 0) { - data.use <- data.use[features.assay, ] - } else { + features.assay <- Reduce( + f = intersect, + x = c(list(features.to.avg), + lapply(X = layers.set, FUN = function(l) rownames(object[[l]])) + ) + ) + if (length(x = features.assay) == 0) { warning("None of the features specified were found in the ", assay, " assay.", call. = FALSE, immediate. = TRUE) return(NULL) } - - data.return <- data.use %*% category.matrix + data.return <- as.sparse( + x = matrix( + data = 0, + nrow = length(x = features.assay), + ncol = ncol(x = category.matrix) + ) + ) + for (i in seq_along(layers.set)) { + data.i <- LayerData(object = object, + layer = layers.set[i], + features = features.assay + ) + category.matrix.i <- category.matrix[colnames(x = data.i),] + if (inherits(x = data.i, what = 'DelayedArray')) { + data.return.i<- tcrossprod_DelayedAssay(x = data.i, y = t(category.matrix.i)) + } else { + data.return.i <- as.sparse(x = data.i %*% category.matrix.i) + } + data.return <- data.return + data.return.i + } + if (slot == 'data') { + data.return <- expm1(x = data.return) + } return(data.return) } From 5dc90f1d4a69d9f1a9f03a94f23468ebae943ed9 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 4 Jan 2023 15:30:01 -0500 Subject: [PATCH 343/979] temporarily remove bpcells calcn --- NAMESPACE | 1 - R/preprocessing5.R | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5798b4796..25c16914a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,6 @@ S3method("SCTResults<-",SCTModel) S3method("[",SlideSeq) S3method("[",VisiumV1) S3method("levels<-",SCTAssay) -S3method(.CalcN,IterableMatrix) S3method(AnnotateAnchors,IntegrationAnchorSet) S3method(AnnotateAnchors,TransferAnchorSet) S3method(AnnotateAnchors,default) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c1ebb86a9..227641a31 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1005,16 +1005,16 @@ VST.matrix <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @method .CalcN IterableMatrix -#' @export -#' -.CalcN.IterableMatrix <- function(object) { - col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats - return(list( - nCount = round(col_stat['mean',] *nrow(object)), - nFeature = col_stat['nonzero',] - )) -} +# #' @method .CalcN IterableMatrix +# #' @export +# #' +# .CalcN.IterableMatrix <- function(object) { +# col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats +# return(list( +# nCount = round(col_stat['mean',] *nrow(object)), +# nFeature = col_stat['nonzero',] +# )) +# } .FeatureVar <- function( data, From b8e8937734c7b826c1943a4c417958ad4c3af378 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 4 Jan 2023 15:38:49 -0500 Subject: [PATCH 344/979] undo --- NAMESPACE | 1 + R/preprocessing5.R | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 25c16914a..5798b4796 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method("SCTResults<-",SCTModel) S3method("[",SlideSeq) S3method("[",VisiumV1) S3method("levels<-",SCTAssay) +S3method(.CalcN,IterableMatrix) S3method(AnnotateAnchors,IntegrationAnchorSet) S3method(AnnotateAnchors,TransferAnchorSet) S3method(AnnotateAnchors,default) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 227641a31..c1ebb86a9 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1005,16 +1005,16 @@ VST.matrix <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# #' @method .CalcN IterableMatrix -# #' @export -# #' -# .CalcN.IterableMatrix <- function(object) { -# col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats -# return(list( -# nCount = round(col_stat['mean',] *nrow(object)), -# nFeature = col_stat['nonzero',] -# )) -# } +#' @method .CalcN IterableMatrix +#' @export +#' +.CalcN.IterableMatrix <- function(object) { + col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats + return(list( + nCount = round(col_stat['mean',] *nrow(object)), + nFeature = col_stat['nonzero',] + )) +} .FeatureVar <- function( data, From f0f70b2c2b63e1cd28c61d906889dcda8b76d77f Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 4 Jan 2023 15:55:56 -0500 Subject: [PATCH 345/979] tmp skip presto DE --- R/differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index a30828f45..8497b3a4e 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2338,7 +2338,7 @@ WilcoxDETest <- function( group.info[cells.1, "group"] <- "Group1" group.info[cells.2, "group"] <- "Group2" group.info[, "group"] <- factor(x = group.info[, "group"]) - if (presto.check[1] && overflow.check) { + if (FALSE) { data.use <- data.use[, names(x = group.info), drop = FALSE] res <- presto::wilcoxauc(X = data.use, y = group.info) res <- res[1:(nrow(x = res)/2),] From e4f284ac8eaf9b947890ad43b6bad5f6080a7bbe Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 4 Jan 2023 18:34:54 -0500 Subject: [PATCH 346/979] cast data and counts --- R/sketching.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 5f9cde7e3..27ef92321 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -84,6 +84,7 @@ LeverageScoreSampling <- function( default = TRUE, seed = NA_integer_, cast = NULL, + layers = c('counts', 'data'), ... ) { assay <- assay[1L] %||% DefaultAssay(object = object) @@ -98,13 +99,7 @@ LeverageScoreSampling <- function( } object[[save]] <- NULL } - vars <- grep( - pattern = '^seurat_leverage_score_', - x = names(x = object[[]]), - value = TRUE - ) - names(x = vars) <- vars - vars <- gsub(pattern = '^seurat_leverage_score_', replacement = '', x = vars) + vars <- layers vars <- vars[vars %in% Layers(object = object[[assay]])] if (!length(x = vars)) { stop("No leverage scores found for assay ", assay, call. = FALSE) From 945f8f0da0e391933108bc0602d7ce573e863077 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 5 Jan 2023 10:01:20 -0500 Subject: [PATCH 347/979] add seurat5 vignettes --- vignettes/seurat5_atacseq_integration_vignette.Rmd | 2 +- vignettes/seurat5_cell_cycle_vignette.Rmd | 2 +- vignettes/seurat5_conversion_vignette.Rmd | 2 +- vignettes/seurat5_de_vignette.Rmd | 2 +- vignettes/seurat5_dim_reduction_vignette.Rmd | 2 +- vignettes/seurat5_future_vignette.Rmd | 2 +- vignettes/seurat5_hashing_vignette.Rmd | 2 +- vignettes/seurat5_integration_introduction.Rmd | 2 +- vignettes/seurat5_integration_large_datasets.Rmd | 2 +- vignettes/seurat5_integration_mapping.Rmd | 2 +- vignettes/seurat5_integration_rpca.Rmd | 2 +- vignettes/seurat5_interaction_vignette.Rmd | 2 +- vignettes/seurat5_merge_vignette.Rmd | 2 +- vignettes/seurat5_mixscape_vignette.Rmd | 2 +- vignettes/seurat5_multimodal_reference_mapping.Rmd | 2 +- vignettes/seurat5_multimodal_vignette.Rmd | 2 +- vignettes/seurat5_pbmc3k_tutorial.Rmd | 2 +- vignettes/seurat5_sctransform_v2_vignette.Rmd | 2 +- vignettes/seurat5_sctransform_vignette.Rmd | 2 +- vignettes/seurat5_spatial_vignette.Rmd | 2 +- vignettes/seurat5_spatial_vignette_2.Rmd | 3 ++- vignettes/seurat5_visualization_vignette.Rmd | 2 +- vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd | 2 +- vignettes/weighted_nearest_neighbor_analysis.Rmd | 2 +- 24 files changed, 25 insertions(+), 24 deletions(-) diff --git a/vignettes/seurat5_atacseq_integration_vignette.Rmd b/vignettes/seurat5_atacseq_integration_vignette.Rmd index 4682479cf..0d4d2b1c7 100644 --- a/vignettes/seurat5_atacseq_integration_vignette.Rmd +++ b/vignettes/seurat5_atacseq_integration_vignette.Rmd @@ -224,7 +224,7 @@ DimPlot(coembed, group.by = c("orig.ident","seurat_annotations")) ``` ```{r save.times, include = TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/atacseq_integration_vignette.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_atacseq_integration_vignette.csv") ```
    diff --git a/vignettes/seurat5_cell_cycle_vignette.Rmd b/vignettes/seurat5_cell_cycle_vignette.Rmd index 434af8f93..d67e2fed5 100644 --- a/vignettes/seurat5_cell_cycle_vignette.Rmd +++ b/vignettes/seurat5_cell_cycle_vignette.Rmd @@ -139,7 +139,7 @@ DimPlot(marrow) ``` ```{r save.times, include = FALSE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/cell_cycle_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_cell_cycle_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_conversion_vignette.Rmd b/vignettes/seurat5_conversion_vignette.Rmd index b0e5c8a12..5e69fbb66 100644 --- a/vignettes/seurat5_conversion_vignette.Rmd +++ b/vignettes/seurat5_conversion_vignette.Rmd @@ -121,7 +121,7 @@ For more details about interacting with loom files in R and Seurat, please see [ Many thanks to [Davis McCarthy](https://twitter.com/davisjmcc?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor) and [Alex Wolf](https://twitter.com/falexwolf) for their help in drafting the conversion functions. ```{r save.times, include = FALSE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/conversion_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_conversion_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_de_vignette.Rmd b/vignettes/seurat5_de_vignette.Rmd index 7aa962979..97ba236cf 100644 --- a/vignettes/seurat5_de_vignette.Rmd +++ b/vignettes/seurat5_de_vignette.Rmd @@ -132,7 +132,7 @@ head(FindMarkers(pbmc, ident.1 = "CD14+ Mono", ident.2 = "FCGR3A+ Mono", test.us We thank the authors of the MAST and DESeq2 packages for their kind assistance and advice. We also point users to the following [study](https://www.nature.com/articles/nmeth.4612) by Charlotte Soneson and Mark Robinson, which performs careful and extensive evaluation of methods for single cell differential expression testing. ```{r save.times, include = FALSE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/de_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_de_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_dim_reduction_vignette.Rmd b/vignettes/seurat5_dim_reduction_vignette.Rmd index 7c193e94a..6a3b624ad 100644 --- a/vignettes/seurat5_dim_reduction_vignette.Rmd +++ b/vignettes/seurat5_dim_reduction_vignette.Rmd @@ -105,7 +105,7 @@ ggsave(filename = "../output/images/pbmc_mds.jpg", height = 7, width = 12, plot ``` ```{r save.times, include = FALSE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/dim_reduction_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_dim_reduction_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_future_vignette.Rmd b/vignettes/seurat5_future_vignette.Rmd index cdbddf3ad..98b57ea8e 100644 --- a/vignettes/seurat5_future_vignette.Rmd +++ b/vignettes/seurat5_future_vignette.Rmd @@ -122,7 +122,7 @@ For certain functions, each worker needs access to certain global variables. If ```{r save.times, include = FALSE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/future_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_future_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_hashing_vignette.Rmd b/vignettes/seurat5_hashing_vignette.Rmd index 5893cbac5..5321d8062 100644 --- a/vignettes/seurat5_hashing_vignette.Rmd +++ b/vignettes/seurat5_hashing_vignette.Rmd @@ -280,7 +280,7 @@ DimPlot(hto12) + NoLegend() ``` ```{r save.times, include = TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/hashing_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_hashing_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_integration_introduction.Rmd b/vignettes/seurat5_integration_introduction.Rmd index 43e828531..904e4a71f 100644 --- a/vignettes/seurat5_integration_introduction.Rmd +++ b/vignettes/seurat5_integration_introduction.Rmd @@ -258,7 +258,7 @@ p1 + p2 Now that the datasets have been integrated, you can follow the previous steps in this vignette identify cell types and cell type-specific responses. ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_introduction.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_introduction.csv") ```
    diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd index 8801c7faa..24eb3ad23 100644 --- a/vignettes/seurat5_integration_large_datasets.Rmd +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -108,7 +108,7 @@ ggsave(filename = "../output/images/bm280k_integrated.jpg", height = 7, width = ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_large_datasets.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_large_datasets.csv") ```
    diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index ee52e8d36..c660b0e28 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -203,7 +203,7 @@ p1 + p2 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_reference_mapping.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_reference_mapping.csv") ```
    diff --git a/vignettes/seurat5_integration_rpca.Rmd b/vignettes/seurat5_integration_rpca.Rmd index 9505be734..d78236ddd 100644 --- a/vignettes/seurat5_integration_rpca.Rmd +++ b/vignettes/seurat5_integration_rpca.Rmd @@ -176,7 +176,7 @@ p1 + p2 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/integration_rpca.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_rpca.csv") ```
    diff --git a/vignettes/seurat5_interaction_vignette.Rmd b/vignettes/seurat5_interaction_vignette.Rmd index 50770bd9d..7e812f461 100644 --- a/vignettes/seurat5_interaction_vignette.Rmd +++ b/vignettes/seurat5_interaction_vignette.Rmd @@ -139,7 +139,7 @@ DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[['pca']], balance ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/interaction_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_interaction_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_merge_vignette.Rmd b/vignettes/seurat5_merge_vignette.Rmd index f5a6f585d..84814bb7f 100644 --- a/vignettes/seurat5_merge_vignette.Rmd +++ b/vignettes/seurat5_merge_vignette.Rmd @@ -99,7 +99,7 @@ GetAssayData(pbmc.normalized)[1:10, 1:15] ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/merge_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_merge_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_mixscape_vignette.Rmd b/vignettes/seurat5_mixscape_vignette.Rmd index e59d80e83..0ee06edd5 100644 --- a/vignettes/seurat5_mixscape_vignette.Rmd +++ b/vignettes/seurat5_mixscape_vignette.Rmd @@ -361,7 +361,7 @@ p2 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/mixscape_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_mixscape_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd index bda439eaa..cac375e58 100644 --- a/vignettes/seurat5_multimodal_reference_mapping.Rmd +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -383,7 +383,7 @@ p3 / p4 / p5 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/reference_mapping_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_reference_mapping_times.csv") ```
    diff --git a/vignettes/seurat5_multimodal_vignette.Rmd b/vignettes/seurat5_multimodal_vignette.Rmd index ad8b52a12..1b09a3821 100644 --- a/vignettes/seurat5_multimodal_vignette.Rmd +++ b/vignettes/seurat5_multimodal_vignette.Rmd @@ -218,7 +218,7 @@ ggsave(filename = "../output/images/citeseq_plot.jpg", height = 7, width = 12, p ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/multimodal_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_multimodal_vignette_times.csv") ``` # Additional functionality for multimodal data in Seurat diff --git a/vignettes/seurat5_pbmc3k_tutorial.Rmd b/vignettes/seurat5_pbmc3k_tutorial.Rmd index 78e6f32ff..d6d73c31a 100644 --- a/vignettes/seurat5_pbmc3k_tutorial.Rmd +++ b/vignettes/seurat5_pbmc3k_tutorial.Rmd @@ -384,7 +384,7 @@ saveRDS(pbmc, file = "../data/pbmc3k_final.rds") ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/pbmc3k_tutorial_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_pbmc3k_tutorial_times.csv") ```
    diff --git a/vignettes/seurat5_sctransform_v2_vignette.Rmd b/vignettes/seurat5_sctransform_v2_vignette.Rmd index 5e8795cf8..3a6ba3faf 100644 --- a/vignettes/seurat5_sctransform_v2_vignette.Rmd +++ b/vignettes/seurat5_sctransform_v2_vignette.Rmd @@ -222,7 +222,7 @@ head(nk.markers) ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/sctransform2.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_sctransform2.csv") ```
    diff --git a/vignettes/seurat5_sctransform_vignette.Rmd b/vignettes/seurat5_sctransform_vignette.Rmd index 86a5ae17a..1fd6af556 100644 --- a/vignettes/seurat5_sctransform_vignette.Rmd +++ b/vignettes/seurat5_sctransform_vignette.Rmd @@ -145,7 +145,7 @@ FeaturePlot(pbmc, features = c("CD3D", "ISG15", "TCL1A", "FCER2", "XCL1", "FCGR3 ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/sctransform_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_sctransform_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 7070b8115..8252642d5 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -463,7 +463,7 @@ SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.se ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/spatial_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_spatial_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 86779b7d3..90bbee2a3 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/seurat5_visualization_vignette.Rmd b/vignettes/seurat5_visualization_vignette.Rmd index 2b8e8bdf6..98fc2eb09 100644 --- a/vignettes/seurat5_visualization_vignette.Rmd +++ b/vignettes/seurat5_visualization_vignette.Rmd @@ -240,7 +240,7 @@ plot1 + plot2 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/visualization_vignette_times.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_visualization_vignette_times.csv") ```
    diff --git a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd index c6b6b08bf..9d4b17055 100644 --- a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd +++ b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd @@ -431,7 +431,7 @@ head(topTFs("Plasma"), 3) ```{r save.times, include=TRUE} -# write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/weighted_nearest_neighbor_analysis_times.csv") +# write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_weighted_nearest_neighbor_analysis_times.csv") print("done") ``` diff --git a/vignettes/weighted_nearest_neighbor_analysis.Rmd b/vignettes/weighted_nearest_neighbor_analysis.Rmd index 4c207c7d0..5566c05ab 100644 --- a/vignettes/weighted_nearest_neighbor_analysis.Rmd +++ b/vignettes/weighted_nearest_neighbor_analysis.Rmd @@ -215,7 +215,7 @@ pbmc[["ATAC"]] <- chrom_assay We perform basic QC based on the number of detected molecules for each modality as well as mitochondrial percentage. ```{r QCObject, fig.width=10} -VlnPlot(pbmc, features = c("nCount_ATAC", "nCount_RNA","percent.mt"), ncol = 3, +VlnPlot(pbmc, features = c("nCount_ATAC", "nCount_RNA", "percent.mt"), ncol = 3, log = TRUE, pt.size = 0) + NoLegend() pbmc <- subset( From 556e6aff7fc4267fa06c5d241a19a8df4731dfc1 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 5 Jan 2023 15:07:50 -0500 Subject: [PATCH 348/979] add FetchData for VisiumV1 class --- DESCRIPTION | 4 ++-- NAMESPACE | 1 + R/objects.R | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3ac9154bc..0f0fe4e25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9015 -Date: 2023-01-04 +Version: 4.9.9.9016 +Date: 2023-01-05 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index 5798b4796..d10501c6e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ S3method(Cells,SlideSeq) S3method(Cells,VisiumV1) S3method(Features,SCTAssay) S3method(Features,SCTModel) +S3method(FetchData,VisiumV1) S3method(FindClusters,Seurat) S3method(FindClusters,default) S3method(FindMarkers,Assay) diff --git a/R/objects.R b/R/objects.R index 60eb261eb..2137b8ef5 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1985,6 +1985,28 @@ ScaleFactors.VisiumV1 <- function(object, ...) { return(slot(object = object, name = 'scale.factors')) } +#' @rdname FetchData +#' @method FetchData VisiumV1 +#' @export +#' @concept spatial +#' +FetchData.VisiumV1 <- function( + object, + vars, + cells = NULL, + ... +) { + if (is.numeric(x = cells)) { + cells <- Cells(x = object)[cells] + } else if (is.null(x = cells)) { + cells <- Cells(x = object) + } + vars.unkeyed <- gsub(pattern = paste0('^', Key(object)), replacement = '', x = vars) + coords <- GetTissueCoordinates(object = object)[cells, vars.unkeyed, drop = FALSE] + colnames(x = coords) <- vars + return(coords) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for R-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From d7e78c748170bdf32b16da8d1e49a2addf3ec707 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 5 Jan 2023 15:14:01 -0500 Subject: [PATCH 349/979] Fixes for FetchSCTResidual --- R/preprocessing5.R | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c1ebb86a9..b5e906293 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -365,7 +365,7 @@ LogNormalize.IterableMatrix <- function( data <- BPCells::t(BPCells::t(data) / colSums(data)) # Log normalization data <- log1p(data * scale.factor) - return(data) + return(data) } #' @method LogNormalize TileDBMatrix #' @export @@ -800,7 +800,7 @@ VST.default <- function( #' @rdname VST #' @method VST IterableMatrix #' @export -#' +#' VST.IterableMatrix <-function( data, nselect = 2000L, @@ -1700,7 +1700,7 @@ SCTransform.StdAssay <- function( #' FetchResiduals <- function(object, features, - assay = "SCT", + assay = NULL, umi.assay = "RNA", layer = "counts", clip.range = NULL, @@ -1867,17 +1867,24 @@ FetchResidualSCTModel <- function(object, clip.range = NULL, replace.value = FALSE, verbose = FALSE) { - clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct + model.cells <- character() + model.features <- Features(x = obj.query, layer = layer) + if (is.null(x = reference.SCT.model)){ + clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct + model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) + model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) + sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" + } - model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) - model.cells <- Cells(x = slot(object = object[[assay]], name = "SCTModel.list")[[SCTModel]]) - sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" layer.cells <- layer.cells %||% Cells(x = object[[umi.assay]], layer = layer) if (!is.null(reference.SCT.model)) { # use reference SCT model sct.method <- "reference" } - existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) + existing.scale.data <- NULL + if (is.null(x=reference.SCT.model)){ + existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) + } scale.data.cells <- colnames(x = existing.scale.data) scale.data.cells.common <- intersect(scale.data.cells, layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) @@ -1904,7 +1911,7 @@ FetchResidualSCTModel <- function(object, return (existing.scale.data[intersect(x = rownames(x = scale.data.cells), y = new_features),,drop=FALSE]) } - if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { + if (is.null(x = reference.SCT.model) & length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { existing_features <- names(x = which(x = ! apply( X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], MARGIN = 1, From 81aa01fec72f92a8268bcb007475629bfa17c8ed Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 5 Jan 2023 15:58:15 -0500 Subject: [PATCH 350/979] Centering based on reference model in FetchSCTResidual --- R/preprocessing5.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index b5e906293..61eeea80c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -2044,8 +2044,23 @@ FetchResidualSCTModel <- function(object, new_residuals[[i]] <- new_residual } new_residual <- do.call(what = cbind, args = new_residuals) - # centered data - new_residual <- new_residual - rowMeans(x = new_residual) + # centered data if no reference model is provided + if (is.null(x = reference.SCT.model)){ + new_residual <- new_residual - rowMeans(x = new_residual) + } else { + # subtract residual mean from reference model + if (verbose){ + message("Using residual mean from reference for centering") + } + vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) + ref.residuals.mean <- vst_out$gene_attr[rownames(x = new_residual),"residual_mean"] + new_residual <- sweep( + x = new_residual, + MARGIN = 1, + STATS = ref.residuals.mean, + FUN = "-" + ) + } # return (new_residuals) } else { # Some features do not exist From b2dfb26b16a828649b782f7d3c189c6263bee8c8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 5 Jan 2023 17:01:56 -0500 Subject: [PATCH 351/979] residuals based on reference model --- R/preprocessing5.R | 67 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 61eeea80c..147057017 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1868,7 +1868,7 @@ FetchResidualSCTModel <- function(object, replace.value = FALSE, verbose = FALSE) { model.cells <- character() - model.features <- Features(x = obj.query, layer = layer) + model.features <- Features(x = object, layer = layer) if (is.null(x = reference.SCT.model)){ clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) @@ -2086,4 +2086,67 @@ FetchResidualSCTModel <- function(object, return(new_residual) } - +#' temporal function to get residuals from reference +#' @importFrom sctransform get_residuals +#' @importFrom Matrix colSums +#' + +FetchResiduals_reference <- function(object, + reference.SCT.model = NULL, + features = NULL, + verbose = FALSE) { + features_to_compute <- features + vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) + + # override clip.range + clip.range <- vst_out$arguments$sct.clip.range + # get rid of the cell attributes + vst_out$cell_attr <- NULL + all.features <- intersect( + x = rownames(x = vst_out$gene_attr), + y = features_to_compute + ) + vst_out$gene_attr <- vst_out$gene_attr[all.features, , drop = FALSE] + vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features, , drop = FALSE] + + clip.max <- max(clip.range) + clip.min <- min(clip.range) + + + umi <- object[features_to_compute, , drop = FALSE] + + ## Add cell_attr for missing cells + cell_attr <- data.frame( + umi = colSums(object), + log_umi = log10(x = colSums(object)) + ) + rownames(cell_attr) <- colnames(object) + vst_out$cell_attr <- cell_attr + + if (verbose) { + message("using reference sct model") + } + + if (vst_out$arguments$min_variance == "umi_median"){ + min_var <- min_var_custom + } else { + min_var <- vst_out$arguments$min_variance + } + new_residual <- get_residuals( + vst_out = vst_out, + umi = umi, + residual_type = "pearson", + min_variance = min_var, + res_clip_range = c(clip.min, clip.max), + verbosity = as.numeric(x = verbose) * 2 + ) + + ref.residuals.mean <- vst_out$gene_attr[rownames(x = new_residual),"residual_mean"] + new_residual <- sweep( + x = new_residual, + MARGIN = 1, + STATS = ref.residuals.mean, + FUN = "-" + ) + return(new_residual) +} From 7c423d8f5718e74e838ba64894869e7c9ecfde8a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 5 Jan 2023 18:18:37 -0500 Subject: [PATCH 352/979] residuals for BPcells --- R/integration.R | 139 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 35 deletions(-) diff --git a/R/integration.R b/R/integration.R index e533131ef..76fe767ec 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4894,6 +4894,7 @@ ProjectCellEmbeddings.Seurat <- function( reference.assay = NULL, reduction = "pca", dims = 1:50, + normalization.method = c("LogNormalize", "SCT"), scale = TRUE, verbose = TRUE, feature.mean = NULL, @@ -4902,8 +4903,23 @@ ProjectCellEmbeddings.Seurat <- function( if (verbose) { message("Projecting cell embeddings") } + normalization.method <- match.arg(arg = normalization.method) query.assay <- query.assay %||% DefaultAssay(object = query) reference.assay <- reference.assay %||% DefaultAssay(object = reference) + if (normalization.method == 'SCT') { + if (!IsSCT(assay = reference[[reference.assay]])) { + stop('reference in ', reference.assay, ' assay does not have a SCT model' ) + } + reference.model.num <- length(slot(object = reference[[reference.assay]], name = "SCTModel.list")) + if (reference.model.num > 1) { + stop("Given reference assay (", reference.assay, ") has ", reference.model.num , + " reference sct models. Please provide a reference assay with a ", + " single reference sct model.", call. = FALSE) + } else if (reference.model.num == 0) { + stop("Given reference assay (", reference.assay, + ") doesn't contain a reference SCT model.") + } + } proj.pca <- ProjectCellEmbeddings( query = query[[query.assay]], reference = reference, @@ -4911,6 +4927,7 @@ ProjectCellEmbeddings.Seurat <- function( reduction = reduction, dims = dims, scale = scale, + normalization.method = normalization.method, verbose = verbose, feature.mean = feature.mean, feature.sd = feature.sd @@ -4929,6 +4946,7 @@ ProjectCellEmbeddings.Assay <- function( reduction = "pca", dims = 1:50, scale = TRUE, + normalization.method = NULL, verbose = TRUE, feature.mean = NULL, feature.sd = NULL @@ -4944,13 +4962,15 @@ ProjectCellEmbeddings.Assay <- function( proj.pca <- ProjectCellEmbeddings( query = GetAssayData( object = query, - slot = "data")[features,], + slot = "data"), reference = reference, reference.assay = reference.assay, reduction = reduction, dims = dims, scale = scale, + normalization.method = normalization.method, verbose = verbose, + features = features, feature.mean = feature.mean, feature.sd = feature.sd ) @@ -4968,10 +4988,14 @@ ProjectCellEmbeddings.SCTAssay <- function( reduction = "pca", dims = 1:50, scale = TRUE, + normalization.method = NULL, verbose = TRUE, feature.mean = NULL, feature.sd = NULL ) { + if (normalization.method != 'SCT') { + warning('Query data is SCT normalized, but normalization.method is set to LogNormalize') + } features <- Reduce( f = intersect, x = list( @@ -4999,6 +5023,7 @@ ProjectCellEmbeddings.StdAssay <- function( reduction = "pca", dims = 1:50, scale = TRUE, + normalization.method = NULL, verbose = TRUE, feature.mean = NULL, feature.sd = NULL @@ -5012,18 +5037,24 @@ ProjectCellEmbeddings.StdAssay <- function( rownames(x = query) ) ) - layers.set <- Layers(object = query, search = 'data') + if (normalization.method == 'SCT') { + layers.set <- Layers(object = query, search = 'counts') + } else { + layers.set <- Layers(object = query, search = 'data') + } proj.pca.list <- list() cell.list <- list() for (i in seq_along(layers.set)) { proj.pca.list[[i]] <- t(ProjectCellEmbeddings( - query = LayerData(object = query, layer = layers.set[i], features = features), + query = LayerData(object = query, layer = layers.set[i]), reference = reference, reference.assay = reference.assay, reduction = reduction, dims = dims, scale = scale, + normalization.method = normalization.method, verbose = verbose, + features = features, feature.mean = feature.mean, feature.sd = feature.sd )) @@ -5052,23 +5083,32 @@ ProjectCellEmbeddings.default <- function( reduction = "pca", dims = 1:50, scale = TRUE, + normalization.method = NULL, verbose = TRUE, + features = NULL, feature.mean = NULL, feature.sd = NULL ){ - features <- rownames(query) + features <- features %||% rownames(x = Loadings(object = reference[[reduction]])) +if (normalization.method == 'SCT') { + reference.SCT.model <- slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]] + query <- FetchResiduals_reference( + object = query, + reference.SCT.model = reference.SCT.model, + features = features) +} else { reference.data <- GetAssayData( object = reference, assay = reference.assay, slot = "data")[features, ] -if (is.null(x = feature.mean)) { - if (inherits(x = reference.data, what = 'dgCMatrix')) { - feature.mean <- RowMeanSparse(mat = reference.data) - } else { - feature.mean <- rowMeans(mat = reference.data) - } + if (is.null(x = feature.mean)) { + if (inherits(x = reference.data, what = 'dgCMatrix')) { + feature.mean <- RowMeanSparse(mat = reference.data) + } else { + feature.mean <- rowMeans(mat = reference.data) + } if (scale) { - feature.sd <- sqrt( + feature.sd <- sqrt( x = RowVarSparse( mat = as.sparse(reference.data) ) @@ -5088,8 +5128,8 @@ if (is.null(x = feature.mean)) { display_progress = FALSE ) } - dimnames(x = query) <- store.names +} ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] proj.pca <- t(crossprod(x = ref.feature.loadings, y = query)) return(proj.pca) @@ -5107,35 +5147,63 @@ ProjectCellEmbeddings.IterableMatrix <- function( reduction = "pca", dims = 1:50, scale = TRUE, + normalization.method = NULL, verbose = TRUE, + features = features, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + block.size = 10000 ) { - features <- rownames(query) - reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features,] - if (is.null(x = feature.mean)) { - if (inherits(x = reference.data, what = 'dgCMatrix')) { - feature.mean <- RowMeanSparse(mat = reference.data) - } else { - feature.mean <- rowMeans(mat = reference.data) - } - if (scale) { - feature.sd <- sqrt( - x = RowVarSparse( - mat = as.sparse(reference.data) - ) + features <- features %||% rownames(x = Loadings(object = reference[[reduction]])) + + if (normalization.method == 'SCT') { + reference.SCT.model <- slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]] + cells.grid <- split( + x = 1:ncol(query), + f = ceiling(seq_along(along.with = 1:ncol(query))/block.size) ) - feature.sd[is.na(x = feature.sd)] <- 1 - } else { - feature.sd <- rep(x = 1, nrow(x = reference.data)) + proj.list <- list() + for (i in seq_along(cells.grid)) { + query.i <- FetchResiduals_reference( + object = as.sparse(query[,cells.grid[[i]]]), + reference.SCT.model = reference.SCT.model, + features = features) + proj.list[[i]] <- t(Loadings(object = reference[[reduction]])[features,dims]) %*% query.i + } + proj.pca <- t(matrix( + data = unlist(proj.list), + nrow = length(dims), + ncol = ncol(query), + dimnames = list( + colnames(Embeddings(object = reference[[reduction]]))[dims], + colnames(query)) + )) + } else { + reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features,] + if (is.null(x = feature.mean)) { + if (inherits(x = reference.data, what = 'dgCMatrix')) { + feature.mean <- RowMeanSparse(mat = reference.data) + } else { + feature.mean <- rowMeans(mat = reference.data) + } + if (scale) { + feature.sd <- sqrt( + x = RowVarSparse( + mat = as.sparse(reference.data) + ) + ) + feature.sd[is.na(x = feature.sd)] <- 1 + } else { + feature.sd <- rep(x = 1, nrow(x = reference.data)) + } + feature.mean[is.na(x = feature.mean)] <- 1 } - feature.mean[is.na(x = feature.mean)] <- 1 + query.scale <- (query - feature.mean)/feature.sd + query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) + proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] + rownames(proj.pca) <- colnames(query) + colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] } - query.scale <- (query - feature.mean)/feature.sd - query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) - proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] - rownames(proj.pca) <- colnames(query) - colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] return(proj.pca) } @@ -5149,6 +5217,7 @@ ProjectCellEmbeddings.DelayedMatrix <- function( reference, assay = NULL, reduction, + normalization.method = NULL, dims = NULL, feature.mean = NULL, feature.sd = NULL From 6f1182e9be16f38c03d2a2372ea247139e666257 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 5 Jan 2023 22:51:54 -0500 Subject: [PATCH 353/979] fix bug create cate matrix --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index d0f81f225..951c05522 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2680,7 +2680,7 @@ CreateCategoryMatrix <- function( data <- labels } cells.name <- cells.name %||% rownames(data) - if (length(cells.name) != nrow(data)) { + if (!is.null(cells.name) & length(cells.name) != nrow(data)) { stop('length of cells name should be equal to the length of input labels') } if (ncol(x = data) == 0) { From 26af87f51938c90eca844d7694a366b616192aa5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 5 Jan 2023 23:26:54 -0500 Subject: [PATCH 354/979] add CalcN in Seurat --- NAMESPACE | 1 + R/preprocessing5.R | 7 +++++++ man/FetchResiduals.Rd | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 0a4089276..dc00defd1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -504,6 +504,7 @@ importFrom(SeuratObject,"Misc<-") importFrom(SeuratObject,"Project<-") importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") +importFrom(SeuratObject,.CalcN) importFrom(SeuratObject,.FilterObjects) importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.PropagateList) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 147057017..00f380fa6 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1005,6 +1005,13 @@ VST.matrix <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @importFrom SeuratObject .CalcN +#' +CalcN <- function(object) { + return(.CalcN(object)) +} + #' @method .CalcN IterableMatrix #' @export #' diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd index cf3a65edd..81536048f 100644 --- a/man/FetchResiduals.Rd +++ b/man/FetchResiduals.Rd @@ -7,7 +7,7 @@ FetchResiduals( object, features, - assay = "SCT", + assay = NULL, umi.assay = "RNA", layer = "counts", clip.range = NULL, From d8f01eaefdb427c5b398b1c35d45197ea5ab822d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 6 Jan 2023 12:59:57 -0500 Subject: [PATCH 355/979] fix leverage schore --- R/sketching.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 27ef92321..c0f6bd119 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -99,7 +99,11 @@ LeverageScoreSampling <- function( } object[[save]] <- NULL } - vars <- layers + vars <- grep(pattern = "^seurat_leverage_score_", x = names(x = object[[]]), + value = TRUE) + names(x = vars) <- vars + vars <- gsub(pattern = "^seurat_leverage_score_", replacement = "", + x = vars) vars <- vars[vars %in% Layers(object = object[[assay]])] if (!length(x = vars)) { stop("No leverage scores found for assay ", assay, call. = FALSE) @@ -125,7 +129,7 @@ LeverageScoreSampling <- function( sketched <- suppressWarnings(expr = subset( x = object[[assay]], cells = Reduce(f = union, x = cells), - layers = vars + layers = Layers(object = object[[assay]], search = layers) )) for (lyr in vars) { try( @@ -228,7 +232,7 @@ LeverageScore.default <- function( seed = seed )) Z <- object %*% (R.inv %*% JL) - if (inherits(x = Z, what = 'MatrixMultiply')) { + if (inherits(x = Z, what = 'IterableMatrix')) { Z.score <- matrix_stats(matrix = Z, row_stats = 'variance')$row_stats['variance',]*ncol(Z) } else { Z.score <- rowSums(x = Z ^ 2) From 9016027d13d945f623c642f05112a0a1bdbb7720 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 8 Jan 2023 18:19:47 -0500 Subject: [PATCH 356/979] set var features --- R/preprocessing5.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 00f380fa6..49c519f84 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -147,6 +147,11 @@ FindVariableFeatures.StdAssay <- function( rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) object[colnames(x = hvf.info)] <- hvf.info } + VariableFeatures(object = object) <- VariableFeatures( + object = object, + nfeatures = nselect, + simplify = TRUE + ) return(object) } From 8fcfc1274369d07b65fcaea55a15e874a2069a48 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 8 Jan 2023 20:37:19 -0500 Subject: [PATCH 357/979] fix select inte features v5 --- R/integration.R | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/R/integration.R b/R/integration.R index 76fe767ec..91f699838 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3001,33 +3001,14 @@ SelectIntegrationFeatures5 <- function( ) { assay <- assay %||% DefaultAssay(object = object) layers <- Layers(object = object[[assay]], search = layers) - vf.list <- VariableFeatures( + var.features <- VariableFeatures( object = object, assay = assay, method = method, - layer = layers - ) - var.features <- unlist(x = vf.list, use.names = FALSE) - var.features <- sort(x = table(var.features), decreasing = TRUE) - # Select only variable features present in all layers - fmat <- slot(object = object[[assay]], name = 'features')[, layers] - idx <- which(x = apply( - X = fmat[names(x = var.features), , drop = FALSE], - MARGIN = 1L, - FUN = all - )) - var.features <- var.features[idx] - tie.val <- var.features[min(nfeatures, length(x = var.features))] - # Select integration features - features <- names(x = var.features[which(x = var.features > tie.val)]) - if (length(x = features)) { - features <- .FeatureRank(features = features, flist = vf.list) - } - features.tie <- .FeatureRank( - features = names(x = var.features[which(x = var.features == tie.val)]), - flist = vf.list + layer = layers, + simplify = TRUE ) - return(head(x = c(features, features.tie), n = nfeatures)) + return(var.features) } #' @export From 93e85f9c9ebb2c9f11fd4d0fe404af125ab7ff69 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 9 Jan 2023 10:06:45 -0500 Subject: [PATCH 358/979] fix group correlation plot bug --- DESCRIPTION | 2 +- R/utilities.R | 5 ++--- R/visualization.R | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0f0fe4e25..c76e7154d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9016 +Version: 4.9.9.9017 Date: 2023-01-05 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/R/utilities.R b/R/utilities.R index beab62eba..16000f50b 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -971,9 +971,8 @@ GroupCorrelation <- function( grp.cors <- grp.cors[names(x = gene.grp)] grp.cors <- as.data.frame(x = grp.cors[which(x = !is.na(x = grp.cors))]) grp.cors$gene_grp <- gene.grp[rownames(x = grp.cors)] - colnames(x = grp.cors) <- c("cor", "feature_grp") - object[[assay]]["feature.grp"] <- grp.cors[, "feature_grp", drop = FALSE] - object[[assay]][paste0(var, "_cor")] <- grp.cors[, "cor", drop = FALSE] + colnames(x = grp.cors) <- c(paste0(var, "_cor"), "feature.grp") + object[[assay]][] <- grp.cors if (isTRUE(x = do.plot)) { print(GroupCorrelationPlot( object = object, diff --git a/R/visualization.R b/R/visualization.R index b3ffc2418..7f988379e 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4535,9 +4535,10 @@ GroupCorrelationPlot <- function( cor = "nCount_RNA_cor" ) { assay <- assay %||% DefaultAssay(object = object) - data <- object[[assay]][[c(feature.group, cor)]] + data <- object[[assay]][c(feature.group, cor)] data <- data[complete.cases(data), ] colnames(x = data) <- c('grp', 'cor') + data$grp <- as.character(data$grp) plot <- ggplot(data = data, aes_string(x = "grp", y = "cor", fill = "grp")) + geom_boxplot() + theme_cowplot() + From 49d19e981ea45f5698ef2c767b4b41b74e0fa473 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 9 Jan 2023 10:08:42 -0500 Subject: [PATCH 359/979] update yaml for v5 --- _pkgdown.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 326caab73..49c45764c 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -17,6 +17,8 @@ navbar: href: articles/install.html - text: "Get started" href: articles/get_started.html + - text: "Get started v5" + href: articles/get_started_v5.html - text: "Vignettes" menu: - text: Introductory Vignettes From 3ed644d7b144d9a8a71165a9c73b1331027bf15c Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 9 Jan 2023 13:52:25 -0500 Subject: [PATCH 360/979] begin slot -> layer transition --- R/preprocessing5.R | 14 +++++++------- R/visualization.R | 34 ++++++++++++++++++++++++++++------ _pkgdown.yaml | 2 +- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 49c519f84..9ea560498 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -70,14 +70,14 @@ FindVariableFeatures.StdAssay <- function( object, method = VST, nselect = 2000L, - layer = NULL, + layer = 'counts', span = 0.3, clip = NULL, key = NULL, verbose = TRUE, ... ) { - layer <- unique(x = layer) %||% DefaultLayer(object = object) + layer <- unique(x = layer) layer <- Layers(object = object, search = layer) if (is.null(x = key)) { false <- function(...) { @@ -549,13 +549,13 @@ NormalizeData.StdAssay <- function( method = 'LogNormalize', scale.factor = 1e4, margin = 1L, - layer = NULL, # TODO: set to counts + layer = 'counts', save = 'data', default = TRUE, verbose = TRUE, ... ) { - olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) + olayer <- layer <- unique(x = layer) layer <- Layers(object = object, search = layer) if (save %in% olayer) { default <- FALSE @@ -635,7 +635,7 @@ NormalizeData.Seurat5 <- function( ScaleData.StdAssay <- function( object, features = NULL, - layer = NULL, + layer = 'data', vars.to.regress = NULL, latent.data = NULL, by.layer = FALSE, @@ -652,7 +652,7 @@ ScaleData.StdAssay <- function( ... ) { use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) - olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) + olayer <- layer <- unique(x = layer) layer <- Layers(object = object, search = layer) if (isTRUE(x = use.umi)) { inform( @@ -1349,7 +1349,7 @@ SCTransform.StdAssay <- function( do.correct.umi <- FALSE do.center <- FALSE } - olayer <- layer <- unique(x = layer) %||% DefaultLayer(object = object) + olayer <- layer <- unique(x = layer) layers <- Layers(object = object, search = layer) dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) sct.assay.list <- list() diff --git a/R/visualization.R b/R/visualization.R index 7f988379e..d0b28c609 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -537,11 +537,20 @@ RidgePlot <- function( same.y.lims = FALSE, log = FALSE, ncol = NULL, - slot = 'data', + slot = deprecated(), + layer = 'data', stack = FALSE, combine = TRUE, fill.by = 'feature' ) { + if (is_present(arg = slot)) { + deprecate_soft( + when = '5.0.0', + what = 'RidgePlot(slot = )', + with = 'RidgePlot(layer = )' + ) + layer <- layer %||% slot + } return(ExIPlot( object = object, type = 'ridge', @@ -555,7 +564,7 @@ RidgePlot <- function( cols = cols, group.by = group.by, log = log, - slot = slot, + layer = layer, stack = stack, combine = combine, fill.by = fill.by @@ -609,7 +618,8 @@ VlnPlot <- function( same.y.lims = FALSE, log = FALSE, ncol = NULL, - slot = 'data', + slot = deprecated(), + layer = 'data', split.plot = FALSE, stack = FALSE, combine = TRUE, @@ -618,6 +628,14 @@ VlnPlot <- function( add.noise = TRUE, raster = NULL ) { + if (is_present(arg = slot)) { + deprecate_soft( + when = '5.0.0', + what = 'VlnPlot(slot = )', + with = 'VlnPlot(layer = )' + ) + layer <- layer %||% slot + } if ( !is.null(x = split.by) & getOption(x = 'Seurat.warn.vlnplot.split', default = TRUE) @@ -648,7 +666,7 @@ VlnPlot <- function( group.by = group.by, split.by = split.by, log = log, - slot = slot, + layer = layer, stack = stack, combine = combine, fill.by = fill.by, @@ -6552,7 +6570,8 @@ ExIPlot <- function( group.by = NULL, split.by = NULL, log = FALSE, - slot = 'data', + slot = deprecated(), + layer = 'data', stack = FALSE, combine = TRUE, fill.by = NULL, @@ -6560,6 +6579,9 @@ ExIPlot <- function( add.noise = TRUE, raster = NULL ) { + if (is_present(arg = slot)) { + layer <- layer %||% slot + } assay <- assay %||% DefaultAssay(object = object) DefaultAssay(object = object) <- assay cells <- Cells(x = object, assay = NULL) @@ -6591,7 +6613,7 @@ ExIPlot <- function( y = cells ) } - data <- FetchData(object = object, vars = features, slot = slot, cells = cells) + data <- FetchData(object = object, vars = features, slot = layer, cells = cells) pt.size <- pt.size %||% AutoPointSize(data = object) features <- colnames(x = data) data <- data[cells, , drop = FALSE] diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 49c45764c..bec6a63bd 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -76,7 +76,7 @@ navbar: href: articles/interaction_vignette.html - text: "Merging Seurat objects" href: articles/merge_vignette.html - - text: "Seurat 5 Vignettes" + - text: "Vignettes v5" menu: - text: Introductory Vignettes - text: "PBMC 3K guided tutorial" From d7cf9cb345389bb0efe1f78998dc753ec890f4ac Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 9 Jan 2023 13:53:20 -0500 Subject: [PATCH 361/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e23ae6a6..068ca49c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9017 -Date: 2023-01-05 +Version: 4.9.9.9018 +Date: 2023-01-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 8c12991613b587cce5f5c0b7d5433d38c65a885b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 10 Jan 2023 10:48:25 -0500 Subject: [PATCH 362/979] fix new BPCells --- NAMESPACE | 1 - R/objects.R | 7 ------- R/sketching.R | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index dc00defd1..c7c4eccdb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -152,7 +152,6 @@ S3method(as.Seurat,CellDataSet) S3method(as.Seurat,SingleCellExperiment) S3method(as.SingleCellExperiment,Seurat) S3method(as.data.frame,Matrix) -S3method(as.matrix,IterableMatrix) S3method(as.sparse,H5Group) S3method(as.sparse,IterableMatrix) S3method(components,SCTAssay) diff --git a/R/objects.R b/R/objects.R index 60eb261eb..9e97732b7 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1395,13 +1395,6 @@ as.sparse.IterableMatrix <- function(x, ...) { } -#' @method as.matrix IterableMatrix -#' @export -#' -as.matrix.IterableMatrix <- function(x, ...) { - return(as.matrix(x = as.sparse(x = x))) -} - #' Get Cell Names #' #' @inheritParams SeuratObject::Cells diff --git a/R/sketching.R b/R/sketching.R index c0f6bd119..702d5c2ea 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -233,7 +233,7 @@ LeverageScore.default <- function( )) Z <- object %*% (R.inv %*% JL) if (inherits(x = Z, what = 'IterableMatrix')) { - Z.score <- matrix_stats(matrix = Z, row_stats = 'variance')$row_stats['variance',]*ncol(Z) + Z.score <- matrix_stats(matrix = Z^2, row_stats = 'mean')$row_stats['mean',]*ncol(x = Z) } else { Z.score <- rowSums(x = Z ^ 2) } From eb6d3e64e350bc581fac2bb464ec391b39047551 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 10 Jan 2023 14:21:57 -0500 Subject: [PATCH 363/979] fix select inte features v5 --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index 91f699838..1469668dd 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3004,6 +3004,7 @@ SelectIntegrationFeatures5 <- function( var.features <- VariableFeatures( object = object, assay = assay, + nfeatures = nfeatures, method = method, layer = layers, simplify = TRUE From bf1003c3e4c62c6812af62bb141768c8768594ce Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 10 Jan 2023 15:54:18 -0500 Subject: [PATCH 364/979] fix find spatially variable features --- DESCRIPTION | 4 +- R/preprocessing.R | 2 +- R/preprocessing5.R | 67 ++++++++++++++++++++++++++ vignettes/seurat5_spatial_vignette.Rmd | 2 +- 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 068ca49c1..983441d0e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9018 -Date: 2023-01-09 +Version: 4.9.9.9019 +Date: 2023-01-10 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/R/preprocessing.R b/R/preprocessing.R index e57406f88..07afeb350 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3861,7 +3861,7 @@ FindSpatiallyVariableFeatures.Assay <- function( svf.info[[var.name]] <- FALSE svf.info[[var.name]][1:(min(nrow(x = svf.info), nfeatures))] <- TRUE svf.info[[var.name.rank]] <- 1:nrow(x = svf.info) - object[[names(x = svf.info)]] <- svf.info + object[names(x = svf.info)] <- svf.info return(object) } diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 9ea560498..cd9d11ad7 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -155,6 +155,73 @@ FindVariableFeatures.StdAssay <- function( return(object) } +#' @param layer Layer in the Assay5 to pull data from +#' @param features If provided, only compute on given features. Otherwise, +#' compute for all features. +#' @param nfeatures Number of features to mark as the top spatially variable. +#' +#' @method FindSpatiallyVariableFeatures StdAssay +#' @rdname FindSpatiallyVariableFeatures +#' @concept preprocessing +#' @concept spatial +#' @export +#' +FindSpatiallyVariableFeatures.StdAssay <- function( + object, + layer = "scale.data", + spatial.location, + selection.method = c('markvariogram', 'moransi'), + features = NULL, + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + nfeatures = nfeatures, + verbose = TRUE, + ... +) { + features <- features %||% rownames(x = object) + if (selection.method == "markvariogram" && "markvariogram" %in% names(x = Misc(object = object))) { + features.computed <- names(x = Misc(object = object, slot = "markvariogram")) + features <- features[! features %in% features.computed] + } + data <- GetAssayData(object = object, layer = layer) + data <- as.matrix(x = data[features, ]) + data <- data[RowVar(x = data) > 0, ] + if (nrow(x = data) != 0) { + svf.info <- FindSpatiallyVariableFeatures( + object = data, + spatial.location = spatial.location, + selection.method = selection.method, + r.metric = r.metric, + x.cuts = x.cuts, + y.cuts = y.cuts, + verbose = verbose, + ... + ) + } else { + svf.info <- c() + } + if (selection.method == "markvariogram") { + if ("markvariogram" %in% names(x = Misc(object = object))) { + svf.info <- c(svf.info, Misc(object = object, slot = "markvariogram")) + } + suppressWarnings(expr = Misc(object = object, slot = "markvariogram") <- svf.info) + svf.info <- ComputeRMetric(mv = svf.info, r.metric) + svf.info <- svf.info[order(svf.info[, 1]), , drop = FALSE] + } + if (selection.method == "moransi") { + colnames(x = svf.info) <- paste0("MoransI_", colnames(x = svf.info)) + svf.info <- svf.info[order(svf.info[, 2], -abs(svf.info[, 1])), , drop = FALSE] + } + var.name <- paste0(selection.method, ".spatially.variable") + var.name.rank <- paste0(var.name, ".rank") + svf.info[[var.name]] <- FALSE + svf.info[[var.name]][1:(min(nrow(x = svf.info), nfeatures))] <- TRUE + svf.info[[var.name.rank]] <- 1:nrow(x = svf.info) + object[names(x = svf.info)] <- svf.info + return(object) +} + #' @importFrom rlang enquo #' @method FindVariableFeatures Seurat5 #' @export diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 8252642d5..9bbabf874 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -58,7 +58,7 @@ library(dplyr) ```{r libraries.for.rmd, echo = FALSE} library("htmltools") -#library("vembedr") +library("vembedr") ``` From 301163ea98f51993a9a2f4e9db981715faebe598 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 10 Jan 2023 17:14:51 -0500 Subject: [PATCH 365/979] Fix VariableFeatures for SCT assay --- R/integration.R | 28 ++++++++-------- R/objects.R | 79 +++++++++++++++++++++++++++++++++++----------- R/preprocessing5.R | 23 +++++++------- 3 files changed, 86 insertions(+), 44 deletions(-) diff --git a/R/integration.R b/R/integration.R index 1469668dd..d886052c3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1970,19 +1970,19 @@ IntegrateSketchEmbeddings <- function( ' atomic cells identified in the atoms' ) message("Correcting embeddings") - } + } emb <- UnSketchEmbeddings( atom.data = LayerData( object = object[[atoms]], layer = layers[i], features = features - ), + ), atom.cells = cells.sketch, orig.data = LayerData( object = object[[orig]], layer = layers[i], features = features - ), + ), embeddings = Embeddings(object = object[[reduction]]), sketch.matrix = sketch.matrix ) @@ -3029,7 +3029,7 @@ SelectSCTIntegrationFeatures <- function( vf.list <- VariableFeatures( object = object[[assay]], layer = models, - n = nfeatures, + nfeatures = nfeatures, simplify = FALSE ) var.features <- sort( @@ -3496,7 +3496,7 @@ TransferData <- function( #' Transfer data from sketch data to full data #' @export -#' +#' TransferSketchLabels <- function( object, atoms = 'sketch', @@ -3528,15 +3528,15 @@ TransferSketchLabels <- function( !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || !all(rownames(full_sketch.weight) == colnames(object[[atoms]])) || recompute.weights - + if (compute.neighbors) { if (verbose) { message("Finding sketch neighbors") } full_sketch.nn <- Seurat:::NNHelper( - query = Embeddings(object[[reduction]])[, dims], - data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], - k = k, + query = Embeddings(object[[reduction]])[, dims], + data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], + k = k, method = "annoy" ) } @@ -3553,7 +3553,7 @@ TransferSketchLabels <- function( } object@tools$TransferSketchLabels$full_sketch.nn <- full_sketch.nn object@tools$TransferSketchLabels$full_sketch.weight <- full_sketch.weight - + if (length(refdata) == 1 & is.character(refdata)) { refdata <- list(refdata) names(refdata) <- unlist(refdata) @@ -5121,7 +5121,7 @@ if (normalization.method == 'SCT') { #' @method ProjectCellEmbeddings IterableMatrix #' @export #' -#' +#' ProjectCellEmbeddings.IterableMatrix <- function( query, reference, @@ -7733,12 +7733,12 @@ FastRPCAIntegration <- function( #' Transfer embeddings from sketched cells to the full data -#' +#' #' @importFrom MASS ginv #' @importFrom Matrix t -#' +#' #' @export -#' +#' UnSketchEmbeddings <- function(atom.data, atom.cells = NULL, orig.data, diff --git a/R/objects.R b/R/objects.R index 25ecca1cc..d7781d4b6 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1389,7 +1389,7 @@ as.sparse.H5Group <- function(x, ...) { #' @method as.sparse IterableMatrix #' @export -#' +#' as.sparse.IterableMatrix <- function(x, ...) { return(as(object = x, Class = 'dgCMatrix')) } @@ -1904,60 +1904,101 @@ SCTResults.Seurat <- function(object, assay = "SCT", slot, model = NULL, ...) { return(SCTResults(object = object[[assay]], slot = slot, model = model, ...)) } +#' @importFrom utils head #' @method VariableFeatures SCTModel #' @export #' -VariableFeatures.SCTModel <- function(object, n = 3000, ...) { - if (!is_scalar_integerish(x = n) || (!is_na(x = n < 1L) && n < 1L)) { - abort(message = "'n' must be a single positive integer") +VariableFeatures.SCTModel <- function(object, nfeatures = 3000, ...) { + if (!is_scalar_integerish(x = nfeatures) || (!is_na(x = nfeatures < 1L) && nfeatures < 1L)) { + abort(message = "'nfeatures' must be a single positive integer") } feature.attr <- SCTResults(object = object, slot = 'feature.attributes') feature.variance <- feature.attr[, 'residual_variance'] names(x = feature.variance) <- row.names(x = feature.attr) feature.variance <- sort(x = feature.variance, decreasing = TRUE) - if (is_na(x = n)) { + if (is_na(x = nfeatures)) { return(names(x = feature.variance)) } - return(head(x = names(x = feature.variance), n = n)) + return(head(x = names(x = feature.variance), n = nfeatures)) } +#' @importFrom utils head #' @method VariableFeatures SCTAssay #' @export #' VariableFeatures.SCTAssay <- function( object, layer = NULL, - n = 3000, + nfeatures = 3000, simplify = TRUE, + use.var.features = TRUE, ... ) { - layer <- layer %||% levels(x = object)[1L] - if (is_na(x = layer)) { + nfeatures <- nfeatures %||% 3000 + if (is.null(x = layer)) { layer <- levels(x = object) } + # Is the information already in var.features? + var.features.existing <- object@var.features + if (simplify == TRUE & use.var.features == TRUE & length(var.features.existing)>=nfeatures){ + return (head(x = var.features.existing, n = nfeatures)) + } + layer <- match.arg(arg = layer, choices = levels(x = object), several.ok = TRUE) - variable.features <- sapply( + # run variable features on each model + + vf.list <- sapply( X = layer, FUN = function(lyr) { return(VariableFeatures( object = components(object = object, model = lyr), - n = n, + nfeatures = nfeatures, ... )) }, simplify = FALSE, USE.NAMES = TRUE ) - if (length(x = variable.features) == 1L) { - if (isFALSE(x = simplify)) { - return(variable.features) - } - return(variable.features[[1L]]) + if (isFALSE(x = simplify)){ + return (vf.list) } - if (isTRUE(x = simplify)) { - return(Reduce(f = union, x = variable.features)) + var.features <- sort( + x = table(unlist(x = vf.list, use.names = FALSE)), + decreasing = TRUE + ) + for (i in 1:length(x = layer)) { + vst_out <- SCTModel_to_vst(SCTModel = slot(object = object, name = "SCTModel.list")[[layer[[i]]]]) + var.features <- var.features[names(x = var.features) %in% rownames(x = vst_out$gene_attr)] + } + tie.val <- var.features[min(nfeatures, length(x = var.features))] + features <- names(x = var.features[which(x = var.features > tie.val)]) + if (length(x = features) > 0) { + feature.ranks <- sapply(X = features, FUN = function(x) { + ranks <- sapply(X = vf.list, FUN = function(vf) { + if (x %in% vf) { + return(which(x = x == vf)) + } + return(NULL) + }) + median(x = unlist(x = ranks)) + }) + features <- names(x = sort(x = feature.ranks)) } - return(variable.features) + features.tie <- var.features[which(x = var.features == tie.val)] + tie.ranks <- sapply(X = names(x = features.tie), FUN = function(x) { + ranks <- sapply(X = vf.list, FUN = function(vf) { + if (x %in% vf) { + return(which(x = x == vf)) + } + return(NULL) + }) + median(x = unlist(x = ranks)) + }) + features <- c( + features, + names(x = head(x = sort(x = tie.ranks), nfeatures - length(x = features))) + ) + return(features) } #' @rdname ScaleFactors diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 9ea560498..cdeb8e30c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1012,7 +1012,7 @@ VST.matrix <- function( #' @importFrom SeuratObject .CalcN -#' +#' CalcN <- function(object) { return(.CalcN(object)) } @@ -1662,7 +1662,8 @@ SCTransform.StdAssay <- function( } merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) - VariableFeatures(object = merged.assay) <- intersect(x = var.features, y = rownames(x = GetAssayData(object = merged.assay, slot='scale.data'))) + #VariableFeatures(object = merged.assay) <- intersect(x = var.features, y = rownames(x = GetAssayData(object = merged.assay, slot='scale.data'))) + VariableFeatures(object = merged.assay) <- VariableFeatures(object = merged.assay, use.var.features = FALSE) # set the names of SCTmodels to be layer names models <- slot(object = merged.assay, name="SCTModel.list") names(models) <- names(x = sct.assay.list) @@ -2101,7 +2102,7 @@ FetchResidualSCTModel <- function(object, #' temporal function to get residuals from reference #' @importFrom sctransform get_residuals #' @importFrom Matrix colSums -#' +#' FetchResiduals_reference <- function(object, reference.SCT.model = NULL, @@ -2109,7 +2110,7 @@ FetchResiduals_reference <- function(object, verbose = FALSE) { features_to_compute <- features vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) - + # override clip.range clip.range <- vst_out$arguments$sct.clip.range # get rid of the cell attributes @@ -2120,13 +2121,13 @@ FetchResiduals_reference <- function(object, ) vst_out$gene_attr <- vst_out$gene_attr[all.features, , drop = FALSE] vst_out$model_pars_fit <- vst_out$model_pars_fit[all.features, , drop = FALSE] - + clip.max <- max(clip.range) clip.min <- min(clip.range) - - + + umi <- object[features_to_compute, , drop = FALSE] - + ## Add cell_attr for missing cells cell_attr <- data.frame( umi = colSums(object), @@ -2134,11 +2135,11 @@ FetchResiduals_reference <- function(object, ) rownames(cell_attr) <- colnames(object) vst_out$cell_attr <- cell_attr - + if (verbose) { message("using reference sct model") } - + if (vst_out$arguments$min_variance == "umi_median"){ min_var <- min_var_custom } else { @@ -2152,7 +2153,7 @@ FetchResiduals_reference <- function(object, res_clip_range = c(clip.min, clip.max), verbosity = as.numeric(x = verbose) * 2 ) - + ref.residuals.mean <- vst_out$gene_attr[rownames(x = new_residual),"residual_mean"] new_residual <- sweep( x = new_residual, From 3289869ebb8a2f347d4f31fe0e6203cda95ae5d5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 10 Jan 2023 18:32:25 -0500 Subject: [PATCH 366/979] fix project cell emb --- R/integration.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 1469668dd..c12f23520 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5079,6 +5079,7 @@ if (normalization.method == 'SCT') { reference.SCT.model = reference.SCT.model, features = features) } else { + query <- query[features,] reference.data <- GetAssayData( object = reference, assay = reference.assay, @@ -5161,6 +5162,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( colnames(query)) )) } else { + query <- query[features,] reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features,] if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { @@ -5181,7 +5183,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( feature.mean[is.na(x = feature.mean)] <- 1 } query.scale <- (query - feature.mean)/feature.sd - query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) + #query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] rownames(proj.pca) <- colnames(query) colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] From 478e7eac17e9986e58a0297f7a50b71fdb5d973e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 11 Jan 2023 11:14:06 -0500 Subject: [PATCH 367/979] modify leverage score --- R/sketching.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 702d5c2ea..6e932695f 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -171,7 +171,7 @@ LeverageScore.default <- function( ) { # Check the dimensions of the object, nsketch, and ndims ncells <- ncol(x = object) - if (ncells < nsketch) { + if (ncells < nsketch*1.5) { Z <- irlba(A = object, nv = 50, nu = 0, verbose = FALSE)$v return(rowSums(x = Z ^ 2)) } @@ -233,7 +233,8 @@ LeverageScore.default <- function( )) Z <- object %*% (R.inv %*% JL) if (inherits(x = Z, what = 'IterableMatrix')) { - Z.score <- matrix_stats(matrix = Z^2, row_stats = 'mean')$row_stats['mean',]*ncol(x = Z) + Z.score <- matrix_stats(matrix = Z ^ 2, row_stats = 'mean' + )$row_stats['mean',]*ncol(x = Z) } else { Z.score <- rowSums(x = Z ^ 2) } From e7f0d8829b65b149c47471359be18df26a57f063 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 11 Jan 2023 11:34:34 -0500 Subject: [PATCH 368/979] fix vignettes --- vignettes/seurat5_cell_cycle_vignette.Rmd | 2 +- vignettes/seurat5_sctransform_v2_vignette.Rmd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/seurat5_cell_cycle_vignette.Rmd b/vignettes/seurat5_cell_cycle_vignette.Rmd index d67e2fed5..3f51104f0 100644 --- a/vignettes/seurat5_cell_cycle_vignette.Rmd +++ b/vignettes/seurat5_cell_cycle_vignette.Rmd @@ -48,7 +48,7 @@ s.genes <- cc.genes$s.genes g2m.genes <- cc.genes$g2m.genes # Create our Seurat object and complete the initalization steps -marrow <- CreateSeuratObject(counts = exp.mat) +marrow <- CreateSeuratObject(counts = as.matrix(exp.mat)) marrow <- NormalizeData(marrow) marrow <- FindVariableFeatures(marrow, selection.method = 'vst') marrow <- ScaleData(marrow, features = rownames(marrow)) diff --git a/vignettes/seurat5_sctransform_v2_vignette.Rmd b/vignettes/seurat5_sctransform_v2_vignette.Rmd index 3a6ba3faf..69a98b42a 100644 --- a/vignettes/seurat5_sctransform_v2_vignette.Rmd +++ b/vignettes/seurat5_sctransform_v2_vignette.Rmd @@ -87,9 +87,9 @@ InstallData("ifnb") ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -LoadData("ifnb") +ifnb <- LoadData("ifnb") ifnb <- UpdateSeuratObject(ifnb) -# ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") From f275663b8926a373fb9d6c917c6d9d3b2e38f404 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 11 Jan 2023 11:56:25 -0500 Subject: [PATCH 369/979] add layer to cell cycle vignette --- vignettes/seurat5_cell_cycle_vignette.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/seurat5_cell_cycle_vignette.Rmd b/vignettes/seurat5_cell_cycle_vignette.Rmd index 3f51104f0..59e6a7fc8 100644 --- a/vignettes/seurat5_cell_cycle_vignette.Rmd +++ b/vignettes/seurat5_cell_cycle_vignette.Rmd @@ -57,7 +57,7 @@ marrow <- ScaleData(marrow, features = rownames(marrow)) If we run a PCA on our object, using the variable genes we found in `FindVariableFeatures()` above, we see that while most of the variance can be explained by lineage, PC8 and PC10 are split on cell-cycle genes including *TOP2A* and *MKI67*. We will attempt to regress this signal from the data, so that cell-cycle heterogeneity does not contribute to PCA or downstream analysis. ```{r justification, message=TRUE} -marrow <- RunPCA(marrow, features = VariableFeatures(marrow), ndims.print = 6:10, +marrow <- RunPCA(marrow, features = VariableFeatures(marrow, layer = 'counts'), ndims.print = 6:10, nfeatures.print = 10) DimHeatmap(marrow, dims = c(8, 10)) ``` @@ -104,7 +104,7 @@ marrow <- ScaleData(marrow, vars.to.regress = c('S.Score', 'G2M.Score'), feature ```{r pca2, message=TRUE} # Now, a PCA on the variable genes no longer returns components associated with cell cycle -marrow <- RunPCA(marrow, features = VariableFeatures(marrow), nfeatures.print = 10) +marrow <- RunPCA(marrow, features = VariableFeatures(marrow, layer = 'counts'), nfeatures.print = 10) ``` ```{r pca3} @@ -128,7 +128,7 @@ marrow <- ScaleData(marrow, vars.to.regress = 'CC.Difference', features = rownam ```{r pca4, message=TRUE} #cell cycle effects strongly mitigated in PCA -marrow <- RunPCA(marrow, features = VariableFeatures(marrow), nfeatures.print = 10) +marrow <- RunPCA(marrow, features = VariableFeatures(marrow, layer = 'counts'), nfeatures.print = 10) ``` ```{r pca5} From 529be2853a074371b5c493c517e6704c8e9d2d24 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 12 Jan 2023 09:49:09 -0500 Subject: [PATCH 370/979] update spatial imaging vignette --- vignettes/seurat5_spatial_vignette_2.Rmd | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 90bbee2a3..fdb11f529 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -45,6 +45,7 @@ First, we load the packages necessary for this vignette. ```{r init, message=FALSE, warning=FALSE} library(Seurat) +options(Seurat.object.assay.version = "v5") library(future) plan("multisession", workers = 10) ``` @@ -96,7 +97,7 @@ head(FetchData(vizgen.obj[["s2r1"]][["molecules"]], vars="Chrm1")) We start by performing a standard unsupervised clustering analysis, essentially first treating the dataset as an scRNA-seq experiment. We use SCTransform-based normalization, though we slightly modify the default clipping parameters to mitigate the effect of outliers that we occasionally observe in smFISH experiments. After normalization, we can run dimensional reduction and clustering. ```{r analysis, results='hide'} -vizgen.obj <- SCTransform(vizgen.obj, assay = "Vizgen", clip.range = c(-10,10),) +vizgen.obj <- SCTransform(vizgen.obj, assay = "Vizgen", clip.range = c(-10,10)) vizgen.obj <- RunPCA(vizgen.obj, npcs = 30, features = rownames(vizgen.obj)) vizgen.obj <- RunUMAP(vizgen.obj, dims = 1:30) vizgen.obj <- FindNeighbors(vizgen.obj, reduction = "pca", dims = 1:30) @@ -214,6 +215,7 @@ nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/na # add in precomputed Azimuth annotations azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) +azimuth.data$umap@assay.used <- "Nanostring" nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 @@ -248,7 +250,7 @@ ImageDimPlot(nano.obj, fov = "lung5.rep1", cells = WhichCells(nano.obj, idents=c We can also visualize gene expression markers a few different ways: ```{r, fig.width=10, fig.height=5} -VlnPlot(nano.obj, features = "KRT17", slot = "counts", pt.size = 0.1, y.max = 30) + NoLegend() +VlnPlot(nano.obj, features = "KRT17", layer = "counts", pt.size = 0.1, y.max = 30) + NoLegend() ``` ```{r, fig.width=5, fig.height=4} @@ -338,3 +340,7 @@ Each of these datasets represents an opportunity to learn organizing principles sessionInfo() ```
    + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_spatial_vignette_2.csv") +``` From 7e579c4eb98b6a7d400e17ddaa8c385584b3abd9 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 12 Jan 2023 11:30:53 -0500 Subject: [PATCH 371/979] FindTransferAnchors query assay assignment --- R/integration.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 76fe767ec..a2d5fddad 100644 --- a/R/integration.R +++ b/R/integration.R @@ -841,10 +841,13 @@ FindTransferAnchors <- function( ) feature.mean <- "SCT" } + # make new query assay w same name as reference assay + query[[reference.assay]] <- query[[query.assay]] + DefaultAssay(query) <- reference.assay # only keep necessary info from objects query <- DietSeurat( object = query, - assays = query.assay, + assays = reference.assay, dimreducs = reference.reduction, features = features, scale.data = TRUE @@ -1077,7 +1080,7 @@ FindTransferAnchors <- function( } else { projected.lsi <- ProjectSVD( reduction = reference[[reference.reduction]], - data = GetAssayData(object = query, assay = query.assay, slot = "data"), + data = GetAssayData(object = query, assay = reference.assay, slot = "data"), mode = "lsi", do.center = FALSE, do.scale = FALSE, @@ -1137,7 +1140,7 @@ FindTransferAnchors <- function( } anchors <- FindAnchors( object.pair = combined.ob, - assay = c(reference.assay, query.assay), + assay = reference.assay, slot = "data", cells1 = colnames(x = reference), cells2 = colnames(x = query), @@ -1158,7 +1161,7 @@ FindTransferAnchors <- function( verbose = verbose ) reductions <- slot(object = combined.ob, name = "reductions") - for (i in unique(x = c(reference.assay, query.assay))) { + for (i in unique(x = c(reference.assay))) { dummy.assay <- paste0(i, "DUMMY") suppressWarnings( expr = combined.ob[[dummy.assay]] <- CreateDummyAssay(assay = combined.ob[[i]]) From 440e41ce4470c3625f571fe68e4ad2a35bf185b2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 12 Jan 2023 14:59:35 -0500 Subject: [PATCH 372/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e02c8655b..807c27f1d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9014 -Date: 2022-12-06 +Version: 4.9.9.9015 +Date: 2023-01-12 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 880df8e9e56b3f695356366b9bd63b49455fd8d8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 12 Jan 2023 15:02:14 -0500 Subject: [PATCH 373/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 983441d0e..a68af7c8f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9019 -Date: 2023-01-10 +Version: 4.9.9.9020 +Date: 2023-01-12 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 0cb0401bf1a9dde35fe2d73c5ad8a3229d7da8fe Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 12 Jan 2023 17:31:08 -0500 Subject: [PATCH 374/979] SCT mapping --- R/integration.R | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/R/integration.R b/R/integration.R index c1bb25786..9b231ce62 100644 --- a/R/integration.R +++ b/R/integration.R @@ -804,7 +804,7 @@ FindTransferAnchors <- function( reduction.2 <- character() feature.mean <- NULL reference.reduction.init <- reference.reduction - if (normalization.method == "SCT") { + if (normalization.method == "SCT" && !inherits(x = query[[query.assay]]$counts, what = 'IterableMatrix')) { # ensure all residuals required are computed query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) if (is.null(x = reference.reduction)) { @@ -842,7 +842,7 @@ FindTransferAnchors <- function( feature.mean <- "SCT" } # make new query assay w same name as reference assay - query[[reference.assay]] <- query[[query.assay]] + suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) DefaultAssay(query) <- reference.assay # only keep necessary info from objects query <- DietSeurat( @@ -858,7 +858,6 @@ FindTransferAnchors <- function( warnings("reference assay is diffrent from the assay.used in", reference.reduction) slot(object = reference[[reference.reduction]], name = "assay.used") <- reference.assay } - reference <- DietSeurat( object = reference, assays = reference.assay, @@ -930,6 +929,7 @@ FindTransferAnchors <- function( projected.pca <- ProjectCellEmbeddings( reference = reference, reduction = reference.reduction, + normalization.method = normalization.method, query = query, scale = scale, dims = dims, @@ -5842,16 +5842,20 @@ ValidateParams_FindTransferAnchors <- function( "you can set recompute.residuals to FALSE", call. = FALSE) } } - query <- SCTransform( - object = query, - reference.SCT.model = slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]], - residual.features = features, - assay = query.umi.assay, - new.assay.name = new.sct.assay, - verbose = FALSE - ) - ModifyParam(param = "query.assay", value = new.sct.assay) - ModifyParam(param = "query", value = query) + if (inherits(x = query[[query.umi.assay]]$counts, what = 'IterableMatrix')) { + query[[query.umi.assay]]$scale.data <- query[[query.umi.assay]]$counts + } else { + query <- SCTransform( + object = query, + reference.SCT.model = slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]], + residual.features = features, + assay = query.umi.assay, + new.assay.name = new.sct.assay, + verbose = FALSE + ) + ModifyParam(param = "query.assay", value = new.sct.assay) + ModifyParam(param = "query", value = query) + } ModifyParam(param = "reference", value = reference) } if (IsSCT(assay = reference[[reference.assay]]) && normalization.method == "LogNormalize") { From ed50f495a181dd0ea63ad95750797c84c920bbfb Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 12 Jan 2023 22:23:40 -0500 Subject: [PATCH 375/979] refactor SCT FindTransferAnchor --- R/integration.R | 84 ++++++++++++++++++++----------------------------- R/objects.R | 3 ++ 2 files changed, 37 insertions(+), 50 deletions(-) diff --git a/R/integration.R b/R/integration.R index 9b231ce62..7c310d351 100644 --- a/R/integration.R +++ b/R/integration.R @@ -751,7 +751,6 @@ FindTransferAnchors <- function( reference.assay = NULL, reference.neighbors = NULL, query.assay = NULL, - query.layers = NULL, reduction = "pcaproject", reference.reduction = NULL, project.query = FALSE, @@ -780,7 +779,6 @@ FindTransferAnchors <- function( reference.assay = reference.assay, reference.neighbors = reference.neighbors, query.assay = query.assay, - query.layers = query.layers, reduction = reduction, reference.reduction = reference.reduction, project.query = project.query, @@ -804,42 +802,34 @@ FindTransferAnchors <- function( reduction.2 <- character() feature.mean <- NULL reference.reduction.init <- reference.reduction - if (normalization.method == "SCT" && !inherits(x = query[[query.assay]]$counts, what = 'IterableMatrix')) { - # ensure all residuals required are computed - query <- suppressWarnings(expr = GetResidual(object = query, assay = query.assay, features = features, verbose = FALSE)) + if (normalization.method == "SCT") { if (is.null(x = reference.reduction)) { - reference <- suppressWarnings(expr = GetResidual(object = reference, assay = reference.assay, features = features, verbose = FALSE)) + reference <- suppressWarnings(expr = GetResidual( + object = reference, + assay = reference.assay, + features = features, + verbose = FALSE + )) + reference <- ScaleData( + object = reference, + features = features, + do.scale = FALSE, + verbose = FALSE + ) features <- intersect( x = features, - y = intersect( - x = rownames(x = GetAssayData(object = query[[query.assay]], slot = "scale.data")), - y = rownames(x = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")) - ) + y = rownames(reference[[reference.assay]]$scale.data) ) - reference[[reference.assay]] <- as( - object = CreateAssayObject( - data = GetAssayData(object = reference[[reference.assay]], slot = "scale.data")[features, ]), - Class = "SCTAssay" - ) - reference <- SetAssayData( - object = reference, - slot = "scale.data", - assay = reference.assay, - new.data = as.matrix(x = GetAssayData(object = reference[[reference.assay]], slot = "data")) - ) - } - query[[query.assay]] <- as( - object = CreateAssayObject( - data = GetAssayData(object = query[[query.assay]], slot = "scale.data")[features, ]), - Class = "SCTAssay" - ) - query <- SetAssayData( - object = query, - slot = "scale.data", - assay = query.assay, - new.data = as.matrix(x = GetAssayData(object = query[[query.assay]], slot = "data")) - ) - feature.mean <- "SCT" + VariableFeatures(reference) <- features + } + if (IsSCT(assay = query[[query.assay]])) { + query <- suppressWarnings(expr = GetResidual( + object = query, + assay = query.assay, + features = features, + verbose = FALSE + )) + } } # make new query assay w same name as reference assay suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) @@ -5707,7 +5697,6 @@ ValidateParams_FindTransferAnchors <- function( reference.assay, reference.neighbors, query.assay, - query.layers, reduction, reference.reduction, project.query, @@ -5780,6 +5769,10 @@ ValidateParams_FindTransferAnchors <- function( stop("The project.query workflow is not compatible with reduction = 'cca'", call. = FALSE) } + if (normalization.method == "SCT" && isTRUE(x = project.query) && !IsSCT(query[[query.assay]])) { + stop("In the project.query workflow, normalization is SCT, but query is not SCT normalized", + call. = FALSE) + } if (IsSCT(assay = query[[query.assay]]) && IsSCT(assay = reference[[reference.assay]]) && normalization.method != "SCT") { warning("Both reference and query assays have been processed with SCTransform.", @@ -5804,6 +5797,8 @@ ValidateParams_FindTransferAnchors <- function( ModifyParam(param = "recompute.residuals", value = recompute.residuals) } if (recompute.residuals) { + # recompute.residuals only happens in ProjectCellEmbeddings, so k.filter set to NA. + ModifyParam(param = "k.filter", value = NA) reference.model.num <- length(x = slot(object = reference[[reference.assay]], name = "SCTModel.list")) if (reference.model.num > 1) { stop("Given reference assay (", reference.assay, ") has ", reference.model.num , @@ -5842,20 +5837,9 @@ ValidateParams_FindTransferAnchors <- function( "you can set recompute.residuals to FALSE", call. = FALSE) } } - if (inherits(x = query[[query.umi.assay]]$counts, what = 'IterableMatrix')) { - query[[query.umi.assay]]$scale.data <- query[[query.umi.assay]]$counts - } else { - query <- SCTransform( - object = query, - reference.SCT.model = slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]], - residual.features = features, - assay = query.umi.assay, - new.assay.name = new.sct.assay, - verbose = FALSE - ) - ModifyParam(param = "query.assay", value = new.sct.assay) - ModifyParam(param = "query", value = query) - } + DefaultAssay(query) <- query.umi.assay + ModifyParam(param = "query.assay", value = query.umi.assay) + ModifyParam(param = "query", value = query) ModifyParam(param = "reference", value = reference) } if (IsSCT(assay = reference[[reference.assay]]) && normalization.method == "LogNormalize") { @@ -5868,7 +5852,7 @@ ValidateParams_FindTransferAnchors <- function( call. = FALSE) } # features must be in both reference and query - feature.slot <- ifelse(test = normalization.method == "SCT", yes = "scale.data", no = "data") + feature.slot <- 'data' query.assay.check <- query.assay reference.assay.check <- reference.assay ref.features <- rownames(x = GetAssayData(object = reference[[reference.assay.check]], slot = feature.slot)) diff --git a/R/objects.R b/R/objects.R index d7781d4b6..5175d0987 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1966,6 +1966,9 @@ VariableFeatures.SCTAssay <- function( x = table(unlist(x = vf.list, use.names = FALSE)), decreasing = TRUE ) + if (length(x = var.features) == 0) { + return(NULL) + } for (i in 1:length(x = layer)) { vst_out <- SCTModel_to_vst(SCTModel = slot(object = object, name = "SCTModel.list")[[layer[[i]]]]) var.features <- var.features[names(x = var.features) %in% rownames(x = vst_out$gene_attr)] From 0d1010624c083e9e89333f54ecdb2a93e5409bbc Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 19 Jan 2023 12:52:46 -0500 Subject: [PATCH 376/979] v5 integration vignettes --- .../seurat5_integration_introduction.Rmd | 118 +-- .../seurat5_integration_introduction.html | 809 ++++++++++++++++++ vignettes/seurat5_integration_mapping.Rmd | 67 +- vignettes/seurat5_integration_mapping.html | 703 +++++++++++++++ vignettes/seurat5_integration_rpca.Rmd | 105 +-- 5 files changed, 1665 insertions(+), 137 deletions(-) create mode 100644 vignettes/seurat5_integration_introduction.html create mode 100644 vignettes/seurat5_integration_mapping.html diff --git a/vignettes/seurat5_integration_introduction.Rmd b/vignettes/seurat5_integration_introduction.Rmd index 904e4a71f..67588f46e 100644 --- a/vignettes/seurat5_integration_introduction.Rmd +++ b/vignettes/seurat5_integration_introduction.Rmd @@ -68,35 +68,34 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -LoadData('ifnb') +data('ifnb') ifnb <- UpdateSeuratObject(ifnb) ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) -# split the dataset into a list of two seurat objects (stim and CTRL) -ifnb.list <- SplitObject(ifnb, split.by = "stim") +# split the dataset into layers (stim and CTRL) +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) # normalize and identify variable features for each dataset independently -ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { - x <- NormalizeData(x) - x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) -}) - -# select features that are repeatedly variable across datasets for integration -features <- SelectIntegrationFeatures(object.list = ifnb.list) +ifnb <- NormalizeData(ifnb) +ifnb <- FindVariableFeatures(ifnb, selection.method = "vst", nfeatures = 2000) + +# these two now are run before +ifnb <- ScaleData(ifnb) +ifnb <- RunPCA(ifnb) +# # select features that are repeatedly variable across datasets for integration +# features <- SelectIntegrationFeatures(object.list = ifnb.list) +ifnb ``` ## Perform integration -We then identify anchors using the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`. - -```{r find.anchors} -immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features) +We then identify anchors using the `FindIntegrationAnchors()` function (not any more), which takes a list of Seurat objects as input, and use these anchors to integrate the two layers together with `IntegrateLayers()`. +```{r} +ifnb <- IntegrateLayers(object = ifnb, + method = CCAIntegration, + verbose = F) ``` -```{r integrate.data} -# this command creates an 'integrated' data assay -immune.combined <- IntegrateData(anchorset = immune.anchors) -``` ## Perform an integrated analysis @@ -105,27 +104,25 @@ Now we can run a single integrated analysis on all cells! ```{r clustering, results='hide', message=FALSE} # specify that we will perform downstream analysis on the corrected data # note that the original unmodified data still resides in the 'RNA' assay -DefaultAssay(immune.combined) <- "integrated" # Run the standard workflow for visualization and clustering -immune.combined <- ScaleData(immune.combined, verbose = FALSE) -immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) -immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) -immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) -immune.combined <- FindClusters(immune.combined, resolution = 0.5) +# use integrated.dr here instead of pca +ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30) +ifnb <- FindNeighbors(ifnb, reduction = "integrated.dr", dims = 1:30) +ifnb <- FindClusters(ifnb, resolution = 0.5) ``` ```{r viz, results='hide', message=FALSE} # Visualization -p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") -p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) +p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim") +p2 <- DimPlot(ifnb, reduction = "umap", label = TRUE, repel = TRUE) p1 + p2 ``` To visualize the two conditions side-by-side, we can use the `split.by` argument to show each condition colored by cluster. ```{r split.dim} -DimPlot(immune.combined, reduction = "umap", split.by = "stim") +DimPlot(ifnb, reduction = "umap", split.by = "stim") ``` ## Identify conserved cell type markers @@ -134,35 +131,39 @@ To identify canonical cell type marker genes that are conserved across condition ```{r conserved.markers, warning=FALSE} # For performing differential expression after integration, we switch back to the original data -DefaultAssay(immune.combined) <- "RNA" -nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "stim", verbose = FALSE) +DefaultAssay(ifnb) <- "RNA" +# Join Data Layers across stimualtions +ifnb[['RNA']] <- JoinLayers(ifnb[["RNA"]], search = "data", new = "data") +nk.markers <- FindConservedMarkers(ifnb, ident.1 = 6, grouping.var = "stim", verbose = FALSE) head(nk.markers) ``` We can explore these marker genes for each cluster and use them to annotate our clusters as specific cell types. ```{r annotate, results = 'hide', message=FALSE, fig.height = 8} -FeaturePlot(immune.combined, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", "CCL2", "PPBP"), min.cutoff = "q9") -immune.combined <- RenameIdents(immune.combined, "0" = "CD14 Mono", "1" = "CD4 Naive T", "2" = "CD4 Memory T", "3" = "CD16 Mono", "4" = "B", "5" = "CD8 T", "6" = "NK" , "7" = "T activated", "8" = "DC", "9" = "B Activated", "10" = "Mk", "11" = "pDC", "12" = "Eryth", "13" = "Mono/Mk Doublets", "14" = "HSPC") -DimPlot(immune.combined, label = TRUE) +FeaturePlot(ifnb, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", "CCL2", "PPBP"), min.cutoff = "q9") +ifnb <- RenameIdents(ifnb, `0` = "CD14 Mono", `1` = "CD4 Naive T", `2` = "CD4 Memory T", + `3` = "CD16 Mono", `4` = "B", `5` = "CD8 T", `6` = "NK", `7` = "T activated", `8` = "DC", `9` = "B Activated", + `10` = "Mk", `11` = "pDC", `12` = "Eryth", `13` = "Mono/Mk Doublets", `14` = "HSPC") +DimPlot(ifnb, label = TRUE) ``` The `DotPlot()` function with the `split.by` parameter can be useful for viewing conserved cell type markers across conditions, showing both the expression level and the percentage of cells in a cluster expressing any given gene. Here we plot 2-3 strong marker genes for each of our 14 clusters. ```{r splitdotplot, fig.height = 10} -Idents(immune.combined) <- factor( - Idents(immune.combined), +Idents(ifnb) <- factor( + Idents(ifnb), levels = c("HSPC", "Mono/Mk Doublets", "pDC", "Eryth","Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated", "CD4 Naive T", "CD4 Memory T")) markers.to.plot <- c("CD3D","CREM","HSPH1","SELL","GIMAP5","CACYBP","GNLY","NKG7","CCL5","CD8A","MS4A1","CD79A","MIR155HG","NME1","FCGR3A","VMO1","CCL2","S100A9","HLA-DQA1","GPR183","PPBP","GNG11","HBA2","HBB","TSPAN13","IL3RA","IGJ","PRSS57") -DotPlot(immune.combined, features = markers.to.plot, cols = c('blue', 'red'), dot.scale = 8, split.by = "stim") + RotatedAxis() +DotPlot(ifnb, features = markers.to.plot, cols = c('blue', 'red'), dot.scale = 8, split.by = "stim") + RotatedAxis() ``` ```{r save.img, include=TRUE} library(ggplot2) -plot <- DotPlot(immune.combined, features = markers.to.plot, cols = c('blue', 'red'), +plot <- DotPlot(ifnb, features = markers.to.plot, cols = c('blue', 'red'), dot.scale = 6, split.by = "stim") + RotatedAxis() -ggsave(filename = "../output/images/pbmc_alignment.jpg", height = 7, width = 12, plot = plot, quality = 50) +#ggsave(filename = "../output/images/pbmc_alignment.jpg", height = 7, width = 12, plot = plot, quality = 50) ``` ### Identify differential expressed genes across conditions @@ -173,12 +174,12 @@ Now that we've aligned the stimulated and control cells, we can start to do comp library(ggplot2) library(cowplot) theme_set(theme_cowplot()) -t.cells <- subset(immune.combined, idents = "CD4 Naive T") +t.cells <- subset(ifnb, idents = "CD4 Naive T") Idents(t.cells) <- "stim" avg.t.cells <- as.data.frame(log1p(AverageExpression(t.cells, verbose = FALSE)$RNA)) avg.t.cells$gene <- rownames(avg.t.cells) -cd14.mono <- subset(immune.combined, idents = "CD14 Mono") +cd14.mono <- subset(ifnb, idents = "CD14 Mono") Idents(cd14.mono) <- "stim" avg.cd14.mono <- as.data.frame(log1p(AverageExpression(cd14.mono, verbose = FALSE)$RNA)) avg.cd14.mono$gene <- rownames(avg.cd14.mono) @@ -196,26 +197,26 @@ As you can see, many of the same genes are upregulated in both of these cell typ Because we are confident in having identified common cell types across condition, we can ask what genes change in different conditions for cells of the same type. First, we create a column in the meta.data slot to hold both the cell type and stimulation information and switch the current ident to that column. Then we use `FindMarkers()` to find the genes that are different between stimulated and control B cells. Notice that many of the top genes that show up here are the same as the ones we plotted earlier as core interferon response genes. Additionally, genes like CXCL10 which we saw were specific to monocyte and B cell interferon response show up as highly significant in this list as well. ```{r de.genes} -immune.combined$celltype.stim <- paste(Idents(immune.combined), immune.combined$stim, sep = "_") -immune.combined$celltype <- Idents(immune.combined) -Idents(immune.combined) <- "celltype.stim" -b.interferon.response <- FindMarkers(immune.combined, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE) +ifnb$celltype.stim <- paste(Idents(ifnb), ifnb$stim, sep = "_") +ifnb$celltype <- Idents(ifnb) +Idents(ifnb) <- "celltype.stim" +b.interferon.response <- FindMarkers(ifnb, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE) head(b.interferon.response, n = 15) ``` Another useful way to visualize these changes in gene expression is with the `split.by` option to the `FeaturePlot()` or `VlnPlot()` function. This will display FeaturePlots of the list of given genes, split by a grouping variable (stimulation condition here). Genes such as CD3D and GNLY are canonical cell type markers (for T cells and NK/CD8 T cells) that are virtually unaffected by interferon stimulation and display similar gene expression patterns in the control and stimulated group. IFI6 and ISG15, on the other hand, are core interferon response genes and are upregulated accordingly in all cell types. Finally, CD14 and CXCL10 are genes that show a cell type specific interferon response. CD14 expression decreases after stimulation in CD14 monocytes, which could lead to misclassification in a supervised analysis framework, underscoring the value of integrated analysis. CXCL10 shows a distinct upregulation in monocytes and B cells after interferon stimulation but not in other cell types. ```{r feature.heatmaps, fig.height = 14} -FeaturePlot(immune.combined, features = c("CD3D", "GNLY", "IFI6"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red")) +FeaturePlot(ifnb, features = c("CD3D", "GNLY", "IFI6"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red")) ``` ```{r splitvln, fig.height = 12} -plots <- VlnPlot(immune.combined, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "celltype", pt.size = 0, combine = FALSE) +plots <- VlnPlot(ifnb, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "celltype", pt.size = 0, combine = FALSE) wrap_plots(plots = plots, ncol = 1) ``` ```{r save, include=TRUE} -saveRDS(immune.combined, file = "../output/immune.combined.rds") +#saveRDS(ifnb, file = "../output/ifnb.rds") ``` # Performing integration on datasets normalized with SCTransform @@ -233,32 +234,35 @@ Below, we demonstrate how to modify the Seurat integration workflow for datasets ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} LoadData('ifnb') -ifnb.list <- SplitObject(ifnb, split.by = "stim") -ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform) -features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) -ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) +ifnb <- UpdateSeuratObject(ifnb) +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) + +ifnb <- SCTransform(ifnb) +ifnb <- RunPCA(ifnb) ``` ```{r ifnb.cca.sct.anchors} -immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = 'SCT', anchor.features = features) -immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = 'SCT') +ifnb <- IntegrateLayers(object = ifnb, + method = CCAIntegration, + normalization.method = "SCT", + verbose = F) ``` ```{r ifnb.cca.sct.clustering, results='hide', message=FALSE} -immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) -immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30) +ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30) ``` ```{r immunesca.cca.sct.split.dims} -p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") -p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) +p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim") +p2 <- DimPlot(ifnb, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) p1 + p2 ``` Now that the datasets have been integrated, you can follow the previous steps in this vignette identify cell types and cell type-specific responses. ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_introduction.csv") +#write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_introduction.csv") ```
    diff --git a/vignettes/seurat5_integration_introduction.html b/vignettes/seurat5_integration_introduction.html new file mode 100644 index 000000000..41b05d1ca --- /dev/null +++ b/vignettes/seurat5_integration_introduction.html @@ -0,0 +1,809 @@ + + + + + + + + + + + + + +Introduction to scRNA-seq integration + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + +
    all_times <- list()  # store the time for each chunk
    +knitr::knit_hooks$set(time_it = local({
    +  now <- NULL
    +  function(before, options) {
    +    if (before) {
    +      now <<- Sys.time()
    +    } else {
    +      res <- difftime(Sys.time(), now, units = "secs")
    +      all_times[[options$label]] <<- res
    +    }
    +  }
    +}))
    +knitr::opts_chunk$set(
    +  tidy = TRUE,
    +  tidy.opts = list(width.cutoff = 95),
    +  fig.width = 10,
    +  message = FALSE,
    +  warning = FALSE,
    +  time_it = TRUE,
    +  error = TRUE
    +)
    +
    +

    Introduction to scRNA-seq integration

    +

    The joint analysis of two or more single-cell datasets poses unique +challenges. In particular, identifying cell populations that are present +across multiple datasets can be problematic under standard workflows. +Seurat v4 includes a set of methods to match (or ‘align’) shared cell +populations across datasets. These methods first identify cross-dataset +pairs of cells that are in a matched biological state (‘anchors’), can +be used both to correct for technical differences between datasets +(i.e. batch effect correction), and to perform comparative scRNA-seq +analysis of across experimental conditions.

    +

    Below, we demonstrate methods for scRNA-seq integration as described +in Stuart*, +Butler* et al, 2019 to perform a comparative analysis of human +immune cells (PBMC) in either a resting or +interferon-stimulated state.

    +
    +

    Integration goals

    +

    The following tutorial is designed to give you an overview of the +kinds of comparative analyses on complex cell types that are possible +using the Seurat integration procedure. Here, we address a few key +goals:

    +
      +
    • Create an ‘integrated’ data assay for downstream analysis
    • +
    • Identify cell types that are present in both datasets
    • +
    • Obtain cell type markers that are conserved in both control and +stimulated cells
    • +
    • Compare the datasets to find cell-type specific responses to +stimulation
    • +
    +
    +
    +

    Setup the Seurat objects

    +

    For convenience, we distribute this dataset through our SeuratData +package.

    +
    options(SeuratData.repo.use = "http://satijalab04.nygenome.org")
    +
    library(Seurat)
    +options(Seurat.object.assay.version = "v5")
    +library(SeuratData)
    +library(patchwork)
    +
    # install dataset
    +InstallData("ifnb")
    +
    # load dataset
    +LoadData("ifnb")
    +
    ## Error in slot(object = object, name = s): no slot of name "images" for this object of class "Seurat"
    +
    ifnb <- UpdateSeuratObject(ifnb)
    +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts)
    +
    +# split the dataset into layers (stim and CTRL)
    +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim)
    +
    +# normalize and identify variable features for each dataset independently
    +ifnb <- NormalizeData(ifnb)
    +ifnb <- FindVariableFeatures(ifnb, selection.method = "vst", nfeatures = 2000)
    +features <- VariableFeatures(ifnb)
    +
    +# these two now are run before
    +ifnb <- ScaleData(ifnb)
    +ifnb <- RunPCA(ifnb)
    +# # select features that are repeatedly variable across datasets for integration features <-
    +# SelectIntegrationFeatures(object.list = ifnb.list)
    +ifnb
    +
    +
    +

    Perform integration

    +

    We then identify anchors using the +FindIntegrationAnchors() function (not any more), which +takes a list of Seurat objects as input, and use these anchors to +integrate the two layers together with +IntegrateLayers().

    +
    ifnb <- IntegrateLayers(object = ifnb, method = CCAIntegration, features = features, verbose = F)
    +
    +
    +

    Perform an integrated analysis

    +

    Now we can run a single integrated analysis on all cells!

    +
    # specify that we will perform downstream analysis on the corrected data note that the
    +# original unmodified data still resides in the 'RNA' assay
    +
    +# Run the standard workflow for visualization and clustering use integrated.dr here instead of
    +# pca
    +ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30)
    +ifnb <- FindNeighbors(ifnb, reduction = "integrated.dr", dims = 1:30)
    +ifnb <- FindClusters(ifnb, resolution = 0.5)
    +
    # Visualization
    +p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim")
    +p2 <- DimPlot(ifnb, reduction = "umap", label = TRUE, repel = TRUE)
    +p1 + p2
    +

    +

    To visualize the two conditions side-by-side, we can use the +split.by argument to show each condition colored by +cluster.

    +
    DimPlot(ifnb, reduction = "umap", split.by = "stim")
    +

    +
    +
    +

    Identify conserved cell type markers

    +

    To identify canonical cell type marker genes that are conserved +across conditions, we provide the FindConservedMarkers() +function. This function performs differential gene expression testing +for each dataset/group and combines the p-values using meta-analysis +methods from the MetaDE R package. For example, we can calculated the +genes that are conserved markers irrespective of stimulation condition +in cluster 6 (NK cells).

    +
    # For performing differential expression after integration, we switch back to the original
    +# data
    +DefaultAssay(ifnb) <- "RNA"
    +# Join Data Layers across stimualtions
    +ifnb[["RNA"]] <- JoinLayers(ifnb[["RNA"]], search = "data", new = "data")
    +nk.markers <- FindConservedMarkers(ifnb, ident.1 = 6, grouping.var = "stim", verbose = FALSE)
    +head(nk.markers)
    +
    ##        CTRL_p_val CTRL_avg_log2FC CTRL_pct.1 CTRL_pct.2 CTRL_p_val_adj
    +## GNLY            0        6.012755      0.946      0.046              0
    +## FGFBP2          0        3.253231      0.503      0.021              0
    +## CLIC3           0        3.480735      0.605      0.024              0
    +## CTSW            0        3.025603      0.541      0.030              0
    +## KLRD1           0        2.803233      0.510      0.019              0
    +## KLRC1           0        2.615312      0.391      0.003              0
    +##           STIM_p_val STIM_avg_log2FC STIM_pct.1 STIM_pct.2 STIM_p_val_adj
    +## GNLY    0.000000e+00        5.792059      0.949      0.061   0.000000e+00
    +## FGFBP2 4.908844e-168        2.190945      0.268      0.015  6.898399e-164
    +## CLIC3   0.000000e+00        3.551895      0.627      0.031   0.000000e+00
    +## CTSW    0.000000e+00        3.162748      0.602      0.035   0.000000e+00
    +## KLRD1   0.000000e+00        2.868744      0.554      0.027   0.000000e+00
    +## KLRC1   0.000000e+00        2.539733      0.379      0.006   0.000000e+00
    +##             max_pval minimump_p_val
    +## GNLY    0.000000e+00              0
    +## FGFBP2 4.908844e-168              0
    +## CLIC3   0.000000e+00              0
    +## CTSW    0.000000e+00              0
    +## KLRD1   0.000000e+00              0
    +## KLRC1   0.000000e+00              0
    +

    We can explore these marker genes for each cluster and use them to +annotate our clusters as specific cell types.

    +
    FeaturePlot(ifnb, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", "CCL2",
    +    "PPBP"), min.cutoff = "q9")
    +

    +
    ifnb <- RenameIdents(ifnb, `0` = "CD14 Mono", `1` = "CD4 Naive T", `2` = "CD4 Memory T", `3` = "CD16 Mono",
    +    `4` = "B", `5` = "CD8 T", `6` = "NK", `7` = "T activated", `8` = "DC", `9` = "B Activated",
    +    `10` = "Mk", `11` = "pDC", `12` = "Eryth", `13` = "Mono/Mk Doublets", `14` = "HSPC")
    +DimPlot(ifnb, label = TRUE)
    +

    +

    The DotPlot() function with the split.by +parameter can be useful for viewing conserved cell type markers across +conditions, showing both the expression level and the percentage of +cells in a cluster expressing any given gene. Here we plot 2-3 strong +marker genes for each of our 14 clusters.

    +
    Idents(ifnb) <- factor(Idents(ifnb), levels = c("HSPC", "Mono/Mk Doublets", "pDC", "Eryth", "Mk",
    +    "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated", "CD4 Naive T",
    +    "CD4 Memory T"))
    +markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5",
    +    "CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
    +    "GPR183", "PPBP", "GNG11", "HBA2", "HBB", "TSPAN13", "IL3RA", "IGJ", "PRSS57")
    +DotPlot(ifnb, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "stim") +
    +    RotatedAxis()
    +

    +
    library(ggplot2)
    +plot <- DotPlot(ifnb, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 6, split.by = "stim") +
    +    RotatedAxis()
    +# ggsave(filename = '../output/images/pbmc_alignment.jpg', height = 7, width = 12, plot =
    +# plot, quality = 50)
    +
    +

    Identify differential expressed genes across conditions

    +

    Now that we’ve aligned the stimulated and control cells, we can start +to do comparative analyses and look at the differences induced by +stimulation. One way to look broadly at these changes is to plot the +average expression of both the stimulated and control cells and look for +genes that are visual outliers on a scatter plot. Here, we take the +average expression of both the stimulated and control naive T cells and +CD14 monocyte populations and generate the scatter plots, highlighting +genes that exhibit dramatic responses to interferon stimulation.

    +
    library(ggplot2)
    +library(cowplot)
    +theme_set(theme_cowplot())
    +t.cells <- subset(ifnb, idents = "CD4 Naive T")
    +Idents(t.cells) <- "stim"
    +avg.t.cells <- as.data.frame(log1p(AverageExpression(t.cells, verbose = FALSE)$RNA))
    +avg.t.cells$gene <- rownames(avg.t.cells)
    +
    +cd14.mono <- subset(ifnb, idents = "CD14 Mono")
    +Idents(cd14.mono) <- "stim"
    +avg.cd14.mono <- as.data.frame(log1p(AverageExpression(cd14.mono, verbose = FALSE)$RNA))
    +avg.cd14.mono$gene <- rownames(avg.cd14.mono)
    +
    +genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
    +p1 <- ggplot(avg.t.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
    +p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
    +p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
    +p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE)
    +p1 + p2
    +

    +

    As you can see, many of the same genes are upregulated in both of +these cell types and likely represent a conserved interferon response +pathway.

    +

    Because we are confident in having identified common cell types +across condition, we can ask what genes change in different conditions +for cells of the same type. First, we create a column in the meta.data +slot to hold both the cell type and stimulation information and switch +the current ident to that column. Then we use FindMarkers() +to find the genes that are different between stimulated and control B +cells. Notice that many of the top genes that show up here are the same +as the ones we plotted earlier as core interferon response genes. +Additionally, genes like CXCL10 which we saw were specific to monocyte +and B cell interferon response show up as highly significant in this +list as well.

    +
    ifnb$celltype.stim <- paste(Idents(ifnb), ifnb$stim, sep = "_")
    +ifnb$celltype <- Idents(ifnb)
    +Idents(ifnb) <- "celltype.stim"
    +b.interferon.response <- FindMarkers(ifnb, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE)
    +head(b.interferon.response, n = 15)
    +
    ##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
    +## ISG15   9.059333e-159  4.5698269 0.998 0.238 1.273108e-154
    +## IFIT3   4.791846e-154  4.4791700 0.965 0.051 6.733981e-150
    +## IFI6    7.806881e-152  4.2289881 0.963 0.076 1.097101e-147
    +## ISG20   1.061756e-149  2.9198403 1.000 0.672 1.492086e-145
    +## IFIT1   1.946349e-139  4.1101556 0.908 0.032 2.735204e-135
    +## MX1     1.243713e-123  3.2589732 0.908 0.113 1.747790e-119
    +## LY6E    9.532583e-120  3.1248078 0.896 0.147 1.339614e-115
    +## TNFSF10 2.678440e-112  3.8106081 0.787 0.020 3.764012e-108
    +## IFIT2   1.779710e-109  3.6693485 0.789 0.032 2.501027e-105
    +## B2M      3.573225e-99  0.6249356 1.000 1.000  5.021453e-95
    +## IRF7     3.158083e-95  2.6051975 0.843 0.191  4.438053e-91
    +## PLSCR1   6.649893e-94  2.7815365 0.792 0.118  9.345094e-90
    +## CXCL10   3.138646e-86  5.3307595 0.651 0.010  4.410739e-82
    +## UBE2L6   1.553836e-83  2.1249493 0.857 0.299  2.183606e-79
    +## PSMB9    5.270716e-78  1.6498113 0.937 0.559  7.406937e-74
    +

    Another useful way to visualize these changes in gene expression is +with the split.by option to the FeaturePlot() +or VlnPlot() function. This will display FeaturePlots of +the list of given genes, split by a grouping variable (stimulation +condition here). Genes such as CD3D and GNLY are canonical cell type +markers (for T cells and NK/CD8 T cells) that are virtually unaffected +by interferon stimulation and display similar gene expression patterns +in the control and stimulated group. IFI6 and ISG15, on the other hand, +are core interferon response genes and are upregulated accordingly in +all cell types. Finally, CD14 and CXCL10 are genes that show a cell type +specific interferon response. CD14 expression decreases after +stimulation in CD14 monocytes, which could lead to misclassification in +a supervised analysis framework, underscoring the value of integrated +analysis. CXCL10 shows a distinct upregulation in monocytes and B cells +after interferon stimulation but not in other cell types.

    +
    FeaturePlot(ifnb, features = c("CD3D", "GNLY", "IFI6"), split.by = "stim", max.cutoff = 3, cols = c("grey",
    +    "red"))
    +

    +
    plots <- VlnPlot(ifnb, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "celltype",
    +    pt.size = 0, combine = FALSE)
    +wrap_plots(plots = plots, ncol = 1)
    +

    +
    # saveRDS(ifnb, file = '../output/ifnb.rds')
    +
    +
    +
    +
    +

    Performing integration on datasets normalized with SCTransform

    +

    In Hafemeister +and Satija, 2019, we introduced an improved method for the +normalization of scRNA-seq, based on regularized negative binomial +regression. The method is named ‘sctransform’, and avoids some of the +pitfalls of standard normalization workflows, including the addition of +a pseudocount, and log-transformation. You can read more about +sctransform in the manuscript +or our SCTransform vignette.

    +

    Below, we demonstrate how to modify the Seurat integration workflow +for datasets that have been normalized with the sctransform workflow. +The commands are largely similar, with a few key differences:

    +
      +
    • Normalize datasets individually by SCTransform(), +instead of NormalizeData() prior to integration
    • +
    • As discussed further in our SCTransform vignette, we typically +use 3,000 or more features for analysis downstream of sctransform.
    • +
    • Run the PrepSCTIntegration() function prior to +identifying anchors
    • +
    • When running FindIntegrationAnchors(), and +IntegrateData(), set the normalization.method +parameter to the value SCT.
    • +
    • When running sctransform-based workflows, including integration, do +not run the ScaleData() function
    • +
    +
    LoadData("ifnb")
    +
    ## Error in slot(object = object, name = s): no slot of name "images" for this object of class "Seurat"
    +
    ifnb <- UpdateSeuratObject(ifnb)
    +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts)
    +
    ## Error in CreateAssay5Object(ifnb[["RNA"]]@counts): no slot of name "counts" for this object of class "Assay5"
    +
    ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim)
    +
    ## Error in slot(object = object, name = "layers")[[layer]][features, cells, : invalid or not-yet-implemented 'Matrix' subsetting
    +
    ifnb <- SCTransform(ifnb)
    +ifnb <- RunPCA(ifnb)
    +
    ifnb <- IntegrateLayers(object = ifnb, method = CCAIntegration, normalization.method = "SCT", verbose = F)
    +
    ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30)
    +
    p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim")
    +p2 <- DimPlot(ifnb, reduction = "umap", group.by = "seurat_annotations", label = TRUE, repel = TRUE)
    +p1 + p2
    +

    +

    Now that the datasets have been integrated, you can follow the +previous steps in this vignette identify cell types and cell +type-specific responses.

    +
    # write.csv(x = t(as.data.frame(all_times)), file =
    +# '../output/timings/seurat5_integration_introduction.csv')
    +
    + +Session Info + +
    sessionInfo()
    +
    ## R version 4.2.2 Patched (2022-11-10 r83330)
    +## Platform: x86_64-pc-linux-gnu (64-bit)
    +## Running under: Ubuntu 20.04.5 LTS
    +## 
    +## Matrix products: default
    +## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
    +## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
    +## 
    +## locale:
    +##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    +##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    +##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
    +##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    +##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## 
    +## attached base packages:
    +## [1] stats     graphics  grDevices utils     datasets  methods   base     
    +## 
    +## other attached packages:
    +##  [1] cowplot_1.1.1           ggplot2_3.4.0           patchwork_1.1.2        
    +##  [4] pbmc3k.SeuratData_3.1.4 panc8.SeuratData_3.0.2  ifnb.SeuratData_3.1.0  
    +##  [7] SeuratData_0.2.2        Seurat_4.9.9.9020       SeuratObject_4.9.9.9053
    +## [10] sp_1.5-1               
    +## 
    +## loaded via a namespace (and not attached):
    +##   [1] spam_2.9-1             sn_2.1.0               plyr_1.8.8            
    +##   [4] igraph_1.3.5           lazyeval_0.2.2         splines_4.2.2         
    +##   [7] RcppHNSW_0.4.1         listenv_0.9.0          scattermore_0.8       
    +##  [10] qqconf_1.3.0           TH.data_1.1-1          digest_0.6.31         
    +##  [13] htmltools_0.5.4        fansi_1.0.3            magrittr_2.0.3        
    +##  [16] tensor_1.5             cluster_2.1.4          ROCR_1.0-11           
    +##  [19] globals_0.16.2         matrixStats_0.63.0     sandwich_3.0-2        
    +##  [22] spatstat.sparse_3.0-0  colorspace_2.0-3       rappdirs_0.3.3        
    +##  [25] ggrepel_0.9.2          rbibutils_2.2.9        xfun_0.36             
    +##  [28] dplyr_1.0.10           crayon_1.5.2           jsonlite_1.8.4        
    +##  [31] progressr_0.13.0       spatstat.data_3.0-0    survival_3.4-0        
    +##  [34] zoo_1.8-11             glue_1.6.2             polyclip_1.10-4       
    +##  [37] gtable_0.3.1           leiden_0.4.3           DelayedArray_0.22.0   
    +##  [40] future.apply_1.10.0    BiocGenerics_0.44.0    abind_1.4-5           
    +##  [43] scales_1.2.1           mvtnorm_1.1-3          DBI_1.1.2             
    +##  [46] spatstat.random_3.0-1  miniUI_0.1.1.1         Rcpp_1.0.9            
    +##  [49] plotrix_3.8-2          metap_1.8              viridisLite_0.4.1     
    +##  [52] xtable_1.8-4           reticulate_1.27        dotCall64_1.0-2       
    +##  [55] stats4_4.2.2           htmlwidgets_1.6.1      httr_1.4.4            
    +##  [58] RColorBrewer_1.1-3     TFisher_0.2.0          ellipsis_0.3.2        
    +##  [61] ica_1.0-3              pkgconfig_2.0.3        farver_2.1.1          
    +##  [64] sass_0.4.4             uwot_0.1.14            deldir_1.0-6          
    +##  [67] utf8_1.2.2             tidyselect_1.2.0       labeling_0.4.2        
    +##  [70] rlang_1.0.6            reshape2_1.4.4         later_1.3.0           
    +##  [73] munsell_0.5.0          tools_4.2.2            cachem_1.0.6          
    +##  [76] cli_3.6.0              generics_0.1.3         mathjaxr_1.6-0        
    +##  [79] ggridges_0.5.4         evaluate_0.19          stringr_1.5.0         
    +##  [82] fastmap_1.1.0          yaml_2.3.6             goftest_1.2-3         
    +##  [85] knitr_1.41             fitdistrplus_1.1-8     purrr_1.0.1           
    +##  [88] RANN_2.6.1             pbapply_1.6-0          future_1.30.0         
    +##  [91] nlme_3.1-161           mime_0.12              formatR_1.12          
    +##  [94] compiler_4.2.2         rstudioapi_0.14        plotly_4.10.1         
    +##  [97] png_0.1-8              spatstat.utils_3.0-1   tibble_3.1.8          
    +## [100] bslib_0.4.2            stringi_1.7.12         highr_0.10            
    +## [103] RSpectra_0.16-1        lattice_0.20-45        Matrix_1.5-1          
    +## [106] multtest_2.52.0        vctrs_0.5.1            mutoss_0.1-12         
    +## [109] pillar_1.8.1           lifecycle_1.0.3        spatstat.geom_3.0-3   
    +## [112] Rdpack_2.4             lmtest_0.9-40          jquerylib_0.1.4       
    +## [115] RcppAnnoy_0.0.20       data.table_1.14.6      irlba_2.3.5.1         
    +## [118] httpuv_1.6.7           R6_2.5.1               promises_1.2.0.1      
    +## [121] KernSmooth_2.23-20     gridExtra_2.3          IRanges_2.32.0        
    +## [124] parallelly_1.34.0      codetools_0.2-18       fastDummies_1.6.3     
    +## [127] MASS_7.3-58            assertthat_0.2.1       withr_2.5.0           
    +## [130] presto_1.0.0           mnormt_2.1.1           sctransform_0.3.5     
    +## [133] S4Vectors_0.36.0       multcomp_1.4-20        parallel_4.2.2        
    +## [136] grid_4.2.2             tidyr_1.2.1            rmarkdown_2.19        
    +## [139] MatrixGenerics_1.8.1   Rtsne_0.16             spatstat.explore_3.0-5
    +## [142] Biobase_2.56.0         numDeriv_2016.8-1.1    shiny_1.7.4
    +
    +
    + + + + +
    + + + + + + + + + + + + + + + diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index c660b0e28..51435c67f 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -56,18 +56,17 @@ To construct a reference, we will identify 'anchors' between the individual data data('panc8') panc8 <- UpdateSeuratObject(panc8) panc8[["RNA"]] <- CreateAssay5Object(panc8[["RNA"]]@counts) -pancreas.list <- SplitObject(panc8, split.by = "tech") -pancreas.list <- pancreas.list[c("celseq", "celseq2", "fluidigmc1", "smartseq2")] +# split the dataset into layers by technology +panc8[["RNA"]] <- split(panc8[["RNA"]], f = panc8$tech) +panc8 <- DietSeurat(panc8, layers = c("celseq", "celseq2", "fluidigmc1", "smartseq2")) ``` Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat implements an improved method for variable feature selection based on a variance stabilizing transformation (`"vst"`) ```{r preprocessing3} -for (i in 1:length(pancreas.list)) { - pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE) - pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", - nfeatures = 2000, verbose = FALSE) -} +panc8 <- NormalizeData(panc8, verbose = FALSE) +panc8 <- FindVariableFeatures(panc8, selection.method = "vst", + nfeatures = 2000, verbose = FALSE) ``` # Integration of 3 pancreatic islet cell datasets @@ -77,8 +76,12 @@ Next, we identify anchors using the `FindIntegrationAnchors()` function, which t * We use all default parameters here for identifying anchors, including the 'dimensionality' of the dataset (30; feel free to try varying this parameter over a broad range, for example between 10 and 50). ```{r integration.anchors, warning = FALSE, message = FALSE} -reference.list <- pancreas.list[c("celseq", "celseq2", "smartseq2")] -pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30) +pancreas.ref <- DietSeurat(panc8, layers = c("celseq", "celseq2", "smartseq2")) +pancreas.ref <- as(object = pancreas.ref[['RNA']], Class = 'Assay5') +pancreas.ref <- CreateSeuratObject(pancreas.ref, meta.data = panc8@meta.data) +pancreas.ref <- ScaleData(pancreas.ref) +pancreas.ref <- RunPCA(pancreas.ref) +#pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30) ``` We then pass these anchors to the `IntegrateData()` function, which returns a Seurat object. @@ -86,7 +89,9 @@ We then pass these anchors to the `IntegrateData()` function, which returns a Se * The returned object will contain a new `Assay`, which holds an integrated (or 'batch-corrected') expression matrix for all cells, enabling them to be jointly analyzed. ```{r data.integration, warning = FALSE, message = FALSE} -pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30) +pancreas.ref <- IntegrateLayers(object = pancreas.ref, + method = CCAIntegration, + verbose = F) ``` After running `IntegrateData()`, the `Seurat` object will contain a new `Assay` with the integrated expression matrix. Note that the original (uncorrected values) are still stored in the object in the "RNA" assay, so you can switch back and forth. @@ -97,25 +102,20 @@ We can then use this new integrated matrix for downstream analysis and visualiza library(ggplot2) library(cowplot) library(patchwork) -#switch to integrated assay. The variable features of this assay are automatically -#set during IntegrateData -DefaultAssay(pancreas.integrated) <- 'integrated' # Run the standard workflow for visualization and clustering -pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE) -pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE) -pancreas.integrated <- RunUMAP(pancreas.integrated, reduction = "pca", dims = 1:30, +pancreas.ref <- RunUMAP(pancreas.ref, reduction = "integrated.dr", dims = 1:30, verbose = FALSE) -p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "tech") -p2 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", +p1 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "tech") +p2 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "celltype", label = TRUE, repel = TRUE) + NoLegend() p1 + p2 ``` ```{r save.img, include=TRUE} -plot <- DimPlot(pancreas.integrated, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") + +plot <- DimPlot(pancreas.ref, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10))) -ggsave(filename = "pancreas_integrated_umap.jpg", height = 7, width = 12, plot = plot, quality = 50) +#ggsave(filename = "pancreas_integrated_umap.jpg", height = 7, width = 12, plot = plot, quality = 50) ``` # Cell type classification using an integrated reference @@ -128,9 +128,14 @@ Seurat also supports the projection of reference data (or meta data) onto a quer After finding anchors, we use the `TransferData()` function to classify the query cells based on reference data (a vector of reference cell type labels). `TransferData()` returns a matrix with predicted IDs and prediction scores, which we can add to the query metadata. ```{r label.transfer, warning = FALSE, message = FALSE} -pancreas.query <- pancreas.list[["fluidigmc1"]] -pancreas.anchors <- FindTransferAnchors(reference = pancreas.integrated, query = pancreas.query, dims = 1:30, reference.reduction = "pca") -predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.integrated$celltype, dims = 1:30) +# do we want a different query and reference object or just have different layers?? +pancreas.query <- DietSeurat(panc8, layers = "fluidigmc1", assays = "RNA", ) +pancreas.query <- as(object = pancreas.query[['RNA']], Class = 'Assay5') +pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8@meta.data) + +pancreas.anchors <- FindTransferAnchors(reference = pancreas.ref, query = pancreas.query, dims = 1:30, reference.reduction = "integrated.dr", k.filter = NA) + +predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.ref$celltype, dims = 1:30) pancreas.query <- AddMetaData(pancreas.query, metadata = predictions) ``` @@ -153,13 +158,13 @@ VlnPlot(pancreas.query, c("REG1A", "PPY", "SST", "GHRL", "VWF", "SOX10"), group. In Seurat v4, we also enable projection of a query onto the reference UMAP structure. This can be achieved by computing the reference UMAP model and then calling `MapQuery()` instead of `TransferData()`. ```{r label.transfer.v4, warning = FALSE, message = FALSE} -pancreas.integrated <- RunUMAP(pancreas.integrated, dims = 1:30, reduction = "pca", return.model = TRUE) +pancreas.ref <- RunUMAP(pancreas.ref, dims = 1:30, reduction = "integrated.dr", return.model = TRUE) pancreas.query <- MapQuery( anchorset = pancreas.anchors, - reference = pancreas.integrated, + reference = pancreas.ref, query = pancreas.query, refdata = list(celltype = 'celltype'), - reference.reduction = 'pca', + reference.reduction = 'integrated.dr', reduction.model = 'umap' ) ``` @@ -172,20 +177,20 @@ pancreas.query <- MapQuery( ```{r, eval=FALSE} pancreas.query <- TransferData( anchorset = pancreas.anchors, - reference = pancreas.integrated, + reference = panc8, query = pancreas.query, refdata = list(celltype = "celltype") ) pancreas.query <- IntegrateEmbeddings( anchorset = pancreas.anchors, - reference = pancreas.integrated, + reference = panc8, query = pancreas.query, new.reduction.name = "ref.pca" ) pancreas.query <- ProjectUMAP( query = pancreas.query, query.reduction = "ref.pca", - reference = pancreas.integrated, + reference = panc8, reference.reduction = "pca", reduction.model = "umap" ) @@ -195,7 +200,7 @@ pancreas.query <- ProjectUMAP( We can now visualize the query cells alongside our reference. ```{r panc.refdimplots, fig.width=10} -p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", label = TRUE, +p1 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "celltype", label = TRUE, label.size = 3 ,repel = TRUE) + NoLegend() + ggtitle("Reference annotations") p2 <- DimPlot(pancreas.query, reduction = "ref.umap", group.by = "predicted.celltype", label = TRUE, label.size = 3 ,repel = TRUE) + NoLegend() + ggtitle("Query transferred labels") @@ -203,7 +208,7 @@ p1 + p2 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_reference_mapping.csv") +#write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_reference_mapping.csv") ```
    diff --git a/vignettes/seurat5_integration_mapping.html b/vignettes/seurat5_integration_mapping.html new file mode 100644 index 000000000..f056bb681 --- /dev/null +++ b/vignettes/seurat5_integration_mapping.html @@ -0,0 +1,703 @@ + + + + + + + + + + + + + +Mapping and annotating query datasets + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + +
    +
    all_times <- list()  # store the time for each chunk
    +knitr::knit_hooks$set(time_it = local({
    +  now <- NULL
    +  function(before, options) {
    +    if (before) {
    +      now <<- Sys.time()
    +    } else {
    +      res <- difftime(Sys.time(), now, units = "secs")
    +      all_times[[options$label]] <<- res
    +    }
    +  }
    +}))
    +knitr::opts_chunk$set(
    +  tidy = TRUE,
    +  tidy.opts = list(width.cutoff = 95),
    +  warning = FALSE,
    +  error = TRUE,
    +  message = FALSE,
    +  fig.width = 8,
    +  time_it = TRUE
    +)
    +
    +

    Introduction to single-cell reference mapping

    +

    In this vignette, we first build an integrated reference and then +demonstrate how to leverage this reference to annotate new query +datasets. Generating an integrated reference follows the same workflow +described in more detail in the integration introduction vignette. Once generated, this +reference can be used to analyze additional query datasets through tasks +like cell type label transfer and projecting query cells onto reference +UMAPs. Notably, this does not require correction of the underlying raw +query data and can therefore be an efficient strategy if a high quality +reference is available.

    +
    +
    +

    Dataset preprocessing

    +

    For the purposes of this example, we’ve chosen human pancreatic islet +cell datasets produced across four technologies, CelSeq (GSE81076) +CelSeq2 (GSE85241), Fluidigm C1 (GSE86469), and SMART-Seq2 +(E-MTAB-5061). For convenience, we distribute this dataset through our +SeuratData +package. The metadata contains the technology (tech column) +and cell type annotations (celltype column) for each cell +in the four datasets.

    +
    library(Seurat)
    +options(Seurat.object.assay.version = "v5")
    +library(SeuratData)
    +
    InstallData("panc8")
    +

    To construct a reference, we will identify ‘anchors’ between the +individual datasets. First, we split the combined object into a list, +with each dataset as an element (this is only necessary because the data +was bundled together for easy distribution).

    +
    data("panc8")
    +panc8 <- UpdateSeuratObject(panc8)
    +panc8[["RNA"]] <- CreateAssay5Object(panc8[["RNA"]]@counts)
    +# split the dataset into layers by technology
    +panc8[["RNA"]] <- split(panc8[["RNA"]], f = panc8$tech)
    +panc8 <- DietSeurat(panc8, layers = c("celseq", "celseq2", "fluidigmc1", "smartseq2"))
    +

    Prior to finding anchors, we perform standard preprocessing +(log-normalization), and identify variable features individually for +each. Note that Seurat implements an improved method for variable +feature selection based on a variance stabilizing transformation +("vst")

    +
    panc8 <- NormalizeData(panc8, verbose = FALSE)
    +panc8 <- FindVariableFeatures(panc8, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
    +features <- VariableFeatures(panc8)
    +
    +
    +

    Integration of 3 pancreatic islet cell datasets

    +

    Next, we identify anchors using the +FindIntegrationAnchors() function, which takes a list of +Seurat objects as input. Here, we integrate three of the objects into a +reference (we will use the fourth later in this vignette as a query +dataset to demonstrate mapping).

    +
      +
    • We use all default parameters here for identifying anchors, +including the ‘dimensionality’ of the dataset (30; feel free to try +varying this parameter over a broad range, for example between 10 and +50).
    • +
    +
    pancreas.ref <- DietSeurat(panc8, layers = c("celseq", "celseq2", "smartseq2"))
    +pancreas.ref <- as(object = pancreas.ref[["RNA"]], Class = "Assay5")
    +pancreas.ref <- CreateSeuratObject(pancreas.ref, meta.data = panc8@meta.data)
    +pancreas.ref <- ScaleData(pancreas.ref)
    +pancreas.ref <- RunPCA(pancreas.ref)
    +# pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
    +

    We then pass these anchors to the IntegrateData() +function, which returns a Seurat object.

    +
      +
    • The returned object will contain a new Assay, which +holds an integrated (or ‘batch-corrected’) expression matrix for all +cells, enabling them to be jointly analyzed.
    • +
    +
    pancreas.ref <- IntegrateLayers(object = pancreas.ref, method = CCAIntegration, features = features,
    +    verbose = F)
    +

    After running IntegrateData(), the Seurat +object will contain a new Assay with the integrated +expression matrix. Note that the original (uncorrected values) are still +stored in the object in the “RNA” assay, so you can switch back and +forth.

    +

    We can then use this new integrated matrix for downstream analysis +and visualization. Here we scale the integrated data, run PCA, and +visualize the results with UMAP. The integrated datasets cluster by cell +type, instead of by technology.

    +
    library(ggplot2)
    +library(cowplot)
    +library(patchwork)
    +# Run the standard workflow for visualization and clustering
    +pancreas.ref <- RunUMAP(pancreas.ref, reduction = "integrated.dr", dims = 1:30, verbose = FALSE)
    +p1 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "tech")
    +p2 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "celltype", label = TRUE, repel = TRUE) +
    +    NoLegend()
    +p1 + p2
    +

    +
    plot <- DimPlot(pancreas.ref, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") +
    +    ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) +
    +    guides(colour = guide_legend(override.aes = list(size = 10)))
    +# ggsave(filename = 'pancreas_integrated_umap.jpg', height = 7, width = 12, plot = plot,
    +# quality = 50)
    +
    +
    +

    Cell type classification using an integrated reference

    +

    Seurat also supports the projection of reference data (or meta data) +onto a query object. While many of the methods are conserved (both +procedures begin by identifying anchors), there are two important +distinctions between data transfer and integration:

    +
      +
    1. In data transfer, Seurat does not correct or modify the query +expression data.
    2. +
    3. In data transfer, Seurat has an option (set by default) to project +the PCA structure of a reference onto the query, instead of learning a +joint structure with CCA. We generally suggest using this option when +projecting data between scRNA-seq datasets.
    4. +
    +

    After finding anchors, we use the TransferData() +function to classify the query cells based on reference data (a vector +of reference cell type labels). TransferData() returns a +matrix with predicted IDs and prediction scores, which we can add to the +query metadata.

    +
    # do we want a different query and reference object or just have different layers??
    +pancreas.query <- DietSeurat(panc8, layers = "fluidigmc1", assays = "RNA", )
    +pancreas.query <- as(object = pancreas.query[["RNA"]], Class = "Assay5")
    +pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8@meta.data)
    +# system.time(pancreas.ref[['RNA']] <- JoinLayers(pancreas.ref[['RNA']], search = 'data', new
    +# = 'data')) # Error in Find Anchors, have to join layers system.time(pancreas.query[['RNA']]
    +# <- JoinLayers(pancreas.query[['RNA']], search = 'data', new = 'data')) # Error in Find
    +# Anchors, have to rename layer
    +
    +# for now i'm specifying RNA assay
    +pancreas.anchors <- FindTransferAnchors(reference = pancreas.ref, query = pancreas.query, dims = 1:30,
    +    reference.reduction = "integrated.dr", k.filter = NA)
    +
    +
    +predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.ref$celltype, dims = 1:30)
    +pancreas.query <- AddMetaData(pancreas.query, metadata = predictions)
    +

    Because we have the original label annotations from our full +integrated analysis, we can evaluate how well our predicted cell type +annotations match the full reference. In this example, we find that +there is a high agreement in cell type classification, with over 96% of +cells being labeled correctly.

    +
    pancreas.query$prediction.match <- pancreas.query$predicted.id == pancreas.query$celltype
    +table(pancreas.query$prediction.match)
    +
    ## 
    +## FALSE  TRUE 
    +##    26   612
    +

    To verify this further, we can examine some canonical cell type +markers for specific pancreatic islet cell populations. Note that even +though some of these cell types are only represented by one or two cells +(e.g. epsilon cells), we are still able to classify them correctly.

    +
    table(pancreas.query$predicted.id)
    +
    ## 
    +##             acinar activated_stellate              alpha               beta 
    +##                 22                 17                254                261 
    +##              delta             ductal        endothelial              gamma 
    +##                 22                 31                  9                 17 
    +##         macrophage            schwann 
    +##                  2                  3
    +
    VlnPlot(pancreas.query, c("REG1A", "PPY", "SST", "GHRL", "VWF", "SOX10"), group.by = "predicted.id")
    +

    +
    +
    +

    Unimodal UMAP Projection

    +

    In Seurat v4, we also enable projection of a query onto the reference +UMAP structure. This can be achieved by computing the reference UMAP +model and then calling MapQuery() instead of +TransferData().

    +
    pancreas.ref <- RunUMAP(pancreas.ref, dims = 1:30, reduction = "integrated.dr", return.model = TRUE)
    +pancreas.query <- MapQuery(anchorset = pancreas.anchors, reference = pancreas.ref, query = pancreas.query,
    +    refdata = list(celltype = "celltype"), reference.reduction = "integrated.dr", reduction.model = "umap")
    +
    + +What is MapQuery doing? + +

    MapQuery() is a wrapper around three functions: +TransferData(), IntegrateEmbeddings(), and +ProjectUMAP(). TransferData() is used to +transfer cell type labels and impute the ADT values; +IntegrateEmbeddings() is used to integrate reference with +query by correcting the query’s projected low-dimensional embeddings; +and finally ProjectUMAP() is used to project the query data +onto the UMAP structure of the reference. The equivalent code for doing +this with the intermediate functions is below:

    +
    pancreas.query <- TransferData(anchorset = pancreas.anchors, reference = panc8, query = pancreas.query,
    +    refdata = list(celltype = "celltype"))
    +pancreas.query <- IntegrateEmbeddings(anchorset = pancreas.anchors, reference = panc8, query = pancreas.query,
    +    new.reduction.name = "ref.pca")
    +pancreas.query <- ProjectUMAP(query = pancreas.query, query.reduction = "ref.pca", reference = panc8,
    +    reference.reduction = "pca", reduction.model = "umap")
    +
    +

    We can now visualize the query cells alongside our reference.

    +
    p1 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "celltype", label = TRUE, label.size = 3,
    +    repel = TRUE) + NoLegend() + ggtitle("Reference annotations")
    +p2 <- DimPlot(pancreas.query, reduction = "ref.umap", group.by = "predicted.celltype", label = TRUE,
    +    label.size = 3, repel = TRUE) + NoLegend() + ggtitle("Query transferred labels")
    +p1 + p2
    +

    +
    # write.csv(x = t(as.data.frame(all_times)), file =
    +# '../output/timings/seurat5_integration_reference_mapping.csv')
    +
    + +Session Info + +
    sessionInfo()
    +
    ## R version 4.2.2 Patched (2022-11-10 r83330)
    +## Platform: x86_64-pc-linux-gnu (64-bit)
    +## Running under: Ubuntu 20.04.5 LTS
    +## 
    +## Matrix products: default
    +## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
    +## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
    +## 
    +## locale:
    +##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    +##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    +##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
    +##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    +##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## 
    +## attached base packages:
    +## [1] stats     graphics  grDevices utils     datasets  methods   base     
    +## 
    +## other attached packages:
    +##  [1] patchwork_1.1.2                cowplot_1.1.1                 
    +##  [3] ggplot2_3.4.0                  pbmc3k.SeuratData_3.1.4       
    +##  [5] panc8.SeuratData_3.0.2         ifnb.SeuratData_3.1.0         
    +##  [7] bonemarrowref.SeuratData_1.0.0 SeuratData_0.2.2              
    +##  [9] Seurat_4.9.9.9020              SeuratObject_4.9.9.9053       
    +## [11] sp_1.5-1                      
    +## 
    +## loaded via a namespace (and not attached):
    +##   [1] Rtsne_0.16             colorspace_2.0-3       deldir_1.0-6          
    +##   [4] ellipsis_0.3.2         ggridges_0.5.4         RcppHNSW_0.4.1        
    +##   [7] spatstat.data_3.0-0    rstudioapi_0.14        farver_2.1.1          
    +##  [10] leiden_0.4.3           listenv_0.9.0          ggrepel_0.9.2         
    +##  [13] RSpectra_0.16-1        fansi_1.0.3            codetools_0.2-18      
    +##  [16] splines_4.2.2          cachem_1.0.6           knitr_1.41            
    +##  [19] polyclip_1.10-4        spam_2.9-1             jsonlite_1.8.4        
    +##  [22] ica_1.0-3              cluster_2.1.4          png_0.1-8             
    +##  [25] uwot_0.1.14            spatstat.sparse_3.0-0  shiny_1.7.4           
    +##  [28] sctransform_0.3.5      compiler_4.2.2         httr_1.4.4            
    +##  [31] assertthat_0.2.1       Matrix_1.5-1           fastmap_1.1.0         
    +##  [34] lazyeval_0.2.2         cli_3.6.0              later_1.3.0           
    +##  [37] formatR_1.12           htmltools_0.5.4        tools_4.2.2           
    +##  [40] igraph_1.3.5           dotCall64_1.0-2        gtable_0.3.1          
    +##  [43] glue_1.6.2             RANN_2.6.1             reshape2_1.4.4        
    +##  [46] dplyr_1.0.10           rappdirs_0.3.3         Rcpp_1.0.9            
    +##  [49] scattermore_0.8        jquerylib_0.1.4        vctrs_0.5.1           
    +##  [52] nlme_3.1-161           spatstat.explore_3.0-5 progressr_0.13.0      
    +##  [55] lmtest_0.9-40          spatstat.random_3.0-1  xfun_0.36             
    +##  [58] stringr_1.5.0          globals_0.16.2         mime_0.12             
    +##  [61] miniUI_0.1.1.1         lifecycle_1.0.3        irlba_2.3.5.1         
    +##  [64] goftest_1.2-3          future_1.30.0          MASS_7.3-58           
    +##  [67] zoo_1.8-11             scales_1.2.1           spatstat.utils_3.0-1  
    +##  [70] promises_1.2.0.1       parallel_4.2.2         RColorBrewer_1.1-3    
    +##  [73] yaml_2.3.6             gridExtra_2.3          reticulate_1.27       
    +##  [76] pbapply_1.6-0          sass_0.4.4             stringi_1.7.12        
    +##  [79] highr_0.10             fastDummies_1.6.3      rlang_1.0.6           
    +##  [82] pkgconfig_2.0.3        matrixStats_0.63.0     evaluate_0.19         
    +##  [85] lattice_0.20-45        tensor_1.5             ROCR_1.0-11           
    +##  [88] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
    +##  [91] tidyselect_1.2.0       parallelly_1.34.0      RcppAnnoy_0.0.20      
    +##  [94] plyr_1.8.8             magrittr_2.0.3         R6_2.5.1              
    +##  [97] generics_0.1.3         DBI_1.1.2              withr_2.5.0           
    +## [100] pillar_1.8.1           fitdistrplus_1.1-8     abind_1.4-5           
    +## [103] survival_3.4-0         tibble_3.1.8           future.apply_1.10.0   
    +## [106] crayon_1.5.2           KernSmooth_2.23-20     utf8_1.2.2            
    +## [109] spatstat.geom_3.0-3    plotly_4.10.1          rmarkdown_2.19        
    +## [112] grid_4.2.2             data.table_1.14.6      digest_0.6.31         
    +## [115] xtable_1.8-4           tidyr_1.2.1            httpuv_1.6.7          
    +## [118] munsell_0.5.0          viridisLite_0.4.1      bslib_0.4.2
    +
    +
    + + + + +
    + + + + + + + + + + + + + + + diff --git a/vignettes/seurat5_integration_rpca.Rmd b/vignettes/seurat5_integration_rpca.Rmd index d78236ddd..72c7223a9 100644 --- a/vignettes/seurat5_integration_rpca.Rmd +++ b/vignettes/seurat5_integration_rpca.Rmd @@ -55,38 +55,40 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -LoadData('ifnb') +data('ifnb') # what if I do data here instead ifnb <- UpdateSeuratObject(ifnb) +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) -# split the dataset into a list of two seurat objects (stim and CTRL) -ifnb.list <- SplitObject(ifnb, split.by = "stim") +# split the dataset into layers (stim and CTRL) +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) # normalize and identify variable features for each dataset independently -ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { - x <- NormalizeData(x) - x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) -}) +ifnb <- NormalizeData(ifnb) # select features that are repeatedly variable across datasets for integration # run PCA on each dataset using these features -features <- SelectIntegrationFeatures(object.list = ifnb.list) -ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { - x <- ScaleData(x, features = features, verbose = FALSE) - x <- RunPCA(x, features = features, verbose = FALSE) -}) + +ifnb <- FindVariableFeatures(ifnb, selection.method = "vst", nfeatures = 2000) +#features <- SelectIntegrationFeatures(ifnb.list) # this is in the vignette +features <- VariableFeatures(ifnb) + +ifnb <- ScaleData(ifnb, features = features, verbose = FALSE) +ifnb <- RunPCA(ifnb, features = features, verbose = FALSE) +ifnb + + + ``` # Perform integration We then identify anchors using the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`. -```{r find.anchors} -immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features,reduction = 'rpca') -``` - ```{r integrate.data} -# this command creates an 'integrated' data assay -immune.combined <- IntegrateData(anchorset = immune.anchors) +ifnb <- IntegrateLayers(object = ifnb, + method = RPCAIntegration, + features = features, + verbose = F) ``` Now we can run a single integrated analysis on all cells! @@ -94,20 +96,17 @@ Now we can run a single integrated analysis on all cells! ```{r clustering, results='hide', message=FALSE} # specify that we will perform downstream analysis on the corrected data # note that the original unmodified data still resides in the 'RNA' assay -DefaultAssay(immune.combined) <- "integrated" # Run the standard workflow for visualization and clustering -immune.combined <- ScaleData(immune.combined, verbose = FALSE) -immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) -immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) -immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) -immune.combined <- FindClusters(immune.combined, resolution = 0.5) +ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30) +ifnb <- FindNeighbors(ifnb, reduction = "integrated.dr", dims = 1:30) +ifnb <- FindClusters(ifnb, resolution = 0.5) ``` ```{r viz, results='hide', message=FALSE} # Visualization -p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") -p2 <- DimPlot(immune.combined, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) +p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim") +p2 <- DimPlot(ifnb, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) p1 + p2 ``` @@ -116,26 +115,30 @@ p1 + p2 The results show that rpca-based integration is more conservative, and in this case, do not perfectly align a subset of cells (which are naive and memory T cells) across experiments. You can increase the strength of alignment by increasing the `k.anchor` parameter, which is set to 5 by default. Increasing this parameter to 20 will assist in aligning these populations. ```{r split.dim} -immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features,reduction = 'rpca', k.anchor = 20) -immune.combined <- IntegrateData(anchorset = immune.anchors) +#immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features,reduction = 'rpca', k.anchor = 20) +#ifnb <- IntegrateData(anchorset = immune.anchors) -immune.combined <- ScaleData(immune.combined, verbose = FALSE) -immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE) -immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30) -immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30) -immune.combined <- FindClusters(immune.combined, resolution = 0.5) +ifnb <- IntegrateLayers(object = ifnb, + k.anchor = 20, + method = RPCAIntegration, + features = features, + verbose = F) + +ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30) +ifnb <- FindNeighbors(ifnb, reduction = "integrated.dr", dims = 1:30) +ifnb <- FindClusters(ifnb, resolution = 0.5) ``` ```{r viz2, results='hide', message=FALSE} # Visualization -p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim") -p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) +p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim") +p2 <- DimPlot(ifnb, reduction = "umap", group.by = 'seurat_annotations', label = TRUE, repel = TRUE) p1 + p2 ``` ```{r save.img, include=TRUE} library(ggplot2) -plot <- DimPlot(immune.combined, group.by = "stim") + +plot <- DimPlot(ifnb, group.by = "stim") + xlab("UMAP 1") + ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10))) @@ -149,34 +152,38 @@ Now that the datasets have been integrated, you can follow the previous steps in As an additional example, we repeat the analyses performed above, but normalize the datasets using [SCTransform](sctransform_vignette.html). We may choose to set the `method` parameter to `glmGamPoi` (install [here](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html)) in order to enable faster estimation of regression parameters in `SCTransform()`. ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} -LoadData('ifnb') +data('ifnb') ifnb <- UpdateSeuratObject(ifnb) -ifnb.list <- SplitObject(ifnb, split.by = "stim") -ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform, method = "glmGamPoi") -features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) -ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features) -ifnb.list <- lapply(X = ifnb.list, FUN = RunPCA, features = features) +ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) + +ifnb <- SCTransform(ifnb, method = "glmGamPoi") +features <- VariableFeatures(ifnb) +ifnb <- RunPCA(ifnb, features = features) ``` ```{r ifnb.cca.sct.anchors} -immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = 'SCT', anchor.features = features, dims = 1:30, reduction = 'rpca', k.anchor = 20) -immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = 'SCT', dims = 1:30) +ifnb <- IntegrateLayers(object = ifnb, + method = RPCAIntegration, + normalization.method = "SCT", + features = features, + k.anchor = 20, + verbose = F) ``` ```{r ifnb.cca.sct.clustering, results='hide', message=FALSE} -immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE) -immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30) +ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30) ``` ```{r immunesca.cca.sct.split.dims} # Visualization -p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim") -p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) +p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim") +p2 <- DimPlot(ifnb, reduction = "umap", group.by = 'seurat_annotations',label = TRUE, repel = TRUE) p1 + p2 ``` ```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_rpca.csv") +#write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_rpca.csv") ```
    From c0137e556217cf45c5f4b38eb14df7568cbb4974 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 19 Jan 2023 12:54:30 -0500 Subject: [PATCH 377/979] removing htmls --- .../seurat5_integration_introduction.html | 809 ------------------ vignettes/seurat5_integration_mapping.html | 703 --------------- 2 files changed, 1512 deletions(-) delete mode 100644 vignettes/seurat5_integration_introduction.html delete mode 100644 vignettes/seurat5_integration_mapping.html diff --git a/vignettes/seurat5_integration_introduction.html b/vignettes/seurat5_integration_introduction.html deleted file mode 100644 index 41b05d1ca..000000000 --- a/vignettes/seurat5_integration_introduction.html +++ /dev/null @@ -1,809 +0,0 @@ - - - - - - - - - - - - - -Introduction to scRNA-seq integration - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - -
    all_times <- list()  # store the time for each chunk
    -knitr::knit_hooks$set(time_it = local({
    -  now <- NULL
    -  function(before, options) {
    -    if (before) {
    -      now <<- Sys.time()
    -    } else {
    -      res <- difftime(Sys.time(), now, units = "secs")
    -      all_times[[options$label]] <<- res
    -    }
    -  }
    -}))
    -knitr::opts_chunk$set(
    -  tidy = TRUE,
    -  tidy.opts = list(width.cutoff = 95),
    -  fig.width = 10,
    -  message = FALSE,
    -  warning = FALSE,
    -  time_it = TRUE,
    -  error = TRUE
    -)
    -
    -

    Introduction to scRNA-seq integration

    -

    The joint analysis of two or more single-cell datasets poses unique -challenges. In particular, identifying cell populations that are present -across multiple datasets can be problematic under standard workflows. -Seurat v4 includes a set of methods to match (or ‘align’) shared cell -populations across datasets. These methods first identify cross-dataset -pairs of cells that are in a matched biological state (‘anchors’), can -be used both to correct for technical differences between datasets -(i.e. batch effect correction), and to perform comparative scRNA-seq -analysis of across experimental conditions.

    -

    Below, we demonstrate methods for scRNA-seq integration as described -in Stuart*, -Butler* et al, 2019 to perform a comparative analysis of human -immune cells (PBMC) in either a resting or -interferon-stimulated state.

    -
    -

    Integration goals

    -

    The following tutorial is designed to give you an overview of the -kinds of comparative analyses on complex cell types that are possible -using the Seurat integration procedure. Here, we address a few key -goals:

    -
      -
    • Create an ‘integrated’ data assay for downstream analysis
    • -
    • Identify cell types that are present in both datasets
    • -
    • Obtain cell type markers that are conserved in both control and -stimulated cells
    • -
    • Compare the datasets to find cell-type specific responses to -stimulation
    • -
    -
    -
    -

    Setup the Seurat objects

    -

    For convenience, we distribute this dataset through our SeuratData -package.

    -
    options(SeuratData.repo.use = "http://satijalab04.nygenome.org")
    -
    library(Seurat)
    -options(Seurat.object.assay.version = "v5")
    -library(SeuratData)
    -library(patchwork)
    -
    # install dataset
    -InstallData("ifnb")
    -
    # load dataset
    -LoadData("ifnb")
    -
    ## Error in slot(object = object, name = s): no slot of name "images" for this object of class "Seurat"
    -
    ifnb <- UpdateSeuratObject(ifnb)
    -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts)
    -
    -# split the dataset into layers (stim and CTRL)
    -ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim)
    -
    -# normalize and identify variable features for each dataset independently
    -ifnb <- NormalizeData(ifnb)
    -ifnb <- FindVariableFeatures(ifnb, selection.method = "vst", nfeatures = 2000)
    -features <- VariableFeatures(ifnb)
    -
    -# these two now are run before
    -ifnb <- ScaleData(ifnb)
    -ifnb <- RunPCA(ifnb)
    -# # select features that are repeatedly variable across datasets for integration features <-
    -# SelectIntegrationFeatures(object.list = ifnb.list)
    -ifnb
    -
    -
    -

    Perform integration

    -

    We then identify anchors using the -FindIntegrationAnchors() function (not any more), which -takes a list of Seurat objects as input, and use these anchors to -integrate the two layers together with -IntegrateLayers().

    -
    ifnb <- IntegrateLayers(object = ifnb, method = CCAIntegration, features = features, verbose = F)
    -
    -
    -

    Perform an integrated analysis

    -

    Now we can run a single integrated analysis on all cells!

    -
    # specify that we will perform downstream analysis on the corrected data note that the
    -# original unmodified data still resides in the 'RNA' assay
    -
    -# Run the standard workflow for visualization and clustering use integrated.dr here instead of
    -# pca
    -ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30)
    -ifnb <- FindNeighbors(ifnb, reduction = "integrated.dr", dims = 1:30)
    -ifnb <- FindClusters(ifnb, resolution = 0.5)
    -
    # Visualization
    -p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim")
    -p2 <- DimPlot(ifnb, reduction = "umap", label = TRUE, repel = TRUE)
    -p1 + p2
    -

    -

    To visualize the two conditions side-by-side, we can use the -split.by argument to show each condition colored by -cluster.

    -
    DimPlot(ifnb, reduction = "umap", split.by = "stim")
    -

    -
    -
    -

    Identify conserved cell type markers

    -

    To identify canonical cell type marker genes that are conserved -across conditions, we provide the FindConservedMarkers() -function. This function performs differential gene expression testing -for each dataset/group and combines the p-values using meta-analysis -methods from the MetaDE R package. For example, we can calculated the -genes that are conserved markers irrespective of stimulation condition -in cluster 6 (NK cells).

    -
    # For performing differential expression after integration, we switch back to the original
    -# data
    -DefaultAssay(ifnb) <- "RNA"
    -# Join Data Layers across stimualtions
    -ifnb[["RNA"]] <- JoinLayers(ifnb[["RNA"]], search = "data", new = "data")
    -nk.markers <- FindConservedMarkers(ifnb, ident.1 = 6, grouping.var = "stim", verbose = FALSE)
    -head(nk.markers)
    -
    ##        CTRL_p_val CTRL_avg_log2FC CTRL_pct.1 CTRL_pct.2 CTRL_p_val_adj
    -## GNLY            0        6.012755      0.946      0.046              0
    -## FGFBP2          0        3.253231      0.503      0.021              0
    -## CLIC3           0        3.480735      0.605      0.024              0
    -## CTSW            0        3.025603      0.541      0.030              0
    -## KLRD1           0        2.803233      0.510      0.019              0
    -## KLRC1           0        2.615312      0.391      0.003              0
    -##           STIM_p_val STIM_avg_log2FC STIM_pct.1 STIM_pct.2 STIM_p_val_adj
    -## GNLY    0.000000e+00        5.792059      0.949      0.061   0.000000e+00
    -## FGFBP2 4.908844e-168        2.190945      0.268      0.015  6.898399e-164
    -## CLIC3   0.000000e+00        3.551895      0.627      0.031   0.000000e+00
    -## CTSW    0.000000e+00        3.162748      0.602      0.035   0.000000e+00
    -## KLRD1   0.000000e+00        2.868744      0.554      0.027   0.000000e+00
    -## KLRC1   0.000000e+00        2.539733      0.379      0.006   0.000000e+00
    -##             max_pval minimump_p_val
    -## GNLY    0.000000e+00              0
    -## FGFBP2 4.908844e-168              0
    -## CLIC3   0.000000e+00              0
    -## CTSW    0.000000e+00              0
    -## KLRD1   0.000000e+00              0
    -## KLRC1   0.000000e+00              0
    -

    We can explore these marker genes for each cluster and use them to -annotate our clusters as specific cell types.

    -
    FeaturePlot(ifnb, features = c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", "CCL2",
    -    "PPBP"), min.cutoff = "q9")
    -

    -
    ifnb <- RenameIdents(ifnb, `0` = "CD14 Mono", `1` = "CD4 Naive T", `2` = "CD4 Memory T", `3` = "CD16 Mono",
    -    `4` = "B", `5` = "CD8 T", `6` = "NK", `7` = "T activated", `8` = "DC", `9` = "B Activated",
    -    `10` = "Mk", `11` = "pDC", `12` = "Eryth", `13` = "Mono/Mk Doublets", `14` = "HSPC")
    -DimPlot(ifnb, label = TRUE)
    -

    -

    The DotPlot() function with the split.by -parameter can be useful for viewing conserved cell type markers across -conditions, showing both the expression level and the percentage of -cells in a cluster expressing any given gene. Here we plot 2-3 strong -marker genes for each of our 14 clusters.

    -
    Idents(ifnb) <- factor(Idents(ifnb), levels = c("HSPC", "Mono/Mk Doublets", "pDC", "Eryth", "Mk",
    -    "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated", "CD4 Naive T",
    -    "CD4 Memory T"))
    -markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5",
    -    "CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1",
    -    "GPR183", "PPBP", "GNG11", "HBA2", "HBB", "TSPAN13", "IL3RA", "IGJ", "PRSS57")
    -DotPlot(ifnb, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "stim") +
    -    RotatedAxis()
    -

    -
    library(ggplot2)
    -plot <- DotPlot(ifnb, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 6, split.by = "stim") +
    -    RotatedAxis()
    -# ggsave(filename = '../output/images/pbmc_alignment.jpg', height = 7, width = 12, plot =
    -# plot, quality = 50)
    -
    -

    Identify differential expressed genes across conditions

    -

    Now that we’ve aligned the stimulated and control cells, we can start -to do comparative analyses and look at the differences induced by -stimulation. One way to look broadly at these changes is to plot the -average expression of both the stimulated and control cells and look for -genes that are visual outliers on a scatter plot. Here, we take the -average expression of both the stimulated and control naive T cells and -CD14 monocyte populations and generate the scatter plots, highlighting -genes that exhibit dramatic responses to interferon stimulation.

    -
    library(ggplot2)
    -library(cowplot)
    -theme_set(theme_cowplot())
    -t.cells <- subset(ifnb, idents = "CD4 Naive T")
    -Idents(t.cells) <- "stim"
    -avg.t.cells <- as.data.frame(log1p(AverageExpression(t.cells, verbose = FALSE)$RNA))
    -avg.t.cells$gene <- rownames(avg.t.cells)
    -
    -cd14.mono <- subset(ifnb, idents = "CD14 Mono")
    -Idents(cd14.mono) <- "stim"
    -avg.cd14.mono <- as.data.frame(log1p(AverageExpression(cd14.mono, verbose = FALSE)$RNA))
    -avg.cd14.mono$gene <- rownames(avg.cd14.mono)
    -
    -genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8")
    -p1 <- ggplot(avg.t.cells, aes(CTRL, STIM)) + geom_point() + ggtitle("CD4 Naive T Cells")
    -p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
    -p2 <- ggplot(avg.cd14.mono, aes(CTRL, STIM)) + geom_point() + ggtitle("CD14 Monocytes")
    -p2 <- LabelPoints(plot = p2, points = genes.to.label, repel = TRUE)
    -p1 + p2
    -

    -

    As you can see, many of the same genes are upregulated in both of -these cell types and likely represent a conserved interferon response -pathway.

    -

    Because we are confident in having identified common cell types -across condition, we can ask what genes change in different conditions -for cells of the same type. First, we create a column in the meta.data -slot to hold both the cell type and stimulation information and switch -the current ident to that column. Then we use FindMarkers() -to find the genes that are different between stimulated and control B -cells. Notice that many of the top genes that show up here are the same -as the ones we plotted earlier as core interferon response genes. -Additionally, genes like CXCL10 which we saw were specific to monocyte -and B cell interferon response show up as highly significant in this -list as well.

    -
    ifnb$celltype.stim <- paste(Idents(ifnb), ifnb$stim, sep = "_")
    -ifnb$celltype <- Idents(ifnb)
    -Idents(ifnb) <- "celltype.stim"
    -b.interferon.response <- FindMarkers(ifnb, ident.1 = "B_STIM", ident.2 = "B_CTRL", verbose = FALSE)
    -head(b.interferon.response, n = 15)
    -
    ##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
    -## ISG15   9.059333e-159  4.5698269 0.998 0.238 1.273108e-154
    -## IFIT3   4.791846e-154  4.4791700 0.965 0.051 6.733981e-150
    -## IFI6    7.806881e-152  4.2289881 0.963 0.076 1.097101e-147
    -## ISG20   1.061756e-149  2.9198403 1.000 0.672 1.492086e-145
    -## IFIT1   1.946349e-139  4.1101556 0.908 0.032 2.735204e-135
    -## MX1     1.243713e-123  3.2589732 0.908 0.113 1.747790e-119
    -## LY6E    9.532583e-120  3.1248078 0.896 0.147 1.339614e-115
    -## TNFSF10 2.678440e-112  3.8106081 0.787 0.020 3.764012e-108
    -## IFIT2   1.779710e-109  3.6693485 0.789 0.032 2.501027e-105
    -## B2M      3.573225e-99  0.6249356 1.000 1.000  5.021453e-95
    -## IRF7     3.158083e-95  2.6051975 0.843 0.191  4.438053e-91
    -## PLSCR1   6.649893e-94  2.7815365 0.792 0.118  9.345094e-90
    -## CXCL10   3.138646e-86  5.3307595 0.651 0.010  4.410739e-82
    -## UBE2L6   1.553836e-83  2.1249493 0.857 0.299  2.183606e-79
    -## PSMB9    5.270716e-78  1.6498113 0.937 0.559  7.406937e-74
    -

    Another useful way to visualize these changes in gene expression is -with the split.by option to the FeaturePlot() -or VlnPlot() function. This will display FeaturePlots of -the list of given genes, split by a grouping variable (stimulation -condition here). Genes such as CD3D and GNLY are canonical cell type -markers (for T cells and NK/CD8 T cells) that are virtually unaffected -by interferon stimulation and display similar gene expression patterns -in the control and stimulated group. IFI6 and ISG15, on the other hand, -are core interferon response genes and are upregulated accordingly in -all cell types. Finally, CD14 and CXCL10 are genes that show a cell type -specific interferon response. CD14 expression decreases after -stimulation in CD14 monocytes, which could lead to misclassification in -a supervised analysis framework, underscoring the value of integrated -analysis. CXCL10 shows a distinct upregulation in monocytes and B cells -after interferon stimulation but not in other cell types.

    -
    FeaturePlot(ifnb, features = c("CD3D", "GNLY", "IFI6"), split.by = "stim", max.cutoff = 3, cols = c("grey",
    -    "red"))
    -

    -
    plots <- VlnPlot(ifnb, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "celltype",
    -    pt.size = 0, combine = FALSE)
    -wrap_plots(plots = plots, ncol = 1)
    -

    -
    # saveRDS(ifnb, file = '../output/ifnb.rds')
    -
    -
    -
    -
    -

    Performing integration on datasets normalized with SCTransform

    -

    In Hafemeister -and Satija, 2019, we introduced an improved method for the -normalization of scRNA-seq, based on regularized negative binomial -regression. The method is named ‘sctransform’, and avoids some of the -pitfalls of standard normalization workflows, including the addition of -a pseudocount, and log-transformation. You can read more about -sctransform in the manuscript -or our SCTransform vignette.

    -

    Below, we demonstrate how to modify the Seurat integration workflow -for datasets that have been normalized with the sctransform workflow. -The commands are largely similar, with a few key differences:

    -
      -
    • Normalize datasets individually by SCTransform(), -instead of NormalizeData() prior to integration
    • -
    • As discussed further in our SCTransform vignette, we typically -use 3,000 or more features for analysis downstream of sctransform.
    • -
    • Run the PrepSCTIntegration() function prior to -identifying anchors
    • -
    • When running FindIntegrationAnchors(), and -IntegrateData(), set the normalization.method -parameter to the value SCT.
    • -
    • When running sctransform-based workflows, including integration, do -not run the ScaleData() function
    • -
    -
    LoadData("ifnb")
    -
    ## Error in slot(object = object, name = s): no slot of name "images" for this object of class "Seurat"
    -
    ifnb <- UpdateSeuratObject(ifnb)
    -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts)
    -
    ## Error in CreateAssay5Object(ifnb[["RNA"]]@counts): no slot of name "counts" for this object of class "Assay5"
    -
    ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim)
    -
    ## Error in slot(object = object, name = "layers")[[layer]][features, cells, : invalid or not-yet-implemented 'Matrix' subsetting
    -
    ifnb <- SCTransform(ifnb)
    -ifnb <- RunPCA(ifnb)
    -
    ifnb <- IntegrateLayers(object = ifnb, method = CCAIntegration, normalization.method = "SCT", verbose = F)
    -
    ifnb <- RunUMAP(ifnb, reduction = "integrated.dr", dims = 1:30)
    -
    p1 <- DimPlot(ifnb, reduction = "umap", group.by = "stim")
    -p2 <- DimPlot(ifnb, reduction = "umap", group.by = "seurat_annotations", label = TRUE, repel = TRUE)
    -p1 + p2
    -

    -

    Now that the datasets have been integrated, you can follow the -previous steps in this vignette identify cell types and cell -type-specific responses.

    -
    # write.csv(x = t(as.data.frame(all_times)), file =
    -# '../output/timings/seurat5_integration_introduction.csv')
    -
    - -Session Info - -
    sessionInfo()
    -
    ## R version 4.2.2 Patched (2022-11-10 r83330)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04.5 LTS
    -## 
    -## Matrix products: default
    -## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
    -## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
    -## 
    -## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    -## 
    -## attached base packages:
    -## [1] stats     graphics  grDevices utils     datasets  methods   base     
    -## 
    -## other attached packages:
    -##  [1] cowplot_1.1.1           ggplot2_3.4.0           patchwork_1.1.2        
    -##  [4] pbmc3k.SeuratData_3.1.4 panc8.SeuratData_3.0.2  ifnb.SeuratData_3.1.0  
    -##  [7] SeuratData_0.2.2        Seurat_4.9.9.9020       SeuratObject_4.9.9.9053
    -## [10] sp_1.5-1               
    -## 
    -## loaded via a namespace (and not attached):
    -##   [1] spam_2.9-1             sn_2.1.0               plyr_1.8.8            
    -##   [4] igraph_1.3.5           lazyeval_0.2.2         splines_4.2.2         
    -##   [7] RcppHNSW_0.4.1         listenv_0.9.0          scattermore_0.8       
    -##  [10] qqconf_1.3.0           TH.data_1.1-1          digest_0.6.31         
    -##  [13] htmltools_0.5.4        fansi_1.0.3            magrittr_2.0.3        
    -##  [16] tensor_1.5             cluster_2.1.4          ROCR_1.0-11           
    -##  [19] globals_0.16.2         matrixStats_0.63.0     sandwich_3.0-2        
    -##  [22] spatstat.sparse_3.0-0  colorspace_2.0-3       rappdirs_0.3.3        
    -##  [25] ggrepel_0.9.2          rbibutils_2.2.9        xfun_0.36             
    -##  [28] dplyr_1.0.10           crayon_1.5.2           jsonlite_1.8.4        
    -##  [31] progressr_0.13.0       spatstat.data_3.0-0    survival_3.4-0        
    -##  [34] zoo_1.8-11             glue_1.6.2             polyclip_1.10-4       
    -##  [37] gtable_0.3.1           leiden_0.4.3           DelayedArray_0.22.0   
    -##  [40] future.apply_1.10.0    BiocGenerics_0.44.0    abind_1.4-5           
    -##  [43] scales_1.2.1           mvtnorm_1.1-3          DBI_1.1.2             
    -##  [46] spatstat.random_3.0-1  miniUI_0.1.1.1         Rcpp_1.0.9            
    -##  [49] plotrix_3.8-2          metap_1.8              viridisLite_0.4.1     
    -##  [52] xtable_1.8-4           reticulate_1.27        dotCall64_1.0-2       
    -##  [55] stats4_4.2.2           htmlwidgets_1.6.1      httr_1.4.4            
    -##  [58] RColorBrewer_1.1-3     TFisher_0.2.0          ellipsis_0.3.2        
    -##  [61] ica_1.0-3              pkgconfig_2.0.3        farver_2.1.1          
    -##  [64] sass_0.4.4             uwot_0.1.14            deldir_1.0-6          
    -##  [67] utf8_1.2.2             tidyselect_1.2.0       labeling_0.4.2        
    -##  [70] rlang_1.0.6            reshape2_1.4.4         later_1.3.0           
    -##  [73] munsell_0.5.0          tools_4.2.2            cachem_1.0.6          
    -##  [76] cli_3.6.0              generics_0.1.3         mathjaxr_1.6-0        
    -##  [79] ggridges_0.5.4         evaluate_0.19          stringr_1.5.0         
    -##  [82] fastmap_1.1.0          yaml_2.3.6             goftest_1.2-3         
    -##  [85] knitr_1.41             fitdistrplus_1.1-8     purrr_1.0.1           
    -##  [88] RANN_2.6.1             pbapply_1.6-0          future_1.30.0         
    -##  [91] nlme_3.1-161           mime_0.12              formatR_1.12          
    -##  [94] compiler_4.2.2         rstudioapi_0.14        plotly_4.10.1         
    -##  [97] png_0.1-8              spatstat.utils_3.0-1   tibble_3.1.8          
    -## [100] bslib_0.4.2            stringi_1.7.12         highr_0.10            
    -## [103] RSpectra_0.16-1        lattice_0.20-45        Matrix_1.5-1          
    -## [106] multtest_2.52.0        vctrs_0.5.1            mutoss_0.1-12         
    -## [109] pillar_1.8.1           lifecycle_1.0.3        spatstat.geom_3.0-3   
    -## [112] Rdpack_2.4             lmtest_0.9-40          jquerylib_0.1.4       
    -## [115] RcppAnnoy_0.0.20       data.table_1.14.6      irlba_2.3.5.1         
    -## [118] httpuv_1.6.7           R6_2.5.1               promises_1.2.0.1      
    -## [121] KernSmooth_2.23-20     gridExtra_2.3          IRanges_2.32.0        
    -## [124] parallelly_1.34.0      codetools_0.2-18       fastDummies_1.6.3     
    -## [127] MASS_7.3-58            assertthat_0.2.1       withr_2.5.0           
    -## [130] presto_1.0.0           mnormt_2.1.1           sctransform_0.3.5     
    -## [133] S4Vectors_0.36.0       multcomp_1.4-20        parallel_4.2.2        
    -## [136] grid_4.2.2             tidyr_1.2.1            rmarkdown_2.19        
    -## [139] MatrixGenerics_1.8.1   Rtsne_0.16             spatstat.explore_3.0-5
    -## [142] Biobase_2.56.0         numDeriv_2016.8-1.1    shiny_1.7.4
    -
    -
    - - - - -
    - - - - - - - - - - - - - - - diff --git a/vignettes/seurat5_integration_mapping.html b/vignettes/seurat5_integration_mapping.html deleted file mode 100644 index f056bb681..000000000 --- a/vignettes/seurat5_integration_mapping.html +++ /dev/null @@ -1,703 +0,0 @@ - - - - - - - - - - - - - -Mapping and annotating query datasets - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - -
    -
    all_times <- list()  # store the time for each chunk
    -knitr::knit_hooks$set(time_it = local({
    -  now <- NULL
    -  function(before, options) {
    -    if (before) {
    -      now <<- Sys.time()
    -    } else {
    -      res <- difftime(Sys.time(), now, units = "secs")
    -      all_times[[options$label]] <<- res
    -    }
    -  }
    -}))
    -knitr::opts_chunk$set(
    -  tidy = TRUE,
    -  tidy.opts = list(width.cutoff = 95),
    -  warning = FALSE,
    -  error = TRUE,
    -  message = FALSE,
    -  fig.width = 8,
    -  time_it = TRUE
    -)
    -
    -

    Introduction to single-cell reference mapping

    -

    In this vignette, we first build an integrated reference and then -demonstrate how to leverage this reference to annotate new query -datasets. Generating an integrated reference follows the same workflow -described in more detail in the integration introduction vignette. Once generated, this -reference can be used to analyze additional query datasets through tasks -like cell type label transfer and projecting query cells onto reference -UMAPs. Notably, this does not require correction of the underlying raw -query data and can therefore be an efficient strategy if a high quality -reference is available.

    -
    -
    -

    Dataset preprocessing

    -

    For the purposes of this example, we’ve chosen human pancreatic islet -cell datasets produced across four technologies, CelSeq (GSE81076) -CelSeq2 (GSE85241), Fluidigm C1 (GSE86469), and SMART-Seq2 -(E-MTAB-5061). For convenience, we distribute this dataset through our -SeuratData -package. The metadata contains the technology (tech column) -and cell type annotations (celltype column) for each cell -in the four datasets.

    -
    library(Seurat)
    -options(Seurat.object.assay.version = "v5")
    -library(SeuratData)
    -
    InstallData("panc8")
    -

    To construct a reference, we will identify ‘anchors’ between the -individual datasets. First, we split the combined object into a list, -with each dataset as an element (this is only necessary because the data -was bundled together for easy distribution).

    -
    data("panc8")
    -panc8 <- UpdateSeuratObject(panc8)
    -panc8[["RNA"]] <- CreateAssay5Object(panc8[["RNA"]]@counts)
    -# split the dataset into layers by technology
    -panc8[["RNA"]] <- split(panc8[["RNA"]], f = panc8$tech)
    -panc8 <- DietSeurat(panc8, layers = c("celseq", "celseq2", "fluidigmc1", "smartseq2"))
    -

    Prior to finding anchors, we perform standard preprocessing -(log-normalization), and identify variable features individually for -each. Note that Seurat implements an improved method for variable -feature selection based on a variance stabilizing transformation -("vst")

    -
    panc8 <- NormalizeData(panc8, verbose = FALSE)
    -panc8 <- FindVariableFeatures(panc8, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
    -features <- VariableFeatures(panc8)
    -
    -
    -

    Integration of 3 pancreatic islet cell datasets

    -

    Next, we identify anchors using the -FindIntegrationAnchors() function, which takes a list of -Seurat objects as input. Here, we integrate three of the objects into a -reference (we will use the fourth later in this vignette as a query -dataset to demonstrate mapping).

    -
      -
    • We use all default parameters here for identifying anchors, -including the ‘dimensionality’ of the dataset (30; feel free to try -varying this parameter over a broad range, for example between 10 and -50).
    • -
    -
    pancreas.ref <- DietSeurat(panc8, layers = c("celseq", "celseq2", "smartseq2"))
    -pancreas.ref <- as(object = pancreas.ref[["RNA"]], Class = "Assay5")
    -pancreas.ref <- CreateSeuratObject(pancreas.ref, meta.data = panc8@meta.data)
    -pancreas.ref <- ScaleData(pancreas.ref)
    -pancreas.ref <- RunPCA(pancreas.ref)
    -# pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
    -

    We then pass these anchors to the IntegrateData() -function, which returns a Seurat object.

    -
      -
    • The returned object will contain a new Assay, which -holds an integrated (or ‘batch-corrected’) expression matrix for all -cells, enabling them to be jointly analyzed.
    • -
    -
    pancreas.ref <- IntegrateLayers(object = pancreas.ref, method = CCAIntegration, features = features,
    -    verbose = F)
    -

    After running IntegrateData(), the Seurat -object will contain a new Assay with the integrated -expression matrix. Note that the original (uncorrected values) are still -stored in the object in the “RNA” assay, so you can switch back and -forth.

    -

    We can then use this new integrated matrix for downstream analysis -and visualization. Here we scale the integrated data, run PCA, and -visualize the results with UMAP. The integrated datasets cluster by cell -type, instead of by technology.

    -
    library(ggplot2)
    -library(cowplot)
    -library(patchwork)
    -# Run the standard workflow for visualization and clustering
    -pancreas.ref <- RunUMAP(pancreas.ref, reduction = "integrated.dr", dims = 1:30, verbose = FALSE)
    -p1 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "tech")
    -p2 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "celltype", label = TRUE, repel = TRUE) +
    -    NoLegend()
    -p1 + p2
    -

    -
    plot <- DimPlot(pancreas.ref, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") +
    -    ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) +
    -    guides(colour = guide_legend(override.aes = list(size = 10)))
    -# ggsave(filename = 'pancreas_integrated_umap.jpg', height = 7, width = 12, plot = plot,
    -# quality = 50)
    -
    -
    -

    Cell type classification using an integrated reference

    -

    Seurat also supports the projection of reference data (or meta data) -onto a query object. While many of the methods are conserved (both -procedures begin by identifying anchors), there are two important -distinctions between data transfer and integration:

    -
      -
    1. In data transfer, Seurat does not correct or modify the query -expression data.
    2. -
    3. In data transfer, Seurat has an option (set by default) to project -the PCA structure of a reference onto the query, instead of learning a -joint structure with CCA. We generally suggest using this option when -projecting data between scRNA-seq datasets.
    4. -
    -

    After finding anchors, we use the TransferData() -function to classify the query cells based on reference data (a vector -of reference cell type labels). TransferData() returns a -matrix with predicted IDs and prediction scores, which we can add to the -query metadata.

    -
    # do we want a different query and reference object or just have different layers??
    -pancreas.query <- DietSeurat(panc8, layers = "fluidigmc1", assays = "RNA", )
    -pancreas.query <- as(object = pancreas.query[["RNA"]], Class = "Assay5")
    -pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8@meta.data)
    -# system.time(pancreas.ref[['RNA']] <- JoinLayers(pancreas.ref[['RNA']], search = 'data', new
    -# = 'data')) # Error in Find Anchors, have to join layers system.time(pancreas.query[['RNA']]
    -# <- JoinLayers(pancreas.query[['RNA']], search = 'data', new = 'data')) # Error in Find
    -# Anchors, have to rename layer
    -
    -# for now i'm specifying RNA assay
    -pancreas.anchors <- FindTransferAnchors(reference = pancreas.ref, query = pancreas.query, dims = 1:30,
    -    reference.reduction = "integrated.dr", k.filter = NA)
    -
    -
    -predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.ref$celltype, dims = 1:30)
    -pancreas.query <- AddMetaData(pancreas.query, metadata = predictions)
    -

    Because we have the original label annotations from our full -integrated analysis, we can evaluate how well our predicted cell type -annotations match the full reference. In this example, we find that -there is a high agreement in cell type classification, with over 96% of -cells being labeled correctly.

    -
    pancreas.query$prediction.match <- pancreas.query$predicted.id == pancreas.query$celltype
    -table(pancreas.query$prediction.match)
    -
    ## 
    -## FALSE  TRUE 
    -##    26   612
    -

    To verify this further, we can examine some canonical cell type -markers for specific pancreatic islet cell populations. Note that even -though some of these cell types are only represented by one or two cells -(e.g. epsilon cells), we are still able to classify them correctly.

    -
    table(pancreas.query$predicted.id)
    -
    ## 
    -##             acinar activated_stellate              alpha               beta 
    -##                 22                 17                254                261 
    -##              delta             ductal        endothelial              gamma 
    -##                 22                 31                  9                 17 
    -##         macrophage            schwann 
    -##                  2                  3
    -
    VlnPlot(pancreas.query, c("REG1A", "PPY", "SST", "GHRL", "VWF", "SOX10"), group.by = "predicted.id")
    -

    -
    -
    -

    Unimodal UMAP Projection

    -

    In Seurat v4, we also enable projection of a query onto the reference -UMAP structure. This can be achieved by computing the reference UMAP -model and then calling MapQuery() instead of -TransferData().

    -
    pancreas.ref <- RunUMAP(pancreas.ref, dims = 1:30, reduction = "integrated.dr", return.model = TRUE)
    -pancreas.query <- MapQuery(anchorset = pancreas.anchors, reference = pancreas.ref, query = pancreas.query,
    -    refdata = list(celltype = "celltype"), reference.reduction = "integrated.dr", reduction.model = "umap")
    -
    - -What is MapQuery doing? - -

    MapQuery() is a wrapper around three functions: -TransferData(), IntegrateEmbeddings(), and -ProjectUMAP(). TransferData() is used to -transfer cell type labels and impute the ADT values; -IntegrateEmbeddings() is used to integrate reference with -query by correcting the query’s projected low-dimensional embeddings; -and finally ProjectUMAP() is used to project the query data -onto the UMAP structure of the reference. The equivalent code for doing -this with the intermediate functions is below:

    -
    pancreas.query <- TransferData(anchorset = pancreas.anchors, reference = panc8, query = pancreas.query,
    -    refdata = list(celltype = "celltype"))
    -pancreas.query <- IntegrateEmbeddings(anchorset = pancreas.anchors, reference = panc8, query = pancreas.query,
    -    new.reduction.name = "ref.pca")
    -pancreas.query <- ProjectUMAP(query = pancreas.query, query.reduction = "ref.pca", reference = panc8,
    -    reference.reduction = "pca", reduction.model = "umap")
    -
    -

    We can now visualize the query cells alongside our reference.

    -
    p1 <- DimPlot(pancreas.ref, reduction = "umap", group.by = "celltype", label = TRUE, label.size = 3,
    -    repel = TRUE) + NoLegend() + ggtitle("Reference annotations")
    -p2 <- DimPlot(pancreas.query, reduction = "ref.umap", group.by = "predicted.celltype", label = TRUE,
    -    label.size = 3, repel = TRUE) + NoLegend() + ggtitle("Query transferred labels")
    -p1 + p2
    -

    -
    # write.csv(x = t(as.data.frame(all_times)), file =
    -# '../output/timings/seurat5_integration_reference_mapping.csv')
    -
    - -Session Info - -
    sessionInfo()
    -
    ## R version 4.2.2 Patched (2022-11-10 r83330)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04.5 LTS
    -## 
    -## Matrix products: default
    -## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
    -## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
    -## 
    -## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    -## 
    -## attached base packages:
    -## [1] stats     graphics  grDevices utils     datasets  methods   base     
    -## 
    -## other attached packages:
    -##  [1] patchwork_1.1.2                cowplot_1.1.1                 
    -##  [3] ggplot2_3.4.0                  pbmc3k.SeuratData_3.1.4       
    -##  [5] panc8.SeuratData_3.0.2         ifnb.SeuratData_3.1.0         
    -##  [7] bonemarrowref.SeuratData_1.0.0 SeuratData_0.2.2              
    -##  [9] Seurat_4.9.9.9020              SeuratObject_4.9.9.9053       
    -## [11] sp_1.5-1                      
    -## 
    -## loaded via a namespace (and not attached):
    -##   [1] Rtsne_0.16             colorspace_2.0-3       deldir_1.0-6          
    -##   [4] ellipsis_0.3.2         ggridges_0.5.4         RcppHNSW_0.4.1        
    -##   [7] spatstat.data_3.0-0    rstudioapi_0.14        farver_2.1.1          
    -##  [10] leiden_0.4.3           listenv_0.9.0          ggrepel_0.9.2         
    -##  [13] RSpectra_0.16-1        fansi_1.0.3            codetools_0.2-18      
    -##  [16] splines_4.2.2          cachem_1.0.6           knitr_1.41            
    -##  [19] polyclip_1.10-4        spam_2.9-1             jsonlite_1.8.4        
    -##  [22] ica_1.0-3              cluster_2.1.4          png_0.1-8             
    -##  [25] uwot_0.1.14            spatstat.sparse_3.0-0  shiny_1.7.4           
    -##  [28] sctransform_0.3.5      compiler_4.2.2         httr_1.4.4            
    -##  [31] assertthat_0.2.1       Matrix_1.5-1           fastmap_1.1.0         
    -##  [34] lazyeval_0.2.2         cli_3.6.0              later_1.3.0           
    -##  [37] formatR_1.12           htmltools_0.5.4        tools_4.2.2           
    -##  [40] igraph_1.3.5           dotCall64_1.0-2        gtable_0.3.1          
    -##  [43] glue_1.6.2             RANN_2.6.1             reshape2_1.4.4        
    -##  [46] dplyr_1.0.10           rappdirs_0.3.3         Rcpp_1.0.9            
    -##  [49] scattermore_0.8        jquerylib_0.1.4        vctrs_0.5.1           
    -##  [52] nlme_3.1-161           spatstat.explore_3.0-5 progressr_0.13.0      
    -##  [55] lmtest_0.9-40          spatstat.random_3.0-1  xfun_0.36             
    -##  [58] stringr_1.5.0          globals_0.16.2         mime_0.12             
    -##  [61] miniUI_0.1.1.1         lifecycle_1.0.3        irlba_2.3.5.1         
    -##  [64] goftest_1.2-3          future_1.30.0          MASS_7.3-58           
    -##  [67] zoo_1.8-11             scales_1.2.1           spatstat.utils_3.0-1  
    -##  [70] promises_1.2.0.1       parallel_4.2.2         RColorBrewer_1.1-3    
    -##  [73] yaml_2.3.6             gridExtra_2.3          reticulate_1.27       
    -##  [76] pbapply_1.6-0          sass_0.4.4             stringi_1.7.12        
    -##  [79] highr_0.10             fastDummies_1.6.3      rlang_1.0.6           
    -##  [82] pkgconfig_2.0.3        matrixStats_0.63.0     evaluate_0.19         
    -##  [85] lattice_0.20-45        tensor_1.5             ROCR_1.0-11           
    -##  [88] purrr_1.0.1            labeling_0.4.2         htmlwidgets_1.6.1     
    -##  [91] tidyselect_1.2.0       parallelly_1.34.0      RcppAnnoy_0.0.20      
    -##  [94] plyr_1.8.8             magrittr_2.0.3         R6_2.5.1              
    -##  [97] generics_0.1.3         DBI_1.1.2              withr_2.5.0           
    -## [100] pillar_1.8.1           fitdistrplus_1.1-8     abind_1.4-5           
    -## [103] survival_3.4-0         tibble_3.1.8           future.apply_1.10.0   
    -## [106] crayon_1.5.2           KernSmooth_2.23-20     utf8_1.2.2            
    -## [109] spatstat.geom_3.0-3    plotly_4.10.1          rmarkdown_2.19        
    -## [112] grid_4.2.2             data.table_1.14.6      digest_0.6.31         
    -## [115] xtable_1.8-4           tidyr_1.2.1            httpuv_1.6.7          
    -## [118] munsell_0.5.0          viridisLite_0.4.1      bslib_0.4.2
    -
    -
    - - - - -
    - - - - - - - - - - - - - - - From b6a611695ba95f3c99db0f1f583b3835c026325b Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 19 Jan 2023 14:08:33 -0500 Subject: [PATCH 378/979] dataframe for larger data --- R/integration5.R | 8 +-- .../seurat5_integration_large_datasets.Rmd | 52 +++++++++++++------ vignettes/seurat5_integration_mapping.Rmd | 1 - 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 5a3c7859a..f0036a866 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -445,10 +445,10 @@ IntegrateLayers <- function( df } else if (is.null(x = group.by) && length(x = layers) > 1L) { cmap <- slot(object = object[[assay]], name = 'cells')[, layers] - as.data.frame(x = labels( - object = cmap, - values = Cells(x = object[[assay]], layer = scale.layer) - )) + data.frame(x = Cells(object[[assay]], + layer = scale.layer), + y = rep(scale.layer, length(Cells(object[[assay]], + layer = scale.layer)))) } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { FetchData( object = object, diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd index 24eb3ad23..567ceaa76 100644 --- a/vignettes/seurat5_integration_large_datasets.Rmd +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -58,45 +58,63 @@ For this example, we will be using the "Immune Cell Atlas" data from the Human C ```{r libs} library(Seurat) +options(Seurat.object.assay.version = "v5") ``` After acquiring the data, we first perform standard normalization and variable feature selection. ```{r hca.full.1} -bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") + +bm280k.data <- Read10X_h5("/brahms/haoy/HCA_BoneMarrow/ica_bone_marrow_h5.h5") +#bm280k.data <- Read10X("/brahms/mollag/seurat_v5/data/e7448a34-b33d-41de-b422-4c09bfeba96b.mtx/") +#colnames(bm280k.data) <- make.unique(c(colnames(bm280k.data)), sep="_") +#bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") bm280k <- CreateSeuratObject(counts = bm280k.data, min.cells = 100, min.features = 500) -bm280k.list <- SplitObject(bm280k, split.by = "orig.ident") -bm280k.list <- lapply(X = bm280k.list, FUN = function(x) { - x <- NormalizeData(x, verbose = FALSE) - x <- FindVariableFeatures(x, verbose = FALSE) -}) +bm280k[["RNA"]] <- split(bm280k[["RNA"]], f = bm280k$orig.ident) + +# Preprocessing +bm280k <- NormalizeData(bm280k, verbose = FALSE) +bm280k <- FindVariableFeatures(bm280k, verbose = FALSE) ``` Next, select features for downstream integration, and run PCA on each object in the list, which is required for running the alternative reciprocal PCA workflow. ```{r hca.full.2} -features <- SelectIntegrationFeatures(object.list = bm280k.list) -bm280k.list <- lapply(X = bm280k.list, FUN = function(x) { - x <- ScaleData(x, features = features, verbose = FALSE) - x <- RunPCA(x, features = features, verbose = FALSE) -}) +features <- VariableFeatures(bm280k) +bm280k <- ScaleData(bm280k, features = features, verbose = FALSE) +bm280k <- RunPCA(bm280k, features = features, verbose = FALSE) + ``` Since this dataset contains both men and women, we will chose one male and one female (BM1 and BM2) to use in a reference-based workflow. We determined donor sex by examining the expression of the XIST gene. ```{r integration.hca.full} -anchors <- FindIntegrationAnchors(object.list = bm280k.list, reference = c(1, 2), reduction = "rpca", dims = 1:50) -bm280k.integrated <- IntegrateData(anchorset = anchors, dims = 1:50) +bm280k <- IntegrateLayers(object = bm280k, + method = RPCAIntegration, + reference = c(1, 2), + dims = 1:50, + verbose = F) + +# this seems to be issue: as.data.frame(x = labels(object = cmap, values = Cells(x = object[[assay]], + #layer = scale.layer))) # this takes a super long time + + + +# speicifcially getting the labels call is really slow +# as.data.frame(labels(object = cmap, values = head(Cells(x = bm280k[["RNA"]], layer = "scale.data")))) +# as.data.frame(labels(object = cmap_small, values = Cells(x = bm280k[["RNA"]], layer = "scale.data"))) +# bboth of these smaller versions are fast + +data.frame(x = Cells(x = bm280k[["RNA"]], layer = "scale.data"), y = rep("scale.data", length(Cells(x = bm280k[["RNA"]], layer = "scale.data")))) ``` + ```{r downstream.hca.full} -bm280k.integrated <- ScaleData(bm280k.integrated, verbose = FALSE) -bm280k.integrated <- RunPCA(bm280k.integrated, verbose = FALSE) -bm280k.integrated <- RunUMAP(bm280k.integrated, dims = 1:50) +bm280k <- RunUMAP(bm280k, dims = 1:50) ``` ```{r viz.hca.full, fig.height = 9, fig.width = 16} -DimPlot(bm280k.integrated, group.by = "orig.ident") +DimPlot(bm280k, group.by = "orig.ident") ``` ```{r save.img, include=TRUE} diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index 51435c67f..0d3a009a4 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -58,7 +58,6 @@ panc8 <- UpdateSeuratObject(panc8) panc8[["RNA"]] <- CreateAssay5Object(panc8[["RNA"]]@counts) # split the dataset into layers by technology panc8[["RNA"]] <- split(panc8[["RNA"]], f = panc8$tech) -panc8 <- DietSeurat(panc8, layers = c("celseq", "celseq2", "fluidigmc1", "smartseq2")) ``` Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat implements an improved method for variable feature selection based on a variance stabilizing transformation (`"vst"`) From 561cad6eb52bac62cbde6f0d8e95603ca169db5e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 19 Jan 2023 16:08:53 -0500 Subject: [PATCH 379/979] rewrite findAnchors --- R/integration.R | 133 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 7c310d351..024420300 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3957,7 +3957,9 @@ FilterAnchors <- function( return(object) } -FindAnchors <- function( + + +FindAnchors_v3 <- function( object.pair, assay, slot, @@ -4056,6 +4058,135 @@ FindAnchors <- function( return(anchors) } + +FindAnchors_v5 <- function( + object.pair, + assay, + slot, + cells1, + cells2, + internal.neighbors, + reduction, + reduction.2 = character(), + nn.reduction = reduction, + dims = 1:10, + k.anchor = 5, + k.filter = 200, + k.score = 30, + max.features = 200, + nn.method = "annoy", + n.trees = 50, + nn.idx1 = NULL, + nn.idx2 = NULL, + eps = 0, + projected = FALSE, + verbose = TRUE +) { + reference.layers <- Layers(object.pair[[assay]], search = 'data')[1] + query.layers <- setdiff(Layers(object.pair[[assay]], search = 'data'), reference.layers) + query.ncell <- sapply(X = query.layers, FUN = function(x) ncol(object.pair[[assay]][[x]])) + query.offsets <- as.vector(x = cumsum(x = c(0, query.ncell)))[1:length(x = query.layers)] + anchor.list <- list() + for (i in seq_along(query.layers)) { + cells2.i <- Cells( + x = object.pair[[assay]], + layer = query.layers[i] + ) + object.pair.i <- subset( + x = object.pair, + cells = c(cells1, cells2.i) + ) + anchor.list[[i]] <- FindAnchors_v3( + object.pair = object.pair.i, + assay = assay, + slot = slot, + cells1 = cells1, + cells2 = cells2.i, + internal.neighbors = internal.neighbors, + reduction = reduction, + reduction.2 = reduction.2, + nn.reduction = nn.reduction, + dims = dims, + k.anchor = k.anchor, + k.filter = k.filter, + k.score = k.score, + max.features = max.features, + nn.method = nn.method, + n.trees = n.trees, + nn.idx1 = nn.idx1, + nn.idx2 = nn.idx2, + eps = eps, + projected = projected, + verbose = verbose + ) + anchor.list[[i]][,2] <- anchor.list[[i]][,2] + query.offsets[i] + anchor.list[[i]] <- t(anchor.list[[i]]) + } + anchors <- t(x = matrix( + data = unlist(x = anchor.list), + nrow = 3, + ncol = sum( + sapply(X = anchor.list, FUN = function(x) ncol(x)) + ) + ) + ) + return(anchors) +} + +FindAnchors <- function( + object.pair, + assay, + slot, + cells1, + cells2, + internal.neighbors, + reduction, + reduction.2 = character(), + nn.reduction = reduction, + dims = 1:10, + k.anchor = 5, + k.filter = 200, + k.score = 30, + max.features = 200, + nn.method = "annoy", + n.trees = 50, + nn.idx1 = NULL, + nn.idx2 = NULL, + eps = 0, + projected = FALSE, + verbose = TRUE +) { + if (inherits(x = object.pair[[assay]], what = 'Assay')) { + FindAnchor.function <- FindAnchors_v3 + } else if (inherits(x = object.pair[[assay]], what = 'Assay5')) { + FindAnchor.function <- FindAnchors_v5 + } + anchors <- FindAnchor.function( + object.pair = object.pair, + assay = assay, + slot = slot, + cells1 = cells1, + cells2 = cells2, + internal.neighbors = internal.neighbors, + reduction = reduction, + reduction.2 = reduction.2, + nn.reduction = nn.reduction, + dims = dims, + k.anchor = k.anchor, + k.filter = k.filter, + k.score = k.score, + max.features = max.features, + nn.method = nn.method, + n.trees = n.trees, + nn.idx1 = nn.idx1, + nn.idx2 = nn.idx2, + eps = eps, + projected = projected, + verbose = verbose + ) + return(anchors) +} + # Find Anchor pairs # FindAnchorPairs <- function( From d4d0e1400f7e9bded334ec588bd3779e66481bf2 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 19 Jan 2023 16:34:40 -0500 Subject: [PATCH 380/979] changing integrate layers back to normal --- R/integration5.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index f0036a866..5a3c7859a 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -445,10 +445,10 @@ IntegrateLayers <- function( df } else if (is.null(x = group.by) && length(x = layers) > 1L) { cmap <- slot(object = object[[assay]], name = 'cells')[, layers] - data.frame(x = Cells(object[[assay]], - layer = scale.layer), - y = rep(scale.layer, length(Cells(object[[assay]], - layer = scale.layer)))) + as.data.frame(x = labels( + object = cmap, + values = Cells(x = object[[assay]], layer = scale.layer) + )) } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { FetchData( object = object, From acf881df6edd557d21475f3f0c08c45e33f9599a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 19 Jan 2023 17:08:33 -0500 Subject: [PATCH 381/979] multi layer mapping --- R/integration.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 024420300..a2f3d27d0 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4084,8 +4084,6 @@ FindAnchors_v5 <- function( ) { reference.layers <- Layers(object.pair[[assay]], search = 'data')[1] query.layers <- setdiff(Layers(object.pair[[assay]], search = 'data'), reference.layers) - query.ncell <- sapply(X = query.layers, FUN = function(x) ncol(object.pair[[assay]][[x]])) - query.offsets <- as.vector(x = cumsum(x = c(0, query.ncell)))[1:length(x = query.layers)] anchor.list <- list() for (i in seq_along(query.layers)) { cells2.i <- Cells( @@ -4119,7 +4117,7 @@ FindAnchors_v5 <- function( projected = projected, verbose = verbose ) - anchor.list[[i]][,2] <- anchor.list[[i]][,2] + query.offsets[i] + anchor.list[[i]][,2] <- match(x = cells2.i, table = cells2)[anchor.list[[i]][,2]] anchor.list[[i]] <- t(anchor.list[[i]]) } anchors <- t(x = matrix( @@ -4130,6 +4128,7 @@ FindAnchors_v5 <- function( ) ) ) + colnames(anchors) <- c('cell1', 'cell2', 'score') return(anchors) } @@ -6230,7 +6229,7 @@ ValidateParams_TransferData <- function( if (!is.null(x = query)) { if (!isTRUE(x = all.equal( target = gsub(pattern = "_query", replacement = "", x = query.cells), - current = Cells(x = query), + current = colnames(x = query), check.attributes = FALSE) )) { stop("Query object provided contains a different set of cells from the ", @@ -6362,7 +6361,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } query.cells <- slot(object = anchorset, name = "query.cells") query.cells <- gsub(pattern = "_query", replacement = "", x = query.cells) - if (!isTRUE(x = all.equal(target = query.cells, current = Cells(x = query), check.attributes = FALSE))) { + if (!isTRUE(x = all.equal(target = query.cells, current = colnames(x = query), check.attributes = FALSE))) { stop("The set of cells used as a query in the AnchorSet does not match ", "the set of cells provided in the query object.") } From e0dcbe9f954dabc00ec1cdbe6e42ffd412715fda Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 19 Jan 2023 17:41:23 -0500 Subject: [PATCH 382/979] large dataset vignette should run --- vignettes/seurat5_integration_large_datasets.Rmd | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd index 567ceaa76..0b56a405a 100644 --- a/vignettes/seurat5_integration_large_datasets.Rmd +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -65,7 +65,7 @@ After acquiring the data, we first perform standard normalization and variable f ```{r hca.full.1} -bm280k.data <- Read10X_h5("/brahms/haoy/HCA_BoneMarrow/ica_bone_marrow_h5.h5") +bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") #bm280k.data <- Read10X("/brahms/mollag/seurat_v5/data/e7448a34-b33d-41de-b422-4c09bfeba96b.mtx/") #colnames(bm280k.data) <- make.unique(c(colnames(bm280k.data)), sep="_") #bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") @@ -95,17 +95,7 @@ bm280k <- IntegrateLayers(object = bm280k, dims = 1:50, verbose = F) -# this seems to be issue: as.data.frame(x = labels(object = cmap, values = Cells(x = object[[assay]], - #layer = scale.layer))) # this takes a super long time - - -# speicifcially getting the labels call is really slow -# as.data.frame(labels(object = cmap, values = head(Cells(x = bm280k[["RNA"]], layer = "scale.data")))) -# as.data.frame(labels(object = cmap_small, values = Cells(x = bm280k[["RNA"]], layer = "scale.data"))) -# bboth of these smaller versions are fast - -data.frame(x = Cells(x = bm280k[["RNA"]], layer = "scale.data"), y = rep("scale.data", length(Cells(x = bm280k[["RNA"]], layer = "scale.data")))) ``` From 959b225af8a7145e390e3511ea82c41f10543d42 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 20 Jan 2023 00:10:23 -0500 Subject: [PATCH 383/979] fix features and add reference neighbors --- R/integration.R | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index a2f3d27d0..009d191df 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1108,7 +1108,7 @@ FindTransferAnchors <- function( } k.nn <- max(k.score, k.anchor) query.neighbors <- NNHelper( - data = Embeddings(object = combined.ob[[reduction]])[Cells(x = query), ], + data = Embeddings(object = combined.ob[[reduction]])[colnames(x = query), ], k = max(mapping.score.k, k.nn + 1), method = nn.method, n.trees = n.trees, @@ -1126,8 +1126,18 @@ FindTransferAnchors <- function( } if (!is.null(x = reference.neighbors)) { precomputed.neighbors[["ref.neighbors"]] <- reference[[reference.neighbors]] - nn.idx1 <- Index(object = reference[[reference.neighbors]]) + } else { + precomputed.neighbors[["ref.neighbors"]] <- NNHelper( + data = Embeddings(combined.ob[[reduction]])[ + colnames(x = reference), + 1:length(x = dims) + ], + k = max(k.score, k.anchor), + method = nn.method, + cache.index = TRUE + ) } + nn.idx1 <- Index(object = precomputed.neighbors[["ref.neighbors"]]) anchors <- FindAnchors( object.pair = combined.ob, assay = reference.assay, @@ -5138,8 +5148,7 @@ ProjectCellEmbeddings.StdAssay <- function( f = intersect, x = list( rownames(x = Loadings(object = reference[[reduction]])), - rownames(x = reference[[reference.assay]]), - rownames(x = query) + rownames(x = reference[[reference.assay]]) ) ) if (normalization.method == 'SCT') { @@ -5261,7 +5270,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( block.size = 10000 ) { features <- features %||% rownames(x = Loadings(object = reference[[reduction]])) - + features <- intersect(features, rownames(query)) if (normalization.method == 'SCT') { reference.SCT.model <- slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]] cells.grid <- split( From 9de66ed7304421fb3660f4fdceead3bf90a2cb6c Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 20 Jan 2023 12:14:18 -0500 Subject: [PATCH 384/979] first element of assay list --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 009d191df..2bd10cff3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4165,9 +4165,9 @@ FindAnchors <- function( projected = FALSE, verbose = TRUE ) { - if (inherits(x = object.pair[[assay]], what = 'Assay')) { + if (inherits(x = object.pair[[assay[1]]], what = 'Assay')) { FindAnchor.function <- FindAnchors_v3 - } else if (inherits(x = object.pair[[assay]], what = 'Assay5')) { + } else if (inherits(x = object.pair[[assay[1]]], what = 'Assay5')) { FindAnchor.function <- FindAnchors_v5 } anchors <- FindAnchor.function( From 6c1f4c42032c4fdae0b4ac84e077d0c574bf3d14 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 20 Jan 2023 13:10:29 -0500 Subject: [PATCH 385/979] fix error if cells was longer than colnames(object.pair) --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 2bd10cff3..23bb7a36d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4102,7 +4102,7 @@ FindAnchors_v5 <- function( ) object.pair.i <- subset( x = object.pair, - cells = c(cells1, cells2.i) + cells = unique(cells1, cells2.i) ) anchor.list[[i]] <- FindAnchors_v3( object.pair = object.pair.i, From e8862ed22445d0b26278b65b39994fbe4de13a32 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 20 Jan 2023 14:27:00 -0500 Subject: [PATCH 386/979] joinlayers to make one data slot --- R/integration.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 23bb7a36d..53609107b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -802,6 +802,7 @@ FindTransferAnchors <- function( reduction.2 <- character() feature.mean <- NULL reference.reduction.init <- reference.reduction + reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], search = "data", new = "data") if (normalization.method == "SCT") { if (is.null(x = reference.reduction)) { reference <- suppressWarnings(expr = GetResidual( @@ -4102,7 +4103,7 @@ FindAnchors_v5 <- function( ) object.pair.i <- subset( x = object.pair, - cells = unique(cells1, cells2.i) + cells = c(cells1, cells2.i) ) anchor.list[[i]] <- FindAnchors_v3( object.pair = object.pair.i, From f663049f93b7d50ef9c812380f5735e3ed5599de Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 20 Jan 2023 14:31:41 -0500 Subject: [PATCH 387/979] format --- R/integration.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 53609107b..5546d8b53 100644 --- a/R/integration.R +++ b/R/integration.R @@ -802,7 +802,8 @@ FindTransferAnchors <- function( reduction.2 <- character() feature.mean <- NULL reference.reduction.init <- reference.reduction - reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], search = "data", new = "data") + reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], + search = "data", new = "data") if (normalization.method == "SCT") { if (is.null(x = reference.reduction)) { reference <- suppressWarnings(expr = GetResidual( From c1954befed5649499a370c8f5429fbb9e0ef436e Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 20 Jan 2023 15:07:15 -0500 Subject: [PATCH 388/979] assay5 only --- R/integration.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5546d8b53..b58d98f7f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -802,8 +802,12 @@ FindTransferAnchors <- function( reduction.2 <- character() feature.mean <- NULL reference.reduction.init <- reference.reduction - reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], - search = "data", new = "data") + if (inherits(x = reference[[reference.assay]], what = 'Assay5')) { + if (length(Layers(reference, search = "data")) > 1) { + reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], + search = "data", new = "data") + } + } if (normalization.method == "SCT") { if (is.null(x = reference.reduction)) { reference <- suppressWarnings(expr = GetResidual( From 364d13d0393300b52f473d5dac3fbf095f17d364 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 20 Jan 2023 23:56:54 -0500 Subject: [PATCH 389/979] fix sct clip --- R/integration.R | 9 +++++++-- R/preprocessing5.R | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index b58d98f7f..ddbe3c4bf 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5080,10 +5080,15 @@ ProjectCellEmbeddings.Assay <- function( rownames(x = query) ) ) + if (normalization.method == 'SCT') { + slot <- 'counts' + } else { + slot <- 'data' + } proj.pca <- ProjectCellEmbeddings( query = GetAssayData( object = query, - slot = "data"), + slot = slot), reference = reference, reference.assay = reference.assay, reduction = reduction, @@ -5127,7 +5132,7 @@ ProjectCellEmbeddings.SCTAssay <- function( ) query.data <- GetAssayData( object = query, - slot = "data")[features,] + slot = "scale.data")[features,] ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] proj.pca <- t(crossprod(x = ref.feature.loadings, y = query.data)) return(proj.pca) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index fd61a6293..726cc396b 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -2176,6 +2176,7 @@ FetchResiduals_reference <- function(object, features = NULL, verbose = FALSE) { features_to_compute <- features + features_to_compute <- intersect(features_to_compute, rownames(object)) vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) # override clip.range @@ -2217,7 +2218,6 @@ FetchResiduals_reference <- function(object, umi = umi, residual_type = "pearson", min_variance = min_var, - res_clip_range = c(clip.min, clip.max), verbosity = as.numeric(x = verbose) * 2 ) @@ -2228,5 +2228,6 @@ FetchResiduals_reference <- function(object, STATS = ref.residuals.mean, FUN = "-" ) + new_residual <- MinMax(data = new_residual, min = clip.min, max = clip.max) return(new_residual) } From fd992e8c06845891f1e6b25db026e77830880f4b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 22 Jan 2023 00:32:07 -0500 Subject: [PATCH 390/979] fix fetch residuals nUMI --- R/integration.R | 20 +++++++++++++++++--- R/preprocessing.R | 1 - R/preprocessing5.R | 13 ++++++++----- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index ddbe3c4bf..d9747fe5f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -770,6 +770,8 @@ FindTransferAnchors <- function( mapping.score.k = NULL, verbose = TRUE ) { + op <- options(Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) # input validation ValidateParams_FindTransferAnchors( reference = reference, @@ -929,6 +931,7 @@ FindTransferAnchors <- function( query = query, scale = scale, dims = dims, + nCount_UMI = query[[]][, paste0("nCount_", query.assay)], feature.mean = feature.mean, verbose = verbose ) @@ -1138,7 +1141,7 @@ FindTransferAnchors <- function( colnames(x = reference), 1:length(x = dims) ], - k = max(k.score, k.anchor), + k = max(k.score, k.anchor) + 5, method = nn.method, cache.index = TRUE ) @@ -5018,6 +5021,7 @@ ProjectCellEmbeddings.Seurat <- function( normalization.method = c("LogNormalize", "SCT"), scale = TRUE, verbose = TRUE, + nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL ) { @@ -5050,6 +5054,7 @@ ProjectCellEmbeddings.Seurat <- function( scale = scale, normalization.method = normalization.method, verbose = verbose, + nCount_UMI = nCount_UMI, feature.mean = feature.mean, feature.sd = feature.sd ) @@ -5069,6 +5074,7 @@ ProjectCellEmbeddings.Assay <- function( scale = TRUE, normalization.method = NULL, verbose = TRUE, + nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL ) { @@ -5097,6 +5103,7 @@ ProjectCellEmbeddings.Assay <- function( normalization.method = normalization.method, verbose = verbose, features = features, + nCount_UMI = nCount_UMI, feature.mean = feature.mean, feature.sd = feature.sd ) @@ -5116,6 +5123,7 @@ ProjectCellEmbeddings.SCTAssay <- function( scale = TRUE, normalization.method = NULL, verbose = TRUE, + nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL ) { @@ -5151,6 +5159,7 @@ ProjectCellEmbeddings.StdAssay <- function( scale = TRUE, normalization.method = NULL, verbose = TRUE, + nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL ) { @@ -5180,6 +5189,7 @@ ProjectCellEmbeddings.StdAssay <- function( normalization.method = normalization.method, verbose = verbose, features = features, + nCount_UMI = nCount_UMI_i, feature.mean = feature.mean, feature.sd = feature.sd )) @@ -5211,6 +5221,7 @@ ProjectCellEmbeddings.default <- function( normalization.method = NULL, verbose = TRUE, features = NULL, + nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL ){ @@ -5220,7 +5231,8 @@ if (normalization.method == 'SCT') { query <- FetchResiduals_reference( object = query, reference.SCT.model = reference.SCT.model, - features = features) + features = features, + nCount_UMI = nCount_UMI) } else { query <- query[features,] reference.data <- GetAssayData( @@ -5276,6 +5288,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( normalization.method = NULL, verbose = TRUE, features = features, + nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL, block.size = 10000 @@ -5293,7 +5306,8 @@ ProjectCellEmbeddings.IterableMatrix <- function( query.i <- FetchResiduals_reference( object = as.sparse(query[,cells.grid[[i]]]), reference.SCT.model = reference.SCT.model, - features = features) + features = features, + nCount_UMI = nCount_UMI) proj.list[[i]] <- t(Loadings(object = reference[[reduction]])[features,dims]) %*% query.i } proj.pca <- t(matrix( diff --git a/R/preprocessing.R b/R/preprocessing.R index 07afeb350..933f61366 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3097,7 +3097,6 @@ SCTransform.default <- function( } else { sct.method <- "default" } - # set vst model vst.out <- switch( EXPR = sct.method, diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 726cc396b..c0b963c64 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -2174,7 +2174,14 @@ FetchResidualSCTModel <- function(object, FetchResiduals_reference <- function(object, reference.SCT.model = NULL, features = NULL, + nCount_UMI = NULL, verbose = FALSE) { + ## Add cell_attr for missing cells + nCount_UMI <- nCount_UMI %||% colSums(object) + cell_attr <- data.frame( + umi = nCount_UMI, + log_umi = log10(x = nCount_UMI) + ) features_to_compute <- features features_to_compute <- intersect(features_to_compute, rownames(object)) vst_out <- SCTModel_to_vst(SCTModel = reference.SCT.model) @@ -2196,11 +2203,7 @@ FetchResiduals_reference <- function(object, umi <- object[features_to_compute, , drop = FALSE] - ## Add cell_attr for missing cells - cell_attr <- data.frame( - umi = colSums(object), - log_umi = log10(x = colSums(object)) - ) + rownames(cell_attr) <- colnames(object) vst_out$cell_attr <- cell_attr From ed99fea6f4f8c854f19c609f95896e3c132540ad Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 22 Jan 2023 06:38:25 -0500 Subject: [PATCH 391/979] fix neighbors bug --- R/integration.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index d9747fe5f..46de92ef5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1141,7 +1141,7 @@ FindTransferAnchors <- function( colnames(x = reference), 1:length(x = dims) ], - k = max(k.score, k.anchor) + 5, + k = max(k.score, k.anchor) + 1, method = nn.method, cache.index = TRUE ) @@ -4367,6 +4367,7 @@ FindNN <- function( eps = eps, index = nn.idx1 ) + nn.idx1 <- Index(object = nnaa) } if (!is.null(x = internal.neighbors[[2]])) { nnbb <- internal.neighbors[[2]] @@ -4378,8 +4379,9 @@ FindNN <- function( method = nn.method, n.trees = n.trees, eps = eps, - index = nn.idx1 + cache.index = TRUE ) + nn.idx2 <- Index(object = nnbb) } if (length(x = reduction.2) > 0) { nnab <- NNHelper( From 89f853fec701b8fc8c5e4c71e770da94d016d832 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 22 Jan 2023 06:49:42 -0500 Subject: [PATCH 392/979] add cache --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index 46de92ef5..dcfb82fb7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4365,6 +4365,7 @@ FindNN <- function( method = nn.method, n.trees = n.trees, eps = eps, + cache.index = TRUE, index = nn.idx1 ) nn.idx1 <- Index(object = nnaa) From 57ce3a5dc72a4180bfebf818bbc2da536d2580d4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 23 Jan 2023 08:01:46 -0500 Subject: [PATCH 393/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a68af7c8f..779cc830e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9020 -Date: 2023-01-12 +Version: 4.9.9.9021 +Date: 2023-01-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 10101c4d5e8b400adad076d0c272095babb2c242 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 23 Jan 2023 12:39:02 -0500 Subject: [PATCH 394/979] fix project embeddings --- R/integration.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index dcfb82fb7..f765154fa 100644 --- a/R/integration.R +++ b/R/integration.R @@ -924,14 +924,16 @@ FindTransferAnchors <- function( approx = approx.pca ) } - projected.pca <- ProjectCellEmbeddings( + query_nCount_UMI <- query[[]][, paste0("nCount_", query.assay)] + names(query_nCount_UMI) <- colnames(query) + projected.pca <- ProjectCellEmbeddings( reference = reference, reduction = reference.reduction, normalization.method = normalization.method, query = query, scale = scale, dims = dims, - nCount_UMI = query[[]][, paste0("nCount_", query.assay)], + nCount_UMI = query_nCount_UMI, feature.mean = feature.mean, verbose = verbose ) @@ -5192,7 +5194,7 @@ ProjectCellEmbeddings.StdAssay <- function( normalization.method = normalization.method, verbose = verbose, features = features, - nCount_UMI = nCount_UMI_i, + nCount_UMI = nCount_UMI[Cells(x = query, layer = layers.set[i])], feature.mean = feature.mean, feature.sd = feature.sd )) From 7b278fac183362c1201862f7e75d54fe3c1aee13 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 23 Jan 2023 16:06:07 -0500 Subject: [PATCH 395/979] run through errors in v3 vignettes --- vignettes/BPCells_COVID_SCTMapping.Rmd | 183 +++++++ vignettes/BPCells_COVID_logMapping.Rmd | 187 +++++++ .../BPCells_sketch_clustering_mouse_brain.Rmd | 184 +++++++ ...ells_sketch_clustering_mouse_brain_SCT.Rmd | 178 +++++++ vignettes/BPCells_sketch_inte_1M.Rmd | 181 +++++++ vignettes/BPCells_sketch_inte_1M_SCT.Rmd | 187 +++++++ vignettes/atacseq_integration_vignette.Rmd | 8 +- vignettes/atomic_integration.Rmd | 3 +- vignettes/bridge_integration_vignette.Rmd | 3 +- vignettes/cell_cycle_vignette.Rmd | 4 +- vignettes/conversion_vignette.Rmd | 3 +- vignettes/de_vignette.Rmd | 6 +- vignettes/dim_reduction_vignette.Rmd | 6 +- vignettes/essential_commands.Rmd | 3 +- vignettes/future_vignette.Rmd | 3 +- vignettes/get_started_v5.Rmd | 112 ++++ vignettes/get_started_v5.nb.html | 293 +++++++++++ vignettes/hashing_vignette.Rmd | 3 +- vignettes/integration_introduction.Rmd | 13 +- vignettes/integration_large_datasets.Rmd | 5 +- vignettes/integration_mapping.Rmd | 8 +- vignettes/integration_rpca.Rmd | 12 +- vignettes/interaction_vignette.Rmd | 8 +- vignettes/merge_vignette.Rmd | 7 +- vignettes/mixscape_vignette.Rmd | 8 +- vignettes/multimodal_reference_mapping.Rmd | 12 +- vignettes/multimodal_vignette.Rmd | 7 +- vignettes/pancreas_integrated_umap.jpg | Bin 0 -> 602847 bytes vignettes/pbmc3k_tutorial.Rmd | 9 +- vignettes/sctransform_v2_vignette.Rmd | 5 +- vignettes/sctransform_vignette.Rmd | 3 +- vignettes/seurat5_large_dataset_analysis.Rmd | 112 ++++ .../seurat5_large_dataset_analysis.nb.html | 423 +++++++++++++++ vignettes/seurat5_sketch_integration.Rmd | 131 +++++ vignettes/seurat5_sketch_integration.nb.html | 483 ++++++++++++++++++ vignettes/seurat5_spatial_vignette.Rmd | 47 +- vignettes/spatial_vignette.Rmd | 50 +- vignettes/spatial_vignette_2.Rmd | 3 +- vignettes/vignettes_v5.yaml | 79 +++ .../weighted_nearest_neighbor_analysis.Rmd | 3 +- 40 files changed, 2915 insertions(+), 60 deletions(-) create mode 100755 vignettes/BPCells_COVID_SCTMapping.Rmd create mode 100755 vignettes/BPCells_COVID_logMapping.Rmd create mode 100755 vignettes/BPCells_sketch_clustering_mouse_brain.Rmd create mode 100755 vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd create mode 100755 vignettes/BPCells_sketch_inte_1M.Rmd create mode 100755 vignettes/BPCells_sketch_inte_1M_SCT.Rmd create mode 100644 vignettes/get_started_v5.Rmd create mode 100644 vignettes/get_started_v5.nb.html create mode 100644 vignettes/pancreas_integrated_umap.jpg create mode 100644 vignettes/seurat5_large_dataset_analysis.Rmd create mode 100644 vignettes/seurat5_large_dataset_analysis.nb.html create mode 100644 vignettes/seurat5_sketch_integration.Rmd create mode 100644 vignettes/seurat5_sketch_integration.nb.html create mode 100644 vignettes/vignettes_v5.yaml diff --git a/vignettes/BPCells_COVID_SCTMapping.Rmd b/vignettes/BPCells_COVID_SCTMapping.Rmd new file mode 100755 index 000000000..11f964465 --- /dev/null +++ b/vignettes/BPCells_COVID_SCTMapping.Rmd @@ -0,0 +1,183 @@ +--- +title: "COVID SCTransform Mapping" +output: html_notebook +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +## load package + +```{r, warning=F, message=F} +library(Seurat) +library(BPCells) +library(dplyr) +``` + +## load matrix +```{r, warning=F, message=F} + +time0_loadMatrix <- system.time({ +meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list') + +file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/" +files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", + "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") +input.list <- list() +for (i in 1:length(files.set)) { + input.list[[i]] <- open_matrix_dir(dir = paste0(file.dir, files.set[i]) ) +} + names(input.list) <- paste0('counts.',gsub('_processed.BPCells','',files.set)) +}) +``` + +## load query +```{r,warning=F, message=F} + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +time1_normalize <- system.time({ + i = 4 + object <- CreateSeuratObject(counts = input.list[[i]], meta.data = meta.list[[i]] ) + object <- NormalizeData(object, verbose = FALSE) +}) + +``` + +## load reference +```{r} + +obj.ref <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") + + +``` +## mapping +```{r} + +time2_anchoring <- system.time({ +anchor <- FindTransferAnchors(reference = obj.ref, + query = object, + reference.reduction = 'pca', + normalization.method = 'SCT', + k.filter = NA, + k.anchor = 5, + features = rownames(obj.ref[['pca']]@feature.loadings)) +}) + +time3_MapQuery <- system.time({ +object <- MapQuery( + anchorset = anchor, + query = object, + reference = obj.ref, + refdata = list( + l1.s5 = "celltype.l1", + l2.s5 = "celltype.l2" + ), + reduction.model = "wnn.umap" +)}) + +``` +```{r} +anchor +``` + + +```{r} + p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.8, label = T) + NoLegend() + + p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'cell.type.fine',alpha = 0.8, label = T) + NoLegend() + p1+p2 +``` + +## pseudo-bulk +```{r} + +time4_bulk <- system.time( bulk <- AverageExpression(object, + method = 'aggregate', + return.seurat = T, + slot = 'counts', + assays = 'RNA', + group.by = c("predicted.l2.s5","Donor","Status") + ) +) + +bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) +bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) +bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) + + +``` + +## computing time summary +```{r} +all_T <- ls(pattern = 'time') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 1), 'mins' )) +``` +```{R} +marker.list <- list() +celltype.set <- unique(bulk$celltype ) +for (i in seq_along(celltype.set)) { + bulk.i <- subset(bulk, subset = celltype == celltype.set[i]) + Idents(bulk.i) <- 'disease' + if (any(table(bulk.i$disease) < 3)) { + marker.list[[i]] <- EmptyDF(n = 0) + } else { + marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'COVID',ident.2 = 'Healthy', slot = 'counts', test.use = 'DESeq2', verbose = F ) + } + +} +names(marker.list) <- celltype.set + +``` +```{r} +marker.list.filter <- lapply(marker.list, function(x) { + if(nrow(x) > 0) { + x <- x[x$p_val_adj < 0.01 & !is.na(x$p_val_adj ),] + } + if (nrow(x) > 0) { + return(x) + } +}) + + +``` +```{r} +object$Status <- factor(object$Status, levels = c('Healthy', 'COVID')) +bulk$disease <- factor(bulk$disease, levels = c('Healthy', 'COVID')) + +``` +```{r} +VlnPlot(bulk, features = 'IFI44L', group.by = 'celltype', split.by = 'disease') + +VlnPlot(object, features = 'IFI44L', group.by = 'predicted.celltype.l2', split.by = 'Status') +``` + \ No newline at end of file diff --git a/vignettes/BPCells_COVID_logMapping.Rmd b/vignettes/BPCells_COVID_logMapping.Rmd new file mode 100755 index 000000000..8a328f8cd --- /dev/null +++ b/vignettes/BPCells_COVID_logMapping.Rmd @@ -0,0 +1,187 @@ +--- +title: "COVID Log Normalization Mapping" +output: html_notebook +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +## load package + +```{r, warning=F, message=F} +library(Seurat) +library(BPCells) +library(dplyr) +``` + +## load matrix +```{r, warning=F, message=F} + +time0_loadMatrix <- system.time({ +meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list') + +file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/" +files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", + "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") +input.list <- list() +for (i in 1:length(files.set)) { + input.list[[i]] <- open_matrix_dir(dir = paste0(file.dir, files.set[i]) ) +} + names(input.list) <- paste0('counts.',gsub('_processed.BPCells','',files.set)) +}) +``` + +## load query +```{r,warning=F, message=F} + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +time1_normalize <- system.time({ + i = 4 + object <- CreateSeuratObject(counts = input.list[[i]], meta.data = meta.list[[i]] ) + object <- NormalizeData(object, verbose = FALSE) +}) + +``` + +## load reference +```{r} +library(SeuratData) +data("pbmc3k") +obj.ref <- pbmc3k +obj.ref <- UpdateSeuratObject(obj.ref) +obj.ref$seurat_annotations <- as.character(obj.ref$seurat_annotations) +obj.ref$seurat_annotations[is.na(obj.ref$seurat_annotations)] <- 'other' +obj.ref$celltype.l1 <- obj.ref$celltype.l2 <- obj.ref$seurat_annotations +obj.ref <- NormalizeData(obj.ref) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() +obj.ref <- RunUMAP(obj.ref, dims = 1:30, return.model = T, reduction.name = 'wnn.umap', reduction.key = 'W_') +``` +## mapping +```{r} + +time2_anchoring <- system.time({ +anchor <- FindTransferAnchors(reference = obj.ref, + query = object, + reference.reduction = 'pca', + k.filter = NA, + k.anchor = 5, + features = rownames(obj.ref[['pca']]@feature.loadings)) +}) + +time3_MapQuery <- system.time({ +object <- MapQuery( + anchorset = anchor, + query = object, + reference = obj.ref, + refdata = list( + l1.s5 = "celltype.l1", + l2.s5 = "celltype.l2" + ), + reduction.model = "wnn.umap" +)}) + +``` +```{r} +anchor +``` + + +```{r} + p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.9, label = T) + NoLegend() + + p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'cell.type.fine',alpha = 0.9, label = T) + NoLegend() + p1+p2 +``` + +## pseudo-bulk +```{r} + +time4_bulk <- system.time( bulk <- AverageExpression(object, + method = 'aggregate', + return.seurat = T, + slot = 'counts', + assays = 'RNA', + group.by = c("predicted.celltype.l2","Donor","Status") + ) +) + +bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) +bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) +bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) + + +``` + +## computing time summary +```{r} +all_T <- ls(pattern = 'time') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 1), 'mins' )) +``` +```{R} +marker.list <- list() +celltype.set <- unique(bulk$celltype ) +for (i in seq_along(celltype.set)) { + bulk.i <- subset(bulk, subset = celltype == celltype.set[i]) + Idents(bulk.i) <- 'disease' + if (any(table(bulk.i$disease) < 3)) { + marker.list[[i]] <- EmptyDF(n = 0) + } else { + marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'COVID',ident.2 = 'Healthy', slot = 'counts', test.use = 'DESeq2', verbose = F ) + } + +} +names(marker.list) <- celltype.set + +``` +```{r} +marker.list.filter <- lapply(marker.list, function(x) { + if(nrow(x) > 0) { + x <- x[x$p_val_adj < 0.01 & !is.na(x$p_val_adj ),] + } + if (nrow(x) > 0) { + return(x) + } +}) + + +``` +```{r} +object$Status <- factor(object$Status, levels = c('Healthy', 'COVID')) +bulk$disease <- factor(bulk$disease, levels = c('Healthy', 'COVID')) + +``` +```{r} +VlnPlot(bulk, features = 'IFI44L', group.by = 'celltype', split.by = 'disease') + +VlnPlot(object, features = 'IFI44L', group.by = 'predicted.celltype.l2', split.by = 'Status') +``` + \ No newline at end of file diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd new file mode 100755 index 000000000..6ca1ab3a5 --- /dev/null +++ b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd @@ -0,0 +1,184 @@ +--- +title: "Sketch clustering in mouse brain" +output: html_notebook +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +## load library +```{r, warning=FALSE, message=FALSE} +library(Seurat) +library(BPCells) +``` + +## load data from h5ad +```{r, warning=FALSE, message=FALSE} +t0_CreateObject <- system.time({ + +mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") +devtools::load_all("~/share/package/MetricPatch/") +mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +obj <- CreateSeuratObject(counts = mat ) + +}) + +``` + +## create sketch assay +```{r, warning=FALSE, message=FALSE} +t1_CreateSketchAssay <- system.time({ +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj, layer = 'counts') +obj <- LeverageScore(obj) +obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix') + +}) + +``` +## Sketch assay clustering +```{r, warning=FALSE, message=FALSE} +t2_SketchClustering <- system.time({ +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) +obj <- FindNeighbors(obj, dims = 1:50) +obj <- FindClusters(obj) +}) + +obj <- RunUMAP(obj, dims = 1:50, return.model = T) +``` + +```{r} +DimPlot(obj, label = T, reduction = 'umap') + NoLegend() +``` +## Azimuth mapping to annotate clusters +```{r, warning=F, message=F} + + +options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = T) +obj.v3 <- CreateSeuratObject(counts = as.sparse(obj[['RNA']]$counts[,colnames(obj[['sketch']])]) ) +library(Azimuth) +obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) + +obj$predicted.subclass <- obj.v3$predicted.subclass +obj$predicted.cluster <- obj.v3$predicted.cluster + +``` + + + +```{r} +DimPlot(obj, reduction = 'umap', label = T) + NoLegend() +DimPlot(obj, reduction = 'umap', group.by = 'predicted.subclass',label = T) + NoLegend() +DimPlot(obj, reduction = 'umap', group.by = 'predicted.cluster',label = T) + NoLegend() + +``` +```{r,fig.height = 20, fig.width = 15} +features.set <- c('Aqp4', 'Sox10', 'Slc17a7', 'Aif1', 'Foxj1', 'Pax6', 'Slc17a6', 'Lum', 'Nanog', 'Gad2', 'Foxj1', 'Cldn5','Alas2') +features.gaba.set <- c('Gad1','Mef2c','Sst','Lhx6','Nr2f2','Prox1') +DefaultAssay(obj) <- 'sketch' +FeaturePlot(obj, reduction = 'umap', features = features.set, max.cutoff = "q99", min.cutoff = 'q1') +FeaturePlot(obj, reduction = 'umap', features = features.gaba.set, max.cutoff = "q99", min.cutoff = 'q1') + +``` + +## Project full cells to PCA from sketch assay +```{r, warning=FALSE, message=FALSE} +t3_ProjectEmbedding <- system.time({ + ref.emb <- ProjectCellEmbeddings(query = obj, + reference = obj, + query.assay = 'RNA', + reference.assay = 'sketch', + reduction = 'pca') +obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA') +DefaultAssay(obj) <- 'RNA' +}) + + + +``` + +## Transfer labels and umap from sketch to full data +```{r, warning=FALSE, message=FALSE} +t4_transferLabel <- system.time({ + options(future.globals.maxSize = 1e9) + obj <- TransferSketchLabels(object = obj, + atoms = 'sketch', + reduction = 'pca.orig', + dims = 1:50, + refdata = list(cluster_full = 'sketch_snn_res.0.8', + subclass_full ='predicted.subclass', + cluster_anno_full = 'predicted.cluster'), + reduction.model = 'umap' + ) +}) + +``` + + +```{r} +library(ggplot2) +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() + +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() + +``` + +```{r} +all_T <- ls(pattern = '^t') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + time.i <- get(all_T[i])['elapsed'] + if (time.i > 60) { + print(paste(all_T[i], round(time.i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(time.i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 3), 'mins' )) + +``` + + +```{r} + +obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], + k = 30, + method = "hnsw", + metric = "cosine", + n_threads = 10) +obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') + +``` + +```{r} +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() + +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() +#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat.rds") + +``` \ No newline at end of file diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd new file mode 100755 index 000000000..d2d5e02fb --- /dev/null +++ b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd @@ -0,0 +1,178 @@ +--- +title: "Sketch clustering in mouse brain (SCTransform)" +output: html_notebook +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +## load library +```{r, warning=FALSE, message=FALSE} +library(Seurat) +library(BPCells) +library(Azimuth) +``` + +## load data from h5ad +```{r, warning=FALSE, message=FALSE} +t0_CreateObject <- system.time({ + +mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") +# devtools::load_all("~/share/package/MetricPatch/") +devtools::load_all("/home/haoy/share/package/MetricPatch/") +mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') + + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +obj <- CreateSeuratObject(counts = mat ) + +}) + +``` + +## create sketch assay +```{r, warning=FALSE, message=FALSE} +t1_CreateSketchAssay <- system.time({ +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj, layer = 'counts') +obj <- LeverageScore(obj) +#obj <- LeverageScoreSampling(object = obj, ncells = 5000, cast = 'dgCMatrix') +obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix') + +}) + +``` +## Sketch assay clustering +```{r, warning=FALSE, message=FALSE} +t2_SketchClustering <- system.time({ +obj <- SCTransform(object = obj) +obj <- RunPCA(obj) +obj <- FindNeighbors(obj, dims = 1:50) +obj <- FindClusters(obj) +}) + +obj <- RunUMAP(obj, dims = 1:50, return.model = T, verbose = F) +``` + +```{r} +DimPlot(obj, label = T, reduction = 'umap') + NoLegend() +``` +```{r} + +options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = T) +obj.v3 <- CreateSeuratObject(counts = as.sparse(obj[['sketch']]$counts) ) +obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) + +obj$predicted.subclass <- obj.v3$predicted.subclass +obj$predicted.cluster <- obj.v3$predicted.cluster + + +``` +```{r,fig.height = 20, fig.width = 15} +features.set <- c('Aqp4', 'Sox10', 'Slc17a7', 'Aif1', 'Foxj1', 'Pax6', 'Slc17a6', 'Lum', 'Nanog', 'Gad2', 'Foxj1', 'Cldn5','Alas2') +features.gaba.set <- c('Gad1','Mef2c','Sst','Lhx6','Nr2f2','Prox1') +DefaultAssay(obj) <- 'sketch' +FeaturePlot(obj, reduction = 'umap', features = features.set, max.cutoff = "q99", min.cutoff = 'q1') +FeaturePlot(obj, reduction = 'umap', features = features.gaba.set, max.cutoff = "q99", min.cutoff = 'q1') + +``` + + +## Project full cells to PCA from sketch assay +```{r, warning=FALSE, message=FALSE} +t3_ProjectEmbedding <- system.time({ + ref.emb <- ProjectCellEmbeddings(query = obj, + reference = obj, + query.assay = 'RNA', + reference.assay = 'SCT', + normalization.method = 'SCT', + reduction = 'pca') +obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA') +DefaultAssay(obj) <- 'RNA' +}) + + + +``` + +## Transfer labels and umap from sketch to full data +```{r, warning=FALSE, message=FALSE} +t4_transferLabel <- system.time({ + options(future.globals.maxSize = 1e9) + obj <- TransferSketchLabels(object = obj, + atoms = 'sketch', + reduction = 'pca.orig', + dims = 1:50, + refdata = list(cluster_full = 'SCT_snn_res.0.8', + subclass_full ='predicted.subclass'), + reduction.model = 'umap' + ) +}) + +``` + + +```{r} +library(ggplot2) + + +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() + + +``` + +```{r} +all_T <- ls(pattern = '^t') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 3), 'mins' )) +``` + + +```{r} + +obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], + k = 30, + method = "hnsw", + metric = "cosine", + n_threads = 10) +obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') + +``` + +```{r} +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() + +#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat_SCT.rds") + +``` diff --git a/vignettes/BPCells_sketch_inte_1M.Rmd b/vignettes/BPCells_sketch_inte_1M.Rmd new file mode 100755 index 000000000..4639c14bf --- /dev/null +++ b/vignettes/BPCells_sketch_inte_1M.Rmd @@ -0,0 +1,181 @@ +--- +title: "Sketch integration" +output: html_notebook +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +## load package + +```{r, warning=F, message=F} +library(Seurat) +library(BPCells) +library(dplyr) +``` + +## load matrix +```{r, warning=F, message=F} + +time0_loadMatrix <- system.time({ + #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') + #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') + mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') + meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') + +}) +``` + +## sketch object +```{r,warning=F, message=F} + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +time1_normalize <- system.time({ + object <- CreateSeuratObject(counts = mat, meta.data = meta) + object <- NormalizeData(object, verbose = FALSE) + # object[['RNA']]$data <- write_matrix_dir( + # mat = object[['RNA']]$data, + # dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data' + # ) + #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data') +}) + + +time2_split.mat <- system.time({ + options(Seurat.object.assay.calcn = FALSE) + object[['RNA']] <- split(object[['RNA']], f = object$sample) +}) + + +time3_FindVariable <- system.time({ + object <- FindVariableFeatures(object, layer = 'counts', verbose = FALSE) +} +) + +time4_LeverageScoreSampling <- system.time({ + object <- LeverageScore(object, verbose = FALSE) + object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix', verbose = FALSE) +}) + +``` + +## integrate sketched assay +```{r} + +time5_SketchIntegration <- system.time({ + DefaultAssay(object) <- 'sketch' + object <- FindVariableFeatures(object, verbose = F, nfeatures = 2000) + features <- VariableFeatures(object) + object <- ScaleData(object, features = features, verbose = F) + object <- RunPCA(object, features = features, verbose = F) + DefaultAssay(object) <- 'sketch' + options(future.globals.maxSize = 3e9) + object <- IntegrateLayers(object, + method = RPCAIntegration, + orig = 'pca', + new.reduction = 'integrated.rpca', + dims = 1:30, + k.anchor = 20, + reference = which(Layers(object, search = 'data') %in% c( 'data.H_3060')), + verbose = F) + +}) +object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T, verbose = F) +plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() +plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend() +``` +```{r} +plot.s1 + plot.s2 + +``` + + +## proporgate embeddings to full data +```{r} +time6_UnSketch <- system.time({ + object <- IntegrateSketchEmbeddings(object = object, + atoms = 'sketch', + orig = 'RNA', + reduction = 'integrated.rpca' , + layers = Layers(object = object[['RNA']], search = 'data'), + features = features ) + +}) + + +``` + +```{r} +object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') +``` + +## save object +```{r} +#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat.rds")) +``` + +## pseudo-bulk +```{r} +time8_bulk <- system.time( bulk <- AverageExpression(object, + return.seurat = T, + slot = 'counts', + assays = 'RNA', + group.by = c("celltype.weight","sample") + ) +) +marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) +marker %>% + group_by(cluster) %>% + top_n(n = -5, wt = p_val) -> top5 +bulk <- ScaleData(bulk, features = top5$gene) + +``` + +## computing time summary +```{r} +all_T <- ls(pattern = 'time') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 1), 'mins' )) +``` + + + +```{r} +p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend() +p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend() +p1+p2 + +``` +```{r,fig.height = 20, fig.width = 15} +DoHeatmap(bulk, features = top5$gene) + NoLegend() + +``` diff --git a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd new file mode 100755 index 000000000..a52110e0e --- /dev/null +++ b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd @@ -0,0 +1,187 @@ +--- +title: "Sketch integration using SCTransform" +output: html_notebook +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +## load package + +```{r, warning=F, message=F} +library(Seurat) +library(BPCells) +library(dplyr) +``` + +## load matrix +```{r, warning=F, message=F} + +time0_loadMatrix <- system.time({ + #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') + #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') + mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') + meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') + +}) +``` + +## sketch object +```{r,warning=F, message=F} + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +time1_normalize <- system.time({ + object <- CreateSeuratObject(counts = mat, meta.data = meta) + object <- NormalizeData(object) + # object[['RNA']]$data <- write_matrix_dir( + # mat = object[['RNA']]$data, + # dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data' + # ) + #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data') +}) + + +time2_split.mat <- system.time({ + options(Seurat.object.assay.calcn = FALSE) + object[['RNA']] <- split(object[['RNA']], f = meta$sample) +}) + + +time3_FindVariable <- system.time({ + object <- FindVariableFeatures(object, layer = 'counts') +} +) + +time4_LeverageScoreSampling <- system.time({ + object <- LeverageScore(object) + object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix') +}) + +``` + +## integrate sketched assay +```{r} + +time5_SketchIntegration <- system.time({ + DefaultAssay(object) <- 'sketch' + object <- SCTransform(object) + object <- RunPCA(object, verbose = F) + features <- rownames(object[['pca']][]) + DefaultAssay(object) <- 'SCT' + options(future.globals.maxSize = 8e9) + object <- IntegrateLayers(object, + method = RPCAIntegration, + orig = 'pca', + new.reduction = 'integrated.rpca', + normalization.method = "SCT", + dims = 1:30, + k.anchor = 20, + features = features, + reference = which(Layers(object[['sketch']], search = 'data') %in% c( 'data.H_3060')), + verbose = F) + +}) +object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T, verbose = F) +plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() +plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend() +``` +```{r} +plot.s1 + plot.s2 + +``` + + +## proporgate embeddings to full data +```{r} +time6_UnSketch <- system.time({ + object <- IntegrateSketchEmbeddings(object = object, + atoms = 'sketch', + orig = 'RNA', + reduction = 'integrated.rpca' , + layers = Layers(object = object[['RNA']], search = 'data'), + features = features ) + +}) + + +``` + +```{r} + object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') +``` + +## save object +```{r} +#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat_SCT.rds")) +``` + +## pseudo-bulk +```{r} +time8_bulk <- system.time( bulk <- AverageExpression(object, + return.seurat = T, + slot = 'counts', + assays = 'RNA', + group.by = c("celltype.weight","sample") + ) +) +marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) +marker %>% + group_by(cluster) %>% + top_n(n = -5, wt = p_val) -> top5 +bulk <- ScaleData(bulk, features = top5$gene) + +``` + +## computing time summary +```{r} +all_T <- ls(pattern = 'time') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 1), 'mins' )) +``` + + +```{r} +object$disease <- sapply(strsplit(x = object$sample, split = "_"), '[', 1) + +``` +```{r} +p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend() +p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend() +p3<- DimPlot(object, reduction = 'umap.orig', group.by = 'disease', label = T, alpha = 0.1) + NoLegend() + +p1+p2 + p3 + +``` +```{r,fig.height = 20, fig.width = 15} +DoHeatmap(bulk, features = top5$gene) + NoLegend() + +``` diff --git a/vignettes/atacseq_integration_vignette.Rmd b/vignettes/atacseq_integration_vignette.Rmd index 59f391a69..dadea5323 100644 --- a/vignettes/atacseq_integration_vignette.Rmd +++ b/vignettes/atacseq_integration_vignette.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) options(SeuratData.repo.use = 'satijalab04.nygenome.org') ``` @@ -66,6 +67,9 @@ library(cowplot) pbmc.rna <- LoadData("pbmcMultiome", "pbmc.rna") pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac") +pbmc.rna <- UpdateSeuratObject(pbmc.rna) +pbmc.atac <- UpdateSeuratObject(pbmc.atac) + # repeat QC steps performed in the WNN vignette pbmc.rna <- subset(pbmc.rna, seurat_annotations != 'filtered') pbmc.atac <- subset(pbmc.atac, seurat_annotations != 'filtered') @@ -100,7 +104,7 @@ p2 <- DimPlot(pbmc.atac, group.by = 'orig.ident', label = FALSE) + NoLegend() + p1 + p2 ``` -```{r save.img, include = FALSE} +```{r save.img, include = TRUE} plot <- (p1 + p2) & xlab("UMAP 1") & ylab("UMAP 2") & theme(axis.title = element_text(size = 18)) diff --git a/vignettes/atomic_integration.Rmd b/vignettes/atomic_integration.Rmd index fedce4754..7e7fe7725 100644 --- a/vignettes/atomic_integration.Rmd +++ b/vignettes/atomic_integration.Rmd @@ -24,7 +24,8 @@ knitr::opts_chunk$set( fig.width = 10, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/bridge_integration_vignette.Rmd b/vignettes/bridge_integration_vignette.Rmd index 9561ec0a5..827d168a0 100644 --- a/vignettes/bridge_integration_vignette.Rmd +++ b/vignettes/bridge_integration_vignette.Rmd @@ -24,7 +24,8 @@ knitr::knit_hooks$set(time_it = local({ knitr::opts_chunk$set( message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/cell_cycle_vignette.Rmd b/vignettes/cell_cycle_vignette.Rmd index d74c5c0c6..a180b1ae1 100644 --- a/vignettes/cell_cycle_vignette.Rmd +++ b/vignettes/cell_cycle_vignette.Rmd @@ -25,7 +25,7 @@ knitr::opts_chunk$set( tidy = TRUE, tidy.opts = list(width.cutoff = 95), warning = FALSE, - error = FALSE, + error = TRUE, message = FALSE, fig.width = 8, time_it = TRUE @@ -81,7 +81,7 @@ marrow <- RunPCA(marrow, features = c(s.genes, g2m.genes)) DimPlot(marrow) ``` -```{r save.img, include=FALSE} +```{r save.img, include=TRUE} library(ggplot2) plot <- DimPlot(marrow) + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + diff --git a/vignettes/conversion_vignette.Rmd b/vignettes/conversion_vignette.Rmd index ac4917b27..7b9af30c9 100644 --- a/vignettes/conversion_vignette.Rmd +++ b/vignettes/conversion_vignette.Rmd @@ -27,7 +27,8 @@ knitr::opts_chunk$set( fig.width = 10, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/de_vignette.Rmd b/vignettes/de_vignette.Rmd index 125c0977a..711275f7a 100644 --- a/vignettes/de_vignette.Rmd +++ b/vignettes/de_vignette.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -42,6 +43,7 @@ This vignette highlights some example workflows for performing differential expr library(Seurat) library(SeuratData) pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) ``` # Perform default differential expression tests @@ -110,7 +112,7 @@ The following differential expression tests are currently supported: For MAST and DESeq2, please ensure that these packages are installed separately in order to use them as part of Seurat. Once installed, use the `test.use` parameter can be used to specify which DE test to use. -```{r include = FALSE} +```{r include = TRUE} # necessary to get MAST to work properly library(SingleCellExperiment) ``` diff --git a/vignettes/dim_reduction_vignette.Rmd b/vignettes/dim_reduction_vignette.Rmd index fa66c107e..173ee1d71 100644 --- a/vignettes/dim_reduction_vignette.Rmd +++ b/vignettes/dim_reduction_vignette.Rmd @@ -25,7 +25,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -37,6 +38,7 @@ This vignette demonstrates how to store and interact with dimensional reduction library(Seurat) library(SeuratData) pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) ``` # Explore the new dimensional reduction structure @@ -96,7 +98,7 @@ FeatureScatter(pbmc, feature1 = "MDS_1", feature2 = "PC_1") ``` -```{r save.img, include=FALSE} +```{r save.img, include=TRUE} library(ggplot2) plot <- DimPlot(pbmc, reduction = "mds", pt.size = 0.5) ggsave(filename = "../output/images/pbmc_mds.jpg", height = 7, width = 12, plot = plot, quality = 50) diff --git a/vignettes/essential_commands.Rmd b/vignettes/essential_commands.Rmd index 40edf0bc1..563b7c65b 100644 --- a/vignettes/essential_commands.Rmd +++ b/vignettes/essential_commands.Rmd @@ -16,7 +16,8 @@ knitr::opts_chunk$set( message = FALSE, warning = FALSE, results = 'hold', - eval = FALSE + eval = FALSE, + error = TRUE ) ``` diff --git a/vignettes/future_vignette.Rmd b/vignettes/future_vignette.Rmd index b8c58c4a3..1f10104e2 100644 --- a/vignettes/future_vignette.Rmd +++ b/vignettes/future_vignette.Rmd @@ -25,7 +25,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` In Seurat, we have chosen to use the `future` framework for parallelization. In this vignette, we will demonstrate how you can take advantage of the `future` implementation of certain Seurat functions from a user's perspective. If you are interested in learning more about the `future` framework beyond what is described here, please see the package vignettes [here](https://cran.r-project.org/web/packages/future/index.html) for a comprehensive and detailed description. diff --git a/vignettes/get_started_v5.Rmd b/vignettes/get_started_v5.Rmd new file mode 100644 index 000000000..0735df1b7 --- /dev/null +++ b/vignettes/get_started_v5.Rmd @@ -0,0 +1,112 @@ +--- +title: "Getting Started with Seurat 5" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +--- + +```{r fxns, include = FALSE} +library('htmlTable') +make_list <- function(items) { + paste0("
      ", sprintf('
    • %s
    • ', items), '
    ', collapse = '') +} +make_href <- function(url, text){ + paste0("") +} +make_href2 <- function(url, text){ + paste0("", text, "") +} +process_entry <- function(dat) { + if (grepl(pattern = "https://satijalab.org/img/vignette_images", x = dat$image)) { + img <- paste0('![](', dat$image, '){width=3000px}') + } else if (grepl(pattern = "assets/", x= dat$image)) { + img <- paste0('![](', dat$image, '){width=3000px}') + } else { + img <- paste0('![](', '../output/images/', dat$image, '){width=3000px}') + } + if (grepl(pattern = "https://satijalab.org/", x = dat$name)) { + link <- dat$name + } else { + link <- paste0(dat$name, ".html") + } + go.button <- paste0('GO') + data.frame( + title = make_href(url = link, text = dat$title), + img = img, + desc = dat$summary, + btn = go.button + ) +} +process_wrapper_entry <- function(dat) { + data.frame( + Package = dat$name, + Vignette = make_href2(url = dat$link, text = dat$title), + Reference = make_href2(url = dat$reference, text = dat$citation), + Source = make_href2(url = dat$source, text = dat$source) + ) +} +make_vignette_card_section <- function(vdat, cat) { + vignettes <- vdat[[cat]]$vignettes + dat <- data.frame(title = character(), img = character(), desc = character()) + for (v in 1:length(x = vignettes)) { + dat <- rbind(dat, process_entry(vignettes[[v]])) + if(nrow(x = dat) == 3 | v == length(x = vignettes)){ + colnames(dat) <- NULL + dat <- t(dat) + if (ncol(x = dat) == 2) { + print(htmlTable( + dat, + align = '|l|l|', + css.cell = "padding-left: .75em; width: 50%", + css.class = "two-column-htmltable" + )) + } else if (ncol(x = dat) == 1){ + print(htmlTable( + dat, + align = '|l|', + css.cell = "padding-left: .75em; width: 100%", + css.class = "one-column-htmltable" + )) + } else { + print(htmlTable( + dat, + align = '|l|l|l|', + css.cell = "padding-left: .75em; width: 30%" + )) + } + dat <- data.frame(title = character(), img = character(), desc = character()) + } + } +} +``` + +```{r yaml, include = FALSE} +library(yaml) +vdat <- read_yaml(file = "vignettes_v5.yaml") +``` + +```{=html} + +``` + +We provide a series of vignettes, tutorials, and analysis walkthroughs to help users get started with Seurat. You can also check out our [Reference page](../reference/index.html) which contains a full list of functions available to users. + +# Seurat 5 Vignettes + +For new users of Seurat, we suggest starting with a guided walk through of a dataset of 2,700 Peripheral Blood Mononuclear Cells (PBMCs) made publicly available by 10X Genomics. This tutorial implements the major components of a standard unsupervised clustering workflow including QC and data filtration, calculation of high-variance genes, dimensional reduction, graph-based clustering, and the identification of cluster markers. + +We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x multiome kit), or spatial datasets (e.g. 10x Visium or Vizgen MERFISH). + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 1) +``` diff --git a/vignettes/get_started_v5.nb.html b/vignettes/get_started_v5.nb.html new file mode 100644 index 000000000..4bb9d6bf6 --- /dev/null +++ b/vignettes/get_started_v5.nb.html @@ -0,0 +1,293 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + +

    This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

    +

    Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

    + + + +
    plot(cars)
    + + + +

    Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

    +

    When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

    +

    The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

    + + +
    LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIAoKYGBge3J9CnBsb3QoY2FycykKYGBgCgpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ3RybCtBbHQrSSouCgpXaGVuIHlvdSBzYXZlIHRoZSBub3RlYm9vaywgYW4gSFRNTCBmaWxlIGNvbnRhaW5pbmcgdGhlIGNvZGUgYW5kIG91dHB1dCB3aWxsIGJlIHNhdmVkIGFsb25nc2lkZSBpdCAoY2xpY2sgdGhlICpQcmV2aWV3KiBidXR0b24gb3IgcHJlc3MgKkN0cmwrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4KClRoZSBwcmV2aWV3IHNob3dzIHlvdSBhIHJlbmRlcmVkIEhUTUwgY29weSBvZiB0aGUgY29udGVudHMgb2YgdGhlIGVkaXRvci4gQ29uc2VxdWVudGx5LCB1bmxpa2UgKktuaXQqLCAqUHJldmlldyogZG9lcyBub3QgcnVuIGFueSBSIGNvZGUgY2h1bmtzLiBJbnN0ZWFkLCB0aGUgb3V0cHV0IG9mIHRoZSBjaHVuayB3aGVuIGl0IHdhcyBsYXN0IHJ1biBpbiB0aGUgZWRpdG9yIGlzIGRpc3BsYXllZC4K
    + + + +
    + + + + + + + + + + + + + + + + diff --git a/vignettes/hashing_vignette.Rmd b/vignettes/hashing_vignette.Rmd index 5d92ded3f..af0ba6d2f 100644 --- a/vignettes/hashing_vignette.Rmd +++ b/vignettes/hashing_vignette.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/integration_introduction.Rmd b/vignettes/integration_introduction.Rmd index bec4b4dd3..a7c40ca6b 100644 --- a/vignettes/integration_introduction.Rmd +++ b/vignettes/integration_introduction.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( fig.width = 10, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -66,7 +67,8 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -LoadData('ifnb') +ifnb <- LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") @@ -154,7 +156,7 @@ markers.to.plot <- c("CD3D","CREM","HSPH1","SELL","GIMAP5","CACYBP","GNLY","NKG7 DotPlot(immune.combined, features = markers.to.plot, cols = c('blue', 'red'), dot.scale = 8, split.by = "stim") + RotatedAxis() ``` -```{r save.img, include = FALSE} +```{r save.img, include = TRUE} library(ggplot2) plot <- DotPlot(immune.combined, features = markers.to.plot, cols = c('blue', 'red'), dot.scale = 6, split.by = "stim") + RotatedAxis() @@ -210,7 +212,7 @@ plots <- VlnPlot(immune.combined, features = c("LYZ", "ISG15", "CXCL10"), split. wrap_plots(plots = plots, ncol = 1) ``` -```{r save, include = FALSE} +```{r save, include = TRUE} saveRDS(immune.combined, file = "../output/immune.combined.rds") ``` @@ -228,7 +230,8 @@ Below, we demonstrate how to modify the Seurat integration workflow for datasets ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} -LoadData('ifnb') +ifnb <- LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) ifnb.list <- SplitObject(ifnb, split.by = "stim") ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform) features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) diff --git a/vignettes/integration_large_datasets.Rmd b/vignettes/integration_large_datasets.Rmd index c1870ac0e..bf5867144 100644 --- a/vignettes/integration_large_datasets.Rmd +++ b/vignettes/integration_large_datasets.Rmd @@ -27,7 +27,8 @@ knitr::opts_chunk$set( fig.width = 10, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -98,7 +99,7 @@ bm280k.integrated <- RunUMAP(bm280k.integrated, dims = 1:50) DimPlot(bm280k.integrated, group.by = "orig.ident") ``` -```{r save.img, include=FALSE} +```{r save.img, include=TRUE} library(ggplot2) plot <- DimPlot(bm280k.integrated, group.by = "orig.ident") + xlab("UMAP 1") + ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + diff --git a/vignettes/integration_mapping.Rmd b/vignettes/integration_mapping.Rmd index 71f5769cd..4dd02f598 100644 --- a/vignettes/integration_mapping.Rmd +++ b/vignettes/integration_mapping.Rmd @@ -28,7 +28,8 @@ knitr::opts_chunk$set( error = FALSE, message = FALSE, fig.width = 8, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -52,7 +53,8 @@ InstallData('panc8') To construct a reference, we will identify 'anchors' between the individual datasets. First, we split the combined object into a list, with each dataset as an element (this is only necessary because the data was bundled together for easy distribution). ```{r preprocessing1} -data('panc8') +panc8 <- LoadData('panc8') +panc8 <- UpdateSeuratObject(panc8) pancreas.list <- SplitObject(panc8, split.by = "tech") pancreas.list <- pancreas.list[c("celseq", "celseq2", "fluidigmc1", "smartseq2")] ``` @@ -108,7 +110,7 @@ p2 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype", p1 + p2 ``` -```{r save.img, include = FALSE} +```{r save.img, include = TRUE} plot <- DimPlot(pancreas.integrated, reduction = "umap", label = TRUE, label.size = 4.5) + xlab("UMAP 1") + ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10))) diff --git a/vignettes/integration_rpca.Rmd b/vignettes/integration_rpca.Rmd index 9f3936b98..5d1652621 100644 --- a/vignettes/integration_rpca.Rmd +++ b/vignettes/integration_rpca.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( fig.width = 10, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -47,6 +48,7 @@ options(SeuratData.repo.use = "http://satijalab04.nygenome.org") ``` ```{r installdata} +library(Seurat) library(SeuratData) # install dataset InstallData('ifnb') @@ -54,7 +56,8 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -LoadData('ifnb') +ifnb <- LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") @@ -131,7 +134,7 @@ p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE) p1 + p2 ``` -```{r save.img, include = FALSE} +```{r save.img, include = TRUE} library(ggplot2) plot <- DimPlot(immune.combined, group.by = "stim") + xlab("UMAP 1") + ylab("UMAP 2") + @@ -147,7 +150,8 @@ Now that the datasets have been integrated, you can follow the previous steps in As an additional example, we repeat the analyses performed above, but normalize the datasets using [SCTransform](sctransform_vignette.html). We may choose to set the `method` parameter to `glmGamPoi` (install [here](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html)) in order to enable faster estimation of regression parameters in `SCTransform()`. ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} -LoadData('ifnb') +ifnb <- LoadData('ifnb') +ifnb <- UpdateSeuratObject(ifnb) ifnb.list <- SplitObject(ifnb, split.by = "stim") ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform, method = "glmGamPoi") features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) diff --git a/vignettes/interaction_vignette.Rmd b/vignettes/interaction_vignette.Rmd index 2c52fe43c..13bf2d8d6 100644 --- a/vignettes/interaction_vignette.Rmd +++ b/vignettes/interaction_vignette.Rmd @@ -25,11 +25,12 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` -```{r, include = FALSE} +```{r, include = TRUE} options(SeuratData.repo.use = "http://satijalab04.nygenome.org") ``` @@ -42,6 +43,7 @@ library(Seurat) library(SeuratData) InstallData("pbmc3k") pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) # pretend that cells were originally assigned to one of two replicates (we assign randomly here) # if your cells do belong to multiple replicates, and you want to add this info to the Seurat object @@ -136,7 +138,7 @@ CellScatter(cluster.averages, cell1 = "CD8_T_rep1", cell2 = "CD8_T_rep2") DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) ``` -```{r save.times, include = FALSE} +```{r save.times, include = TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/interaction_vignette_times.csv") ``` diff --git a/vignettes/merge_vignette.Rmd b/vignettes/merge_vignette.Rmd index 2fd230f7d..4d1adca2d 100644 --- a/vignettes/merge_vignette.Rmd +++ b/vignettes/merge_vignette.Rmd @@ -26,11 +26,12 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` -```{r, include = FALSE} +```{r, include = TRUE} options(SeuratData.repo.use = "http://satijalab04.nygenome.org") ``` @@ -96,7 +97,7 @@ GetAssayData(pbmc.combined)[1:10, 1:15] GetAssayData(pbmc.normalized)[1:10, 1:15] ``` -```{r save.times, include = FALSE} +```{r save.times, include = TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/merge_vignette_times.csv") ``` diff --git a/vignettes/mixscape_vignette.Rmd b/vignettes/mixscape_vignette.Rmd index 4edf7fe62..34997ade4 100644 --- a/vignettes/mixscape_vignette.Rmd +++ b/vignettes/mixscape_vignette.Rmd @@ -23,7 +23,8 @@ knitr::knit_hooks$set(time_it = local({ knitr::opts_chunk$set( message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) options(SeuratData.repo.use = 'satijalab04.nygenome.org') ``` @@ -65,6 +66,7 @@ We use a 111 gRNA ECCITE-seq dataset generated from stimulated THP-1 cells that ```{r eccite.load} # Load object. eccite <- LoadData(ds = "thp1.eccite") +eccite <- UpdateSeuratObject(eccite) # Normalize protein. eccite <- NormalizeData( @@ -301,7 +303,7 @@ VlnPlot( theme(axis.text.x = element_text(angle = 0, hjust = 0.5), plot.title = element_text(size = 20), axis.text = element_text(size = 16)) ``` -```{r save.img, include=FALSE} +```{r save.img, include=TRUE} p <- VlnPlot(object = eccite, features = "adt_PDL1", idents = c("NT","JAK2","STAT1","IFNGR1","IFNGR2", "IRF1"), group.by = "gene", pt.size = 0.2, sort = T, split.by = "mixscape_class.global", cols = c("coral3","grey79","grey39")) +ggtitle("PD-L1 protein") +theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) ggsave(filename = "../output/images/mixscape_vignette.jpg", height = 7, width = 12, plot = p, quality = 50) ``` @@ -360,7 +362,7 @@ p2 <- p+ p2 ``` -```{r save.times, include = FALSE} +```{r save.times, include = TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/mixscape_vignette_times.csv") ``` diff --git a/vignettes/multimodal_reference_mapping.Rmd b/vignettes/multimodal_reference_mapping.Rmd index 2d0be4f07..0affbb33c 100644 --- a/vignettes/multimodal_reference_mapping.Rmd +++ b/vignettes/multimodal_reference_mapping.Rmd @@ -23,7 +23,8 @@ knitr::knit_hooks$set(time_it = local({ knitr::opts_chunk$set( message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -53,7 +54,7 @@ library(ggplot2) library(patchwork) ``` -```{r, include = FALSE, cache=FALSE} +```{r, include = TRUE, cache=FALSE} options(SeuratData.repo.use = "http://satijalab04.nygenome.org") ``` @@ -78,6 +79,7 @@ To demonstrate mapping to this multimodal reference, we will use a dataset of 2, ```{r 3k.load} library(SeuratData) InstallData('pbmc3k') +pbmc3k <- UpdateSeuratObject(pbmc3k) ``` The reference was normalized using `SCTransform()`, so we use the same approach to normalize the query here. @@ -165,7 +167,7 @@ Each prediction is assigned a score between 0 and 1. FeaturePlot(pbmc3k, features = c("pDC", "CD16 Mono", "Treg"), reduction = "ref.umap", cols = c("lightgrey", "darkred"), ncol = 3) & theme(plot.title = element_text(size = 10)) ``` -```{r save.img, include=FALSE} +```{r save.img, include=TRUE} library(ggplot2) plot <- FeaturePlot(pbmc3k, features = "CD16 Mono", reduction = "ref.umap", cols = c("lightgrey", "darkred")) + ggtitle("CD16 Mono") + theme(plot.title = element_text(hjust = 0.5, size = 30)) + labs(color = "Prediction Score") + xlab("UMAP 1") + ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18), legend.title = element_text(size = 25)) @@ -198,7 +200,7 @@ In our [manuscript](https://doi.org/10.1016/j.cell.2021.04.048), we map a query We emphasize that if users are attempting to map datasets where the underlying samples are not PBMC, or contain cell types that are not present in the reference, computing a 'de novo' visualization is an important step in interpreting their dataset. -```{r hiddendiet, include=FALSE} +```{r hiddendiet, include=TRUE} reference <- DietSeurat(reference, counts = FALSE, dimreducs = "spca") pbmc3k <- DietSeurat(pbmc3k, counts = FALSE, dimreducs = "ref.spca") ``` @@ -377,7 +379,7 @@ p5 <- FeaturePlot(hcabm40k, features = c("CD45RA", "CD16", "CD161"), reduction = p3 / p4 / p5 ``` -```{r save.times, include = FALSE} +```{r save.times, include = TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/reference_mapping_times.csv") ``` diff --git a/vignettes/multimodal_vignette.Rmd b/vignettes/multimodal_vignette.Rmd index 5c4911f69..98fdd1c21 100644 --- a/vignettes/multimodal_vignette.Rmd +++ b/vignettes/multimodal_vignette.Rmd @@ -28,7 +28,8 @@ knitr::opts_chunk$set( message = FALSE, warning = FALSE, time_it = TRUE, - fig.width = 10 + fig.width = 10, + error = TRUE ) ``` @@ -204,13 +205,13 @@ plot3 <- FeatureScatter(pbmc10k, feature1 = 'adt_CD3', feature2 = 'CD3E', pt.siz (plot1 + plot2 + plot3) & NoLegend() ``` -```{r save.img, include = FALSE} +```{r save.img, include = TRUE} plot <- FeatureScatter(cbmc, feature1 = "adt_CD19", feature2 = "adt_CD3") + NoLegend() + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) ggsave(filename = "../output/images/citeseq_plot.jpg", height = 7, width = 12, plot = plot, quality = 50) ``` -```{r save.times, include = FALSE} +```{r save.times, include = TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/multimodal_vignette_times.csv") ``` diff --git a/vignettes/pancreas_integrated_umap.jpg b/vignettes/pancreas_integrated_umap.jpg new file mode 100644 index 0000000000000000000000000000000000000000..560a2b1a68545d39ba7c582b6dd690b4ec2a5ade GIT binary patch literal 602847 zcmeFa1ymf-y64*j0zrcWg1cLA2_Cd@m&RQhcL|z6fZznT5L_B>oB+Yy-QC^&an8KC z>)x4bGjrFx^Uk5F7mM1vS9jH~t9SkD|9#)?r|G9d0Gf!2oslWP1n`&TX$~L?Ku1PF zLqS4ELq)^HK*z$R#mB|L!DXeOBBK4sF2K*r&ciJ%r6VsarXkMFqiCa~p>JevVJ;}= z;9+m#u48I${L})#c=Lt>4INe)10ab3_=Ev?Y6p-205Gp%{<{42t)BnDyn=;$4Ug~! z5efPE2~c#vD;QYVS8%YeU&FyY_j*6~1K==TW0J9oz+)*HB9J@0WAl&Bd_(c6ycJt{ zwh=DWJ59s+zinrWVN9#MI2(!qUpg z*~Qh(-NWe`RIo+1iswRd!Ob@%j+ zj*U-DPEF6uuB@)DZ)|RD@9dtOo}FJ@UR~eZKFtD9V3h%qfKLE`5a1l?FDvb@(3i&x zffoWV1YQWd5O^W*Lg0nK3xO8`F9co)ybyRH@Iv5)zzcyF0xtw!2)qz@A@FxZptdv^ z)R~zBG+<-~!H|0232zZNOzr7tv@;nij%JtPyk`PL2|oOE3a{O}C}CK|DbKf5)WX^E_~PS?01w7P*~9oZ@{SuPh5|K%lMOBKrj7w z1nnhU@==MU5I@Gn3nQi=RMZQJpo)59^xC%Rx@rFxF2SOEXM+qp*p!zX~fFa1BY6bvK>bY=1cm;wLW zBS^!Z-Sx${ht~U|-_=imNKM~)lHPx6D+I;oyyP>y5O^W*k3!&|3yObx1o_t##y@L| z&nd9@M*;RCs$K~E!xQ*7?@9i}5c01GjDIFNa-MHd{+{&L%dxx=cp>ob5O}dQ{^741 zFCy`U!2cqFf1xN|Y>k&8>kR2(g#07XmK?{uKf*w#L5#<$qDHy}ZH; zfqy6hFSf>isK#EN@Iv5)z`r2yVr%@}>WW`?VuEh5Tauek04Mu}8x{WP`=Jm0x=(ICc(d+2f6W^Wp?+UCZ|eK6pM<@Q8Z5#K5Gc&yN5$c_1^r{&>Wm(vC_d~DScu5*Fx+xB9EN%YnnAda-sOLE)2t} z&;m;*?2QTq#9p560o?$TbnJNcINKkp6IAb%#9tmS1pZG6XrzBuc>-wXJ^>nN9{cn> zP81$&Qy%v-?wCBbI)pCj?d~dk_n}MLzFoyahi^A@FKX^R+a8x6)&8kTc^u00f4Ubh z=kotV0DEZfc7N8pcW#c{b`~$G(-%g|Gd+i@_z!KIQ&`NO(A>|uT+=7kDdlJDsaOX) zYJ&~b@6k(qzo$1#OYc~hsGp89e1&8D!Xr?mw`f}HPo?D7#&e!i5Yi#(>qQh;o~$S} z2p|F$)(4)0ybIEww_ZhLkE`gky*f-+4aV}*^@Drl%s^`A;_r|P=W;F4El4oCP!^&Db*Xd^bC{RYY)y%~@%Gi4xL%=1C!jTzPjtO?5YP%k!U5~?cdv^r#vTddDb8N-Hx z;&cDs3s`a&wtuPB6juVn>pH~KAVvw`{a*SBe7TdkbM<9AO)qY_BJh5j8C!-G2|-z& zK~%EIYY&F;CjyFUbc^6cW|ltmFqJ05s-j$70Osle96R1)Yr*)G(BhF0i=HuCy z)bk;p{0mf(*Nq{_EI$6<CEXqo*QD zwcM_iFi3ZSn6Yh#8DhhG#!d?MK|2R=r5KnhgIGePO(Lt|>Z_v`5J>iEkM6#tUM)%Y z*hNauPm>7hndlfvP=;dyU34u#>Qd-2OZnPKliMMYc~gZ)h}xY|s52vb>1Nv~UDy<1 zV@UlSG-FkWAUC`q1(!*1_{7iD%=@Q_Iqa9|@hOc}#LrNQF5CUAxXl)u zaX(eHe{?FE0K>0Q7hI?f7xFd?BJJQ?MUhsL#3Ghn>JSsXpz5l<=?enfk`*}0FPXb< zKM8&O0-CaR=24cJ!No(vKsQYxvZSevSP+T2*hWxMvPy2sjm2yli_%$9lK>Vtr-6pF zs_x`|qzn@kziCz37Dt~K=XscWu!1*+N3{l4HsaIhj6rS$rLnGFJXnBiA*5RXk&RnDcz<3bsbQExjHA;ov`X zk8M=(rQ3h&jpY?m=;O4SQ%M9h5>_?HckTKPffzmxsW(=p_EuCS-Sv@C;`heEwI^(Z z^a$*xT_%COiD!&qDx-D*hSwA0C*s{&nA->u;} zjS>vXf&?NKmE$5UyBy#7vTDZ3L#BROC`sZ(+majgYPd>u(ZuRXBnQW1z)=RU$L?A{ zkf1M8)lZJppQk;{K^o z70M*@U$8STn&!U~00PCGPOmE#n8Pe#`AL_%31SZso&b6kOWGl3v*qj(Xxv-3O4_@- z+eGqeK_VB}nVf$_t;foH=?~Xx^Gj%Cz#i1Gd`dEe8hoqQMqK4O9==VBSgu|M$(zia zYUBDT_F+x9T<+(bN!eg?<@y*3Kp6cYf~%MR130BB^!*v}a%}|XP&cn}6yWj+u%|2> zU48kZOdRjVGj>NCy9vA+Yy-ietS~)odGkx2_0R#NxX0Q75U-)ILxOdPgkYQAYk)6J z98iLk?Tv>P?yUsmFXt4?Fe>VM8YoEqNS^JfUhu(<+wdnCeI@E`F^8|sAOt$(>vrUm zcUxD(sYFoNF5jIC-m8K63{mq<_`VQp%LiA8{REG2oQao+L|o^5y+Ua-|&?nc6t|f212GS)%n(IJW55;8<$@TJ^>!A zc5E6A?Yw?L4;mRO83Wj{GV0sSc@J4obuv^`E7{3SYiwq{!DS;Z!7eH4@l+-ze7Xdw z_z@(SbMrF`l0_BTT^ms*vZrrxVPn(5{C#LefsO47@B1S%`rZwpYikY|nKb|S6Malc zB;jS;3$M3EFG(aM4g5vUHd1St`ih71&rGYGxv!1;6vjM= zau+wBG-5f9V(uKWr>lp`w#v{(y&>A$DZ(hgz~6ofFWwu&njSa1R}TrWI3uZ&B#X8W zjX*Tkt@+j{YFXMrF^3=-?03?vl>TjA4TAK?-G!A}GQQ9a-E*tv=Db+p4Nm>I>s(```cWjMPXJIgCy{)gObynO4kK%kWhIv{kcsTn%9a#pgb8I}6V zV`vx++Iu~u>v7MAJ^0-jgh(~RSdNKwGJJ*i1Q4)FyJ!U)3=8$jBN7#5oJf?6ZbuL0 z%wr+u)c3Yt~j@0i+=lD%tJp}ak81@RgjYyYB4D>LkJ1Onxm+;;)^V^Vk^}71Vli( z>5!kj3@_I2vJt4tVyfT`>GY{R(9*|>Ie&=$!CQgkX0uHq@B~212h%HHLYH@ecFVt}b=S@Os z6i1d*lfSj%Bp|4812=+4q-%|XNN;_}t2j%M!Kfy`-e4;0$7vAwU2OhJpWoTQ-L>Z@+;L3eAJf()+JJXxMOjyiF_;t_EsYAWmpoX=a63}v@m*ip3#JEvQJ4mj) zp-}(}v0l6e%b$2j7SAjK5mEtB)`L=Ox$gYNP0pC04o|V|4L6t7x;+R|S9=a8pLW9c z0TP~>)aWkRWA~B_+465PWkJ2uB=ltd>3OL!KG~Bb_mo&7LJ9AH>trn|SHTxU5cTTG zCAk?e81o?>Q{RtDsgZuX-P=v5728)n-zIw8X-@*LrlM5CFveVWcHM5UrWc)D+Vu zr)6`9kJon~{C1J(`VksPT;z@KVYI6rX>UxxOD}a<&_hjuBR?cR(XI{ljB8v~YfKtk z?$GUnzn!vM7UEW{k3W_1j{d}zwH2^6wk?!csKEv_tkBd(YY#N~==lWrxUCR+RC3TJ zZe6`A&)R?eHH0R8~@;pFDBB#%Lj;XuWhVd8j|M4*FlpLF^Cqi1? z-EEO^tBR5DgoTTA&}_Jm*Saj?py^ki@wxPWPs}b=&e|C_{oErXAF`tM~iot-OIq z1%=M?=CYx*$0`r4^P6&U?sibzMA|Ayn@{we1#3<7n(U-4B~Y-pnUM_8byW}}5z{%+ zof#eUMqr1grfG^p+j874p4vbo_hj!@jX{nnr`iKD=VUu;e17kXpOd>YtU0d(Z7|^l zz z!uZjnus(b-7A=-&B5-VhC!z0$&0JW3Nb5ld*4$i4_e%8aCqy%bz+#zF+?^5fs#h{$ zsVuOzOM_X%*$M5FSF;Q-?@Yzp6~gungj+8VI6*ErrG#yaGqW0#b>1>63HptW#;4=7 z82MYc&jRVAOLN+ z6)J@obfg3ll=FIKD2YFR*}7#pLLRV{;RM3}5e3rp0ETSDm9b7oTuf-`Xt}$|xm4B% zoo-6>l^H@E_TNgkPTHLB&AEJ2h>;MUyT5v5e$#?W=%iH!q%^q+trKVntq}<93aP5i zuC3LiF#gRM&b|x7D|nH%l5{?YDzQ1GEjKKit|z@AGU(_Pk&3aWyuer?775b^o8h^- z__j=60qhc>c)+uBQIglbh&SrHLdcCn&8O;z7dOR73FkbdlFllV%i)ChdGFhMfVnX< zJVIWRKEBJ>Ta!`bFDNH(Szgtt6Z>5WFeDt;1php~e_wj(7Qd_ZicO)fb=&J|Xl1s_ zhFN{agb(P*bDn+t2t^CWp7UU$YWIWVy>Z|76&lUA0IBTF*3UA2N4sxe?P(!$I8vJX zV@v(!jn1i6dUPIFX#rDxeOCix`hc9=;K~v5f}Cr@`;jUPvDSdHHeRCgX$|cn_pq5oXd?^5 z&1ZJjqiy|o z$>NHHu{3w_OK8Lhg@0B&C6Y$!RjHsyDTkc>7dkc(8XkU@TTcwVrnY?2bz%|h@`9c6 zUB$x6cP6~*uso$A6CBe}DRlN9rW=G#la0Uh;lTQ{;ehYXrq^|hTx&w^XBmGgO3U-o zD&0t!g+d#zl!lKu?waXMUu&ktz_z^wEuA!fJF>N9x*$L=A&s>5i^?{LZAfqOxK*(^ z%pN~jj~)^EYIu44W-EC-8C6oNKH#zkYX~|E33z8t8MJkAi`02XH1GsSup}(&qHHHK z8|2MrQZlI%8{{T@7r=WUl0T^)#Zw-=}O^*)nHIj6q)IIiB|?_YqJ{a z)tT7BqD|#!5B95{XtO%vfkyV6v2PHbvM((*5j8dx$$s4Ajg1rhs`g-~tj*vYCPr!L zzfud(a3}(k|9K;;6yzbH$_;qy`6uO16mIq@jRVcGT_?iStIDcA3n+u7G!8JzKftl^ zd4~!f@{r@@p1;oqZxICc5>7~hF5C}(X+nI4i)ZX;7&XR1)0`WE1vz0dvaCQ*7F1Iy0d`q$Gt|Tm7{$*dKqv6a6Rq2jDOrtnA5o0oQxeg zW|%0>-j42vZ|8RV5P_AnNlPl5Q8&HBE&;2J-~QQiI^Tnfjj+%}G18i4&+qAWz;HHo zL7k{#%t0b%rcX4~Bv-P!6(k*+j&FXEEIfoy=m@}n+^pVys zMIs}6CuMMr=s1ZYIBf(9A8G-i5#h{^LFx1iaPN)o=4@E)F|3-*Tm6JoRQ^UvB_ipb z{d%1H(qV4FLwy~t<nB;b=J2KA~J@WK3qCJq}iej+E3FW;N4CM6kCnzZ7b`yG8Q2Ztlyx(Bh(4B5;e)$ zx39-I4E^l*Si9;7yeQQ^CTc47AqZGqTgau{2zcD@j?DlQ6G@ZKBc zCqu2baF9b0)b7h**`f-jG^>|}YXA73qICb=B75;0UkLoC3B1JI|I?`c?|k8=Z1M*J z?AiSnqa!A0@LC3_9F(r(@AVM%gpC&ZwIx`$`KI{mo4Ql^Sm@GK2#w;4_a510bZL9? zvb-}>NzXI_afP@M(la3XHhOdvg2H5HBVBf&t`n{TuGm~e$S_C53M@;v9^%lBY9j>% zxT>9G!|866wi?+FbhY8>h9mSAH(QjvoIEqF)5LrKI?-Oj)7zuOO?uMKg>K{QD1K&? z|0c1;F#W6NS%(`2xj3%WIDIJq#*aZmJm?K$IozuEK)l2J6F_>m%3>v=Khf4_RQEf7 z((Xj~s`n=^Tr!+4V`+JPyiV5Vh<>T&TyX(U*te*(c49Qz4Ds^X25z=?8Is1j7Mk%X z?tI8raH9|h+q^_6n7p@?0Zx4-Xw=g@o$k_6#?9=6RgsLefFH$?RrOva_n}fkGc$&# zF&-sznm!5!cgVJC3Bg^ni}KX!7ea)4HRCIB?>5rI8wG2J(1GzB){hbd;G$RkjR$K_ zfKnYJ>?xAw2>SfoJYBiVUw^-sv6@XOnH0u+sS?=17_TXJXC_)5 zN!rRlT@k?R79pkK(Lu8aQYM2lZJswxzJQOToxe@|fxI-Zix~w2PKE~h3RS>Rhm9{` zZxs>rb2KJDQxt1uHpFlyvy&t-P`q78H3<-CD!k|io1BPKs~;oW|?Ow zW0pO_s~*PYjb|(2>k3d=tB$J5t4xtM%HUv9j~${FgN-q7GKGCXeOSHaKTuDj!;A`< z8X?wK$OO320hMch^7cZa3iI4HLp+0Y{+a~_Xb37hx?ODq!Vqt7MpF91U(yTp@v!F* zA|~N8DnmbbSdz*5Cjemf{z^mcy)Xu)P=xRUgVj|>81w*g+K42^m;b+Nyj!U4~k34k}ohq#ojI&paCxK>7AVL=F97={0QHwfb5EAy3GP6 zH|cESv`q{iHer8Wl(LRp$jlwc^U$C=aJIK#`drauoXwVjV4WP1aTpH<9A`g-x{wI+ z0gc})FeakRps;z|GnAyP%wt&vt(0bS_0NTg3tD^H4xK|-bI5DkG8kREgwm|^+(kZc zS~71oIuSjL=LH#-Mdm>+l(&jzp$)Hmw!sk2HnW_nni#ubVQ^6c}(mQW3pH4(;aai(q z>j?I?h@a_gleuFq-t6sMV_lZqiLBLPq(uN9sIfwIg^U(y4P24;{ll7)yXTrr{KA;5 zqibrdy{NaGeHLi!d`Rf|8Fyr(KN4O{$6FkNZ7;vspNS{d^wg_ z=xvu$ql{VnF78z(+;X)?_n||q+}KE7gsXwh&)(QQohLvr2QrYUbDR;qakpIYx|k-2}Ylsvo~iyVeyfEe?~l9gy#;A&Jf zVgB^v9;{SE4D-6F;cgyhmwT>EbYV0FOEAD_E6h0dX0r^Mn{trNYT$>Op0ULNr3q@3 zd&u*q4)?&9{jG&ew&e~R)pBt(1l1d3UbL9R>s+fTIJzj-ws9m_aNLrnjS+i~v=%iD zYcEu~0g6vYAF;lG@j@-Yq?-@biBs8qtt~@~DYu&%A<2&vv(yICg>Kh}&VN&>6ERX# z$j9#6tv&-S^(VkGYgT(6!d;X&p5l?!yCgghgHBV6P#wQK(&%k?Cq;KxgwP#p<{Ye@ z8d-H2a|8>TOnuQ*rw%!staS=R^d1~u*^4It(x!@Nifm0%c9LzeCcBK81^}&2B|R%r z+N^F_`+R)vj(Q$waZS(Pg5vY@kfTo~t~s#_#f$J1z1sbDig8w_X4XGazUEIe170kj z6Z1Q$oFH2gJIomMC1biVD zs!-Se!2bFhbmU%k>2Hw0OOE~DQV@tPMk-ojRH{`;0w@s3Iin=Q3M1S@9XeD!b-tH` zhhvi>!W(4+hn7cO*j6yxzkwr{?Pd#;3nUi{Tz3&uis`q1EofsdA2B^>XUO^`K&^)G z0fAFgKXJ&Y08C%@;TuvLPuFR&KBnyHPB)WgMdeByOdksIf>ep^p*Q)hdFUH_L`rMM zCqP^36_I~&3ht?_({FA7`g3S?s~HL!IVe3?dv&_}(dJJso9Y9Y1T;$g{*G!aEDHC= z{9asvpG`}l{4&1NeMiNr-r5tAH9og7-)UD5B zD!TZ3QH;?5gz5KF*|YOZ1KAq%Z-9zC2xSzUnX62?`#3zm!nLvys5*s7qd% zT?@`{NWrx$^=xS~vvbFSK}9k@w=Y3o?dQb?2wURJKEp(G-Ge2?$N71w57?z+mg-pH zNQDa4KQBRSW;#p>BS`tO4Ffb|q|P@>ElPhfR)BVf2&9%gq-g9fhxGrHfA)pwS?ZgX zm$jq!2ByVH_Opj)jVY77%9YQ5-$71$#m2=Eyhtnn&QbmamT!K`Z9KZJBeQv zZcAc-iIJ9lk^ym=CMZv5pR9iuzbPkRicUVy27j7#jWEUoaaf*X(mSD+)Ba2gaE}wT z`%HL}?xh|cqc!m1N9@NaHqjDp&*dqB0*k(4Fe24Le;KVXoE#1tf-WSZYqEhan0p!s zMm+r@X8cL$TiDy)EJ4Zz)-Yil*wdh)egp`n`*+W#D*NDr85FCQ`(9Q*-`j~zkiwUX z{EKe5>dkjSYOX2vvjE=i=L``pmG6PQ2S|>fa_1vdtAe6W#8{Z(q-F^exCp$&(48Gq zOQJR%NidT9lBZf2ypYNy^s5DKnK+8qTJllMfZMt*U)uR+9zge}UIrY0c}Lo)um*@M7#*Fm`BI2ZM0Aw>xziBw*3y2-8yWX8-L;C3xz}ouc28>^3yBy=tWeA$Y zi4opEHNa$n2Mhi2e7%_Dz%;NqZYCG%UpY^h>P`2-hq%?Dh;v+H00r%CmNbcUixM_t z2S}Y*N@uW6$4l`}$DNCplidY-P8`pBd!G{;m_1%KuCC(!jEi3(q8#G;MqT$O$FqP$DYOoYqBEC`%Q(!n_yMfZ z7}3Ls{H^M9OxVvT$;@3Lqy`%YvKp51dB5W~J36HiA4-_+zdKWOC&_P66u`OdGT159 zR35LZ*Hp7M)@ZoyQSW_;#;@T47-#reK?VNoJZc4U1Icas&S8&>9tlPm_cJ4INm#Sf zN!$7<*5S-yVJ`|bJkvA1ARMQQGWhhc@057O^}76g2d#>4!`oW~Prvcp$OH1e^X8sV zmb$o6h($BBVe$&IN$~>C!hEVqPIY8K5+5kiwIWs2@Y1!o*>PM2fP31ib44^i&5`;~4GB#;W=%cXItGt|!1np;I?S9?4+C2ee%$ zD|G3~*aF4DhXU|JYMdCfG z+Q9+99;AWs(8c5Xv7wx-dCY+r?399c&>WES{~a^!13B0)1e>?8APFKihxg3zk ztZsWuQks1yHIYH#Hc z^V7KYqK4PwM9q3&xVifHbwcMZPYCyS=PZ8vfKdt_lnwhr0gt^w`dJymZ;0gOo$kE( zq0!?Q+V%_DgL8)IbZG2f?cFa{i>x$q2hkNjA4TN}2<#l^UhcINH)DZOvo%ZOQ*u-2 zzwyj*}`WxyVRwy4`j5OQx80Ede;iHW}?_7q$xFo{$;yNC|pPThI?}3wc zKi{KTCYs-j%I(5`bRP72u28I`iNnqmR*l$afE-U8=C~2Y>dnOxwRuGM=GY_18uJp- z4gIoAaLfEqnk#`^h8tzAKBU|gBLs8tYZ}zKs5)A`>QE=y`*t#W@-r_^xjRkz$}poB zwvwbyY>a|l6T=6MFCqc46(trJf1x6SY4ARAmL<9W&;&*|Z`5paXImYn$-x-cJ6 zCeBzIR{LCXW~C{<80&ynUz7eWHm!nAw=Gv#z(FpZ=Jc}6bcliiujp2Mpfj+bO<)|e zi@=1fKCXKwI^#JM5TA7P(~C{(>J`$h+dX|#;t$=f^)HRwR_FOS5f+nDX}Y*3Y^eeC zX`|YDIzT5U6uC{I#C6oVLyvdi4ixivxa;vfdJ__Rz|j)J7~p{c{f+Anz}pfxH1OD( zSSUvXCsOdM!U#XKfci4Pb1Z^S?OZ&9Q%o^G3xMXQCLk$SJ+7qg!mDyxk&2;1AphO< zh|tuC>56?=0|Q(r5Pn_(%AMA&3%>OYS8o`hz<<3F7_XEY?q}q|6tQ()5=!)1U6eO1 zn@=G=$034max*KlC!g{R=xh@Givj6i_B}<|)DKIR#kNvyNjn0_yb}sFFC4Mc!JZmO zpANrGtyiuGkY)%bRdDp+bH z{t#f0HoD`PICFAXs_Hx&HERzZ%_n@RfRK#O6eNW^9H(3g?$ z#`|~DIb?PhZ1op6jUIXt{yXz+oPCNPFB~DdcOgNFMM>5S{kjXB%R3cojys!=LL3> z9&fzQ@U7xHC;`7vlqoX7hT^NnQoK&;E6JOCaC%L3x%R~m;TGH8@Ojw1(B)#za9(JY zS-DAIHjJ-FS{^^4@p;KM3#Fe3MJd#ob149k6kTdhYJuT>`{}$S7(KY6vtKms0pYAz z2wTPhyZ951X?Wr{hOS}Xk2pyy9=4RBrqM~il)2*fz5H)g_~|&?qXsM3Iiw~2%r9TT zo)?0022lrMYvIuzH7K1g5>t2-RqMl*Q+g^ z<3&0ufQH67z)aHaxeejjtYNhC!uN%bSy|8ZsJKxaW~_m%%Lbm_bW`yddi00A6~{;u z%bBY5fS)KZ5w9sf;-q1heVwCB^Knzu&+u6ac5R(K}>Q#h79W;|L55Sz%1g7T}bfH&a~OyY{mAh>{Kp0 zne1!XHOf?^p2&8bGue?Y!mrB*G`O5Y3)j_?Z*nYFqI)A_d0-;XOVmHS+f88Y;Emxq zWv}44@zqH#;)1xE-Dr{`{3HVF%hbjk$<@>*nB6GyPP4#!sY`}(ILAKZtOMOciDu5I`dl{wrpRcSU3Ms5CXN-hfvTPQD(xXZlzyPhg%fb)}Z8h zZCUa=jNPi(YJGW40sEZyX1NSugDZQ#Uu+&#@g;_(iB(&jm}u;xVr0m}0d?~zmXa;5 z%P?|JgJb&O5UZD2+4_86TxCsZZrV2 zFM9`Cy)gW#NR__)$08}(X0=HX6Np(CL4`Vv$+2sgm73lfC-0r7*`i9ThhStyq4+Iy=pFC%**V+vpuY3V z$i&Xb*3{i`BUWMSv-_aj{HMhi^sNK`_3tK=5RNB+xx40%1Uy&)5#n!=y*0K6ldCLv z)p6Tay1tU%G@#<~`c!MYK-MVsaSWwv{We8+A4K8Qc^N_OIvo@=KQwQ!VDA$^A!vE7 z_%7SH#&w=fFfM6jWAr@9q*}X@V+=j_Q=;fT1IoRiZ}q|BsNMd<%xRgM@!ju?$Q?zi z>b<-_w(tZ@Pzk=f2_k*5y0L@q$C0>wWWO>TAsu3e2)ad~DJUwNwzf7NYW>IemhT)r zI$;hX_hg}y%(g3hp=2ueu-MvaClc8s*-9T<=c|n2?wpj}jXRVDle07yM-Tt>zWdy_ z>kArc5N3vp>P%^!pV?Lz%h4RIT9CLM_#n%2#Hgh;h?0K2^T;_PN#2KFkKWrx8nIe$ zLw{0g9k2C{VZ8`ujJ&>yc~_=^<&3&VE%f7Es6G>S!Yns2Vi!kgt`?9t>0l%^HT9fk zXEgde1(6x$$SRy{J$Cq!h#~D{JYcTvz`k5!lra>RTMh0kwA9-ejGE%1DDgGA8<$fC zl=z&QKCdZ?$(`5`^fBxS5PdzxzJlC7?sDh;RhA2u0@KsL;dV8DC%*3Yr!F!dTY4sn z_LJSI@;Onf?G70}b`)j-0gn#1*@7B;>nDH}4h>Z{SpoGwAq@oR$k8kp&9CKJ`_t2jA+0mR349``b(;*2F+^Pr(M3GimWR8eK`MnkG%}r ziIC!jA)@XO++Q|BL_5WWyJ~*)5^n@L3_;hN=A)bA2kH6(YRR~O<+5#&$MmXdbqHrh zMr@{kN}Vkq7Tx<)|-|M}+yfr*1XKP!~4B+ZwTk?6@W9H?+5{Ub}^dsGHhe z^OSsKNGSp)ttM-lZg*%F+=D0Y@$z+deTaj+=BpZ#V57sAiJQlcA>X+z4@7>z4-rN{ zw}0a`WGIbL#2njM$IZ7L`7hnys&B{_=4YzDhW!bPUHtr?+9&^Em{5?*$Cqq@7Xtrh z1PakDl1&>S9O^i$>PDTP{Hhi76gWSjV#(IT<%a&5m+sf>R>bMq>)Ff~`DGb@S~a4n z9isIe59!s6fCv8+!gE-oN6qf`=)tEhNnT?{DgPA3qM=X@D9hN+nY$CG8n`&TwNh+7Ql41+RV0 zABrG)R#RC6F%-2pgdSnQp_MN;QyHIyDjz2%OF6+Kn>`rw$Y}hXlv~HG_j@DR;Z)&S z(V|(e@H9;@w!#0=_JFhRb)Fy@&>;M~hkkFX(4RKC>j2^Vz5mtrF^1>$uZr(Q`2LLu z-y}vbqXL2Vlycs-WlU$K*on}B3%yTJon5ec!h2#OCKxKiC?I{rE(}Q~=@k8}m!;co z4EhdvS_Fcx0>tAvd+)rUYWZFD^-w=9iixK;IBslBM(WozY~f1Te9-e;8!A*((_3E- zbkh?IvD0IT8Zm#5CM`AaM~{1|5&7s}OS#n9a&&L}@G!p8_Ay0bu}4r^3N5!JEJBy$ zOEgd2B5z#XF#X*W+w*}B;I$qlAmD8!fXa#NnwIqzMfpSg{!Wxa8NYeyXgpAvTOHS2 zPh+;^+E_0A@nM3bSL~Bk?-o%lLh~tvM>gLSd{-25km;YwPBegr3-Ah%!o50B2vw-2 z{LTdwqtY&6$Z@yjK>R(zdC)Nsnn}spM~B;Tg}_rJh}T?IKLsJjWGkmi*f@lDP*X^; z7VGm<@*nixvHh%TOer02eZU?a-sjg0+I>!N==r?kU=eBxi^c|HC#nsCe9^^@2jrXt ze6+$p}*@?aIZ>RcU|n?Uyd2%X(ut4t(oFacy+WG^;D*q+lE zkM~IWW{9Uly~4XPFXeQumM`&Qrxo`8qQvubb*eq#yCG6z{5E2YsgAYnG|kR!@@VC8 zdWvb*{E(d!RT~?QmOKw^D$KPLXmm!KD}ynFdZotRH_xg$%R>E@YfCFTZrWykD-drR zVMCRJp|rxP>~~A+P`r0#r?{DArN{g7>V>)If=}qKP{_z4v?I%Jvz^ASp0a1q7sdDl zqN^#m&VS?n01iFb)y0FPpxC^+{OgnyC_YfGFAwE5%LJ1mw23Bjuh5oLO(O5zL$`7< zfCZgJgFcsh$t~4dkKL`X?;imlAF;nJgCQb~K+I>Cn_ztq8Ix;Q72yhVmcJX!DPvmzUBI^>EVi<~u*=%_&A_efQ+S zYm4|*xHCIYl0_kjb8gQLZNVd+h+EYqxO0>8Vx@_{`;tZM|E;tJ1a(1}C8J9v{y9>z zlQBCBoRS$7U5C^uwkEZd3}lbOE5GeNpfnw2~6${B#F|Hb((z4VH~*9iHp9u5JZ# zGf86%dY6qI2~E6)C2`*kNc04ST~^D4!2}X@dgNNE%W`Ny%XJJ;Xm=$X#W@^$meJ~p zBj0K*NalM$2)ghKP&6?ztU8Z3(&j;tsV9v}^idnFI(NPq(s`B;%Y-Q=JFpi@<%eSi zN1ycOK<5yCWmeT_8MKxu5u3tk-N9k)g$~Lu*J{lP5d8wHjdx0lIx}+%fp=nb)Qc%W z4oWg`Dr*Db`b2EB1xh#^Z$hzE4FD{!r3$e>hrFVvE5&+^qsetDNwyYo;hXWBmP#hO2 z3xMR!^ddq3mJ$vBHm5W$}#o?STAgTa7%as2wVNjKAFYPBK?!%4?#%q-<=l*T1l>Vp8(;makCnhdf?TOjn5+GF1sN3 zdpF;UqJhsK{?cGB<|Y~8rHG?Ps7L8{$+#2vj;R6*f|G^k$=6Dy|FY&EQl8puB;ZX4 zmhbs1%>&ox`k2|vk&UNbW!1*^)WqwU8{u@7ynZfhuB3Vv$ROXPvnN3N6wDI71hK-I_O^kytL(hQxBZdQ4ui9jGr#5(7LCN!DbjG~ zxv(JQUTTpXYz9`cTZbnU+}|Q@8*TEOdlWEy0@(Y~U;NXrpHD%W*Ct7>5Kfko*O;Wh zu9s7nlg~L|bx{(zj2l26!q_1__d>4vA_W%ZEHI&g3-;je(!yyl&gJb7oXy3JONzGG z0Qrv@`3JgbGm-m;F}TOOs*GO^D?Y0KC=5%q&vr!}<7{vyx4@&qh-&ep>_of=cNj-1 zaqG|gl#V%^Hs-gv82b6Zs&n%dQOXp?e~aI!afJE8WiXy53EmYqq;q6xTS$6pw3vG2 z==W$1D@RY93XT-sa=P#kw)#qYsBstGR`k$@_9EeNCFQAHLA$UXg|I!r_>?>+n6HU$ zO1ECqmFIK=c9Vw4k8o{gUR6C_w2<2z0>V^|UcG8%xbjRNqGbQNr|Tue*^5c9J&w-7 zjs{_PZ8GQB?VF9y;%r7g3XQepJ!y2$eK^hn#=P3PG;Vi$bSM@H)ga&BX61?XnpFRYGoRbr)PpLGKZQjF$ zc{}W=!RKHk*pAikEatsY^eIP1z$JCO`OJa01Gw>J1%%(FNe;D!ma z24~|eIZ^3Q6OuNZ zntm8uJWn=f+AD)%G->;eXv1l-p7&s-Ua^w-LM7F_N&7fEKEX4*-Ol#)K&^&4+(GN* zzHev2d=j~svm=Z^E$IU|VTx0`)&;ScqL!Q!sihRPdt1&Gfd|`8`Ob7O(%gYdx0+-C+olqJI=siwiqdxM+?>a0y=|>p zKQE#A{c1v+A@}@8VbglCejWR_@=XYKlQDzZT5Iee+}^s4C}Wz%!T}c-zQjXnYra>a z-Xlks^2#s^$5aO)gsxF)e06 z*p$-5mN+gpPn7bOb318<=?B6m35k`RPZzn@u2NYOSp9>V*s{(&7rE}X&zrFW-Hb@N zA5Ct@_R32p|Jj(Bi|M2zx3I81o)#9vSh)Tg?qmpnv|@YoAt;Xm&r~a}8wqhO9q{gO zVycHh)MV`AMhshrWIkSptE24j498)eb65lI1?;mY4-~38Pw4{#ha1bQ4C5+bC^9>o z6!$d7bb@n8v%Aa|YCGN6njBFf`n#{R^;zH4!m|KiE%{yc^n6z`kijh1L9<#1>5F_G zu#I67Z-~B?c&bAo*?W#TKk`*#m}|I0(7 zFE98HPXK!}`Ptml_$PDE@~<|a?|-ua|A)T68h=`gWPbD4zKS6dn_#zD0eLnCzIGOZ zjJLZOw9nH--$@p9ikrI!!8)xxe4-*qf-G5F@N}1Gueh{;`S$EqfjA2N#!mp#|5VFv zL*Zf30B4NL+mQI)_aM8y|7KGHryw+XRcW4M{K39kKYXvcb3LAmWrk><07ucXr)1Cd3rbIbCGy6* z)vp#YI}I2b4Al0l53^@Sp%}kpa`(k`ij!KpuE0aTX(d9KQ~n?J-a07Gcg+@U0s#^% z!GcS$puxS7;1D3VySuvvf;Vmn8r&O~#=UW;fyOZA=g+G*cGIZ5lRsyR5DIn0vgwg8p=-DCqOL zt8NC2zxO$TBai$$xoqh3o=r}Rg_bvlYpK6kUVqT7O^A@$TPYW$TGX?bD+GnC-Zbno6rFC=IoKVW zO_``EkChWtI=7xZ#I7r8>a1d6Y5v&sNTbJtbKFF}a1?681eS3qP8{#Fi#Ij8Mi6*~ zG$DB-uAY{E6#8IFX32!tZu-j)TCdkKS6;C(1WEY3ISdE_x@0A-5_u}`>>f_-y+MiL zVwgnkI>=nQx_o$PFK=CetD*({{sRHvb~S#W+0Ha|T(4s1XIyCc@RJ)ePjHt!c2A>~ zX1%dyaw|+NLT;Gu`+GV8Uc};)%j|VuM%nBO`Tlj=4K(!fIziAyefGzR5v-XY1;z~Vd(-m5@G5wNUTxUxY)%N8>8L`?K zk@j+1>Edl^2h+C(j{TSR2Ky`Gqv%SZj6L{*8V2uEqmxYHVrB88Fun=O zuHX^5h|WCRa{k@p!8>NH$H18GfMu<`m?O6RbQGo%=n{GM8@&6iI!2us0z9`WkKASA z(5=rZ@iXc;Z2U;}q=_ji)OrIxS^rijIU^>H<_j_hD(Yihl--#s$%1P>#k@np*WG9~ zKuT`)*>rn7vQ_rOlsH20Y}9RqqA6?oQ(IF#d3((JVW?t2Plfi((bPt_^t>rL36_b@ z6BVc-*sa=mAU&0|%4W#cErN8(U}>1rB<`He)_YEv34pL&OmK+Im~tA^XlG~Ed74ph^ zhpxkA#nV1LIJ^eR$X3i`YLIX#s+@r?lmc_Kh@%c4wB}}v;b(DZwx>rwQSaSj9S_It z>5?p!gLUW@P0#{wC#sy1 zdg22l^}6O%;yEOM0>YguxK_DidQ-i&<#_}hKnHl_SAJQV<{pgE)~)-PH>Pw!f%f&# zWup#zSvPLoN>qGQTlhvcFeS~q+N^&vLF#-I2XWw`df7>4m zJd1FV)r2lsybw=|E7jn$ySl2C*~^`w zw#D7Xoq?WfelD@F+C%2yWyU(MSBVMyL|YHUX3p@b5hEjXYmeHGgi!Jrn;AyqHsr_; z;DCPq%Vx}Ul9b6ZMUF$|O-j!}F74~J#opI}S~1Vs9iE7*2+k;6&$rrII@T$_THU|_ zQfGaSv-yL9U+R8IE&!fsaixPk3%Rmk5>0r);M#Sc#%WfgOIbnpGJ}1XJ)tB=#aX0fHTm}Aa+y3Vj{vhyA2sCA? zY@l4b7r9tG{@C%t#$DarbQjpPbK)=R#X3Av*crd)7&)h=?-=ht^yr6gxG9E{FTgWL zv#%|FY)PvGWJk}RMAr}K{0<&G)0z_+ZebU=6*3n(kB^?ec_tB7d&v|d#~{~w3-POi z7i>34AbV&>fiwOS#ksG#+j!0KNX(PskJ#3H6L;VvW8K7q+hcWEm(gS9*6m_@fUrAj z{v{mv?;fu@&l5W^s7aU?5wWBaMW)oW{ z0nkDbOAxtx9o1u`%?D<|wTmJ)wO@95WKhJ~fAq~c`T;(&4S}gx|XH0MSVQx7)wxb8xZO5?C_q1nB z@6WXO+X&LvJ{gZP?LFI+9MYi=1P1G*r8=X=(=w)YsK-ho-G2d4p}4JN)-z5E2KgSw z1Y1k&gh}QVP|}p2@hhk%iH<|HA4XBWDjVqfC5&lxAAf+n$nSvM(Aw*WET}UMf)+uj8mW=x}@z?w932S+Sc^evQdYQv1w&7o!&P@3fEB3 zdJe3-c^Lan@u(=KV&!kwHz`u^q&$yU@TI8(=7wq(3ez%h*P3D*TUqmXU<{quY_FRU z{QctyE||B=;6(yb+oL<5IW#5=WycU($6!D9ld#Vd(q(O+rjBoU%=TVsm{pN(mV;ahFY;?IGP&k_%al02jM9>udGSVZ8#9k^{PA@e}2`P$oI?qb92***#s*g!OgQ9 zm(*!YQCWF3K@3uK(DC;;f_p;}7Rjvh1n}DB9s2%OGKz8joc_OXI?VZczfnNQc z@Hav1$;Hp;dm&l`ZtnKdWIeoh%KBD3uUr$xaQpuP_*mVy2i0@Q6YWY&PcFxQU8~6g zDax)N3hI^5*M%L)JJ+D<I4t<9J4L)RMg4`yu?f(*{UKA;IA+HrIEPLF8?9|TO}g}(P54=@2d`s zdj~f3fv^!*7e@~x1X5QWifq^S4(WbsHnLhvh<3_Ad(yl%QX~PJ5Y2S}h#}_oDhpY} z(K~W%u5M?R6T+P57$a>)S14ton;O>2QS9mORd2ISa86}JV!hai)2|?)?!7o@9_#YH zgbh@BwNur~v7X+7A$8=@f-8wCf#O7cmNq~6AnN5*B8AbVA)~3kq@}9|`xHO*SQ)qQ;wic@L?8kSnUjHuL=()GAI9 z`@|sOg>pOPlh4gF6q6@Z7kUHso~Tao<0Vh^qf#V}C7OBS7@(ss9Fd(&Iwf@bHj*nt zu$e?V2V80pIrEF%gUCHW`XVT2|5?Tke&=Tos4lE9!n#Jfz35Zs02R$6W4?TdX|HNPlDcQb{D za`B?HuVC*~Zl}k@jwE|kT9tg{bs2tYQP;H%IllGpuBXOgLQzILX`jf{2{ zXvr$aHok8<--x2bNqJZ60D=%uC@$Zp#Y=)l;!A7&HjrQ4O#qPM0JLe5rMh3mZ7{ja!m{ARjj{PET4& zd)ifv?vk7fM$b_^VdG1(ylGO;a_n&*#D(512a=DXP~kAyt&Un>iC5BeF4XT(Y(Dsw zlC=rA9`$+iqHtQKNi4VjtR2hJhGuV|xJwGC$AsnS>+CZEyS-${0-n9y362!NDb3zC zm{;RAaA}W(-?DEV(Sm@yo0njhULp~XghzAK98t%f=@?;I-0Ok~E@>ycu#^EW_;zj6 z)dRgRuuLqx?k@m++(rGch10%&Qhn>M8Y-${Gov6ZCq2a60Z>d%a_F;+-^)K|L-_4q z0IP99%rh}uP#OMYAOe))+$K#mXlxJkn#zs(g%{ODr7(Be5)X%grnaDxC_es+>Aet4GBcEdyp=$m`jy z9AkcE9cO9mg-2ygx7npQpJLFLM`nLN*t0xmzMJvEk_L9eq06L*nzd& zSA3LV)db~0u`cZWc%7xan^PeTtPTn{AnU6KtO85XLgYrNATz(E#0hneh*klovSkTi z>lwIrUj1{-*47)W3O6`%TpCL(X?)@cV~PhAAaU-rOvNg0+?aWrF1U$xEm(uu6_s)_ zuR2B2_*=*K);BA;Wmegzu`cLe?Y+~zg0NziRCB2%ZPj}L}Lh_z*6$mo2goqxjJ7 zu!_YFV7N?7y;0DZ#rR!alZ2n=6T_o^iRHhnJdB}d6p*L+03Ltd^ES34-4!tHy^q6k z?Ao1=lAe3&?z0MI@v%tAy{iQnK<>RECV)z7v90T{T=IGT+CueDUGj)s}Op2i`U*r5J?4@H+t~LjN?M|&%t`r0n3Tch2?*fWIFj5r5yO~ zZx!3Wjr~#2{mYE=@1v-ScakYWW%7BYXKKkske?kjN;NMLX@4+9;VM#Pyr(x83e?E7 z&VI|nVwgqDXi$5y!PmH*WC3QIBTqJ$E8$>dvT6hp3(nyLY-$yST2|OSWG+#+)O7l> z?di&K2;duQMGR7JC91v}90aRHm^1YzD98X0X4=UQH24d%x!NX>;I^u9==RE(2sZ>3 ze*Eq!+!{~l0Kc*cLE=)yVl6q0PNYRUHAl8M(ar}pJ#!}&T|H6joT6kCltM-AB}hxq zq?oR3b?`J@h1$%K&~nkJJ+YNj*9{8Qt*V9{1}tMsA&8q0+ajhecLxzAdzUU5bg~TH z)%2zdJGOIs*#t3Fi{*Xo(zJ<}nXj41zvNZjOzTu#o2U@#3C8hdXNsC;aMAI&3n|Ph zy-8&gDR*pHB)n4-e)C)g71H*~(k}k+j`v15(>aTwC5u#qYG|?>m!OjuW#-u;=v7`* z2u-L&0uP~vN}Lp|q0S;po|_gj_9UrOLb%D*_H&fEKM_5V`O@cnDRR>p z5B;Wc1m1#W1Fp^ux9kP)F68X^br+$6QuGu{MKc-!g9O z)!%Yo-Xc2++~fzXe89DhnuXbQQj`y%Zv`x|cfx8RA%kJt@2%0Dot=*xNK?Z(R0|M?d+aZkt7J1S1#H(nb-}m1wKdJ>dqCH zJx2Bs9ozMDB5WUJJ30dJCb5lRtn|(CVpbpe03ts{_RDzq-eTgDq74}TmsD<>&d;@~8 z&qVQa%=Weos22bH|BQ+#?p}>&g80)xvC%Ej0DdZwZIDd?8|J6AAkB?x?QR_uYy*jE zuh*EwPS65=PSr2rIclB&bOfd6E+rQ~@&tlpL#-Bslh0FjMNRGDIwiN7_=dbnCvu$e zU1+0}@c5GF-@3C+QLIFDW7EB~woP|Al9w+|vaU;p2D#DMYxs$p zoJNi3&*X0Ef^%mDWJ%tKed#M2uOM7XnNGg)7GWY$mtvY0?K&svMfFd&f|mo(EAUjM zhKjsVTGttJs*9JP%VOYfy#i-szJqZXI5t^fao zfWFt6;-h`~(_!{Kv)68)@Kv+JeNDh&{lCtD|9=X2|NGQ;IpvwUe;obsfA06?^;C3* zXrISbIuB(|sE)W{L*LAysw!V_Bat1TLKX_6OVqXd3jk_Z8MbeTped&1UV^&Bgpnnu zxDGSQCD?7Ud=2iO3RK{)+>7i_aoMd7k1K`R`$b#-y2FoSnzH<<)f?vB+QHWEi8n2> zLNMd%F0)qm34}C=>qBTTW7l=Hr}6P5$!Zdn5;RJ;vPtZ;QnvqP@1{z|q)pP1SDPK@P25co5`G%Mvg0` zTSx6{A|~n+(_Pk8ZgVT1X^A3KBo7KKFu^3Toh^fW*~<#KP+?9KCFJ8|0Wa=U@1}2ECAym&Eu)${)m4I$LR~bXA*6!K_lp9gRhgW4pg1K_)U0Xa< zQ+2+=1;hE8ZFRM;0z9>(2|6hPt`7X{*W3*gd;9!PF)@Ye#|F#x-yaU|@INd`#is5> zcBL!NP91*kHbq}$GQ*rcin-$6?ssN8(Bs%oaT=mmTB`d-2i2(*3g06`+vAAJPjNa1m+4^L)F`eNW`$+D zqx`r!cKfl__}@^J=i`T#5YG3d1N>&+ebXRGs@INENLQDo>erKsu2B@WY`e(m>&)&) zEJv5H-jqXWLUMLTMU<(Q@wfXtL{33d1Gj94VP0M}6iONN7cX0J--JCwy#Uq*H+2b^XzwVTaSs`(En8&bI8wRyQ z4%E3Xv?e^QKOBFHW%EqhqSSBig$a@oL^4)WjhRL$e8M|8l$Y?ZBi*h`IggF+$K;Ro zo@UkNRJ07DUg8t)q*}$RMr{ZIv7*L@(cl1{K2gbWJ=`HKizjjOyoy;Cj+r6@5 z_c0^W(ui)Zrj!=?{v60u@Nm7cGWF(c@MfNMd>Xm&XA0j2w)p-pZ}yBx_{19bovGzm zxeVPlA0n`{KgxGw&z4 ztGX^xXtv46#VLUwTdlXr?_&jUv%;3*HSB%m%p0OOkp`(i<)((uEwVOKwOm>|Gaq&6 z-cWY?(c9Zr)|Wj_m33K%ZLc)(3I)Szm<6Q*u4mZ!TWvcDy1#Z^o+$6R5EBO)6Jm-O z`b5I3r|oPJ?)l%0u#RXZ*i@%cXT1wmTS}l;C;NG7Q));koAu3bHo`>9*mH|eO1b>m z!I8gb{iwdMG*!|2yoWeUI?`~91e9@ctM=h?MkY9wMs$DxY(3uPsAK)gaZa9EdNGCl zyP)gf_~k$Coc=5ke-QY`A@FZ+Z?b$<{sQO}{sm~Edm7feYJBkSd0Kl?Th$4GmJ1(a zZU4_9W;x}_zwOljT>P&hfH$<_i6+-F13w*Pr;cu0S7uf8CHtwyGri#@w;f1FBjGZq zk5vWfut_-2ZLznU0pYCfsd(0kq$^E-u}Xgr2?rsZpL{m44@g#To%$)tJR%Z2%zk}C z;XQrT#7>-nO4v=1=rXS;45ag~@I@7ODH?V*6x{>rxl zM{vl=tNrV{&o`{A*g<+lBmIt&p1gOny=Qi?^mZUyBBrQ^>zLC#;#Cm|_g2jsGuz zX5IAm+s}F#pmk(N;J$1N(&W;on%>3uP|>nDMbZ&^PG$&+DbYEGRBvjdcKMNKL7;Qg zbEF#LR8?ivvaDPN9xoy(uq-^d|6K|^8Vb+7FgB+U$DAg^64j5g%5N(S5oG1?%XJ;3 z3(jzT*0A6O>+DTIad4QTGAD%OC9AX0qRI`!|&K7 z_c=iEk%Y-kkihZJX}<*NobX07N7Hrl;sR#Fh?Cw9LJL$|BBo6j9XUYvb5t8)4QW@t zVbK{Z3+}9Rz*=4D)5|Z`E2=j}M<%K0zgm+~rYg}YCQa$lH9&W>#16=MqxQ9&XW(_n zm*4~ei>c;iqq=Mqor>$sqJDcY=KP?s2*IFW7D`p0s0r7iUXU4s5g!$KC_`X!olOoW z+qWi3DwM`z&J!%ppv68mdCW2!bmzsu_24)8WW0O>M=Q;5&h}YbU2@BcZ*`_unnw|P zr8Gh}_=0rCiMIH&Q&B({ziiYud)#@LVhJoo2p$&*j5J|z^`G|Q&izPUxA_D8z>+%~ z8>b~NmE6I%wm)=t5f@P|D{L&O>Kmmg^@@W58cMh_=yD_`&n5=ySE?BCkl?iaqG}gZJ&V|?8WpQzGr|L4+QjNC%PO<&pH@Hw|s&XW|;D;n^I^a5_m7_ z);10I#tZ2`8YeIoY2ktzhqE>t7(}mq(A0GES;d%BG8OG%GrF6I@QQ5~7hk&&Q zYFqCqe_*bPc&8Qcgmcb^qdC@WS=<0|5sL0~L#Q&SN`UA|X9EAxur^Hr|W+HO=)}%ZCH|Q|ii;|4XU| z|31F^KgN>(dDj2=2>eN5`-gsqva+(<_`+6Vj*Z91uq@n<`LoM02sJ&77U=YY)P z+;Ny2V&txyFsinJ?(OLitoQLsZ9rI2Hu(W77nsjVKtd39H|r~KcMRMYGeVHiD~h?@ znmJkr(pThu>s^EX1z<)rNjl#! zhhywzAylCF!F5)st*5_HlX#FGC^5P3nLnNP{qVzd=kgaIAAl&iN{{r$(Z7ibNPm}7#9@%LbYLyg9rNIlt#6ZYMq9*nG&-7(!w!8r z|AoXf26DB_wbPGb&@=Z|w@yWC$b*4TztwPau0rt~bEueB83{)ezq@uS?Lxx84yB$MilC zl(O$l2bk#ScOezNGuP57Ucw?54ZhteWaEjeVaUvnQ*2X87&16)V#$pZ!{K0xp0P)X z{n_>8Hv3yeqh+$r;r$MZ-$^@7Trxa+O(9^gs4y4Ch6-mN@d9KRYFMomgL9{dk0_JT zzx7tmpa(F`ITY-_D$$CVl)b1=x(L>GPi6FS*!jRXA1kK!iV(^5Ik$Hw+Kcq?<`uCV zTD9NsAmO2uK4YdI{|?5Gil+h(_qDY<6_Z-^7gjs?Lq*%n&r)V1)mC{k2M<&9S}F9XOwi_u)AX&zy8=(2 zFU>H@x|Du1C5ns290U!)tw#nKIz{?4Zgi(keR;1f?wUbXQ5+I7M*)pIsdWrwwAp6- z90PYX!m*`C;Ud{uqryQa<7f}gc!5MHAo-zr&Y;j!cE!gvYfBC{q_)c2eQmlv7T=19 zqg7ejyh$ZA@@;}p(a(nq4da*|IVMWQ6*A0MbQfIiev0C!_kvE_yCm$iT46I;C{x9i zy%-E)1adymCH8w06pOj+ZTYsQ>XvU3jv&>H-5ixKiW|%vYSD)Gq+4qbBUG`p!u}ZV zp7^J1SyEOA3Nl&I1PuY~9R1#DgEOwf4)L~2TUg(#$4(S%h_#!hnG2y zVSO3#?FcIbYo2}6V=5cBfhUTql|pi=1nC=G5zk+aSN~P6CQO)1wps#kp%MNba7HR&IBw*No31n}pb|7RfZr;PQ#O2zzV zp#5*&%j>vu4Q;W)-0H~Q2>)Whl=hPct%7tvU-{tW9+|nR85}Bj4gRQd>NeY7!~~m` z*c#d%I~Uc>$Xc2+v$8PCoiDoku1<|Q-Hw~#L0h4tBVmDh4|$@aLYh;u!PVB!CX(Y_ z#9l8*uFmd-36LkVR*f|S}SPk?MWQe32#p3b`HkpxXN-0dfTxfI`e`D27E6eNft(3OB#zmALP}V z11_7cA?Ucl!HZQD!f0AjC3@iw&ZoJ@5+-w`VY}+CZQODT{-c(~=^7q_0AZ{O89N8& zt-d$W^Mgz_9gU5lFuc$et{ARgrM!6LQJ0S#veVy?Ue(KJX?6&|zjr5(mcCBKZ}g## zM>jNm3G}b;{yl`OCn=M!MsrpNR+uG0zauJGJ9#^puRDmbJrS|`DOyPb8`{OSzT_i-Salvd&_O{ zzHgRVlqsIc0=2Z)=|DY9J66W(om#NXk?|TE9U*C->g0;V?9os=j2+OMVd{ZLy41Y_ zmzl?|E-^-_xA~niwBePP#i~uCR}s6uTX}m~G5%&RRim-t1eKW(0qY(G7%*Po((Dmo zk^LoxJ{Kg#_>$NqneOXVskW*Na)JG=LtE^K$=z%^@Wsm52K`2g^4Tt5KQTxhIj;ox zl;b?j4U3eFH4=7wAyRThmmoLyF$)vd`*m&6{+H!GdJEMAdQ#I*jw(oBvwQG{4^qZ$ zN(O37T)&$3A7+2I$QB!wsoqq>e%xU9vl)L-dM#F|d+X!QhmB5h1<0P`%F8WWB<*w5 ziWsj@YvM~J{Pi5i_L)pQl%%g}kkW2-g%sh*t=#vmQ)zf56(kZj$+zn(wa*zWmnV0X zt(lD~UQ>vXs!^O}p_l_kb8;v(h)h}uG)>VMx-8p;Er~__q2KC{ zbKoCl=^uu`ANB4(4En#PJ{e^Y?p;@ykt9l?RVQ!~(oq<0w(~vgrxf8PNQNL7 z7R529oz*9AFDGGG$G(^BZKfFHBO&o|xrKcU0T)al2LDjEC8VMwmcG2%fh>TOn=alj zy(^+7vQJ+BU|5UcLe+ZwGkFY8v8cl1Y+*r$eeF9H- z@b(Whx*3%^IcDztW105h?In>_*M@Df%{J|dvUE!pm3ME;N-r+lD-OzZeDeE7r1x8+ zmrKB3sO%{|kR(pyWz+N}&t|~zIV8bfLcXvH^N*$Mbno>0pb>6J&VMQq;vUid3yMfJm+{DGv0nf92-lZzp-$9+*5ygdz@dDA?CmEm znjo4U)=cbA9ga;%v4u8bUCx7ky9H}F`JQa8c}^-zFKimRMxQ-rjSLA$ z6+HJSU+yhZw=?XYi?>;92bx*5Fsg>B6QhP?qKjcl6~cqA>oJZ#J8Pn%RK7atn-|OU>m6Sn<(vj!`2XqO?1hvV~jvd07|U4yzE<&Lgh`2 zF)Zz^2bEl$^86AP>~!dJ3ivTb=OMoe6SK%Uv)CFojuTni0SOkr8@Y!M!X+50P>YZM z240r>mSwrugzJ~gwI9`IRWBuMy#8B)4|Mx}2MRWs>?`7R3FLKhZ~9M|xUt$S_W85o zN#|pZx#B0*AR?dqxVOZ`{aD{y-f-k5)5>szS5=j?jG;P7BZ^KOHz}#p`2E<$ejo&GRM&a*`6(f!yc5~bf#?t z(QvA0US|Zy%+9I7_x}Dj{^$A+0{@#4__MnI_gLLi&sN&Fql=_4ihFN-l4Kq`*%E-a z1f^*qJ)&<^@5i4LlXoBV%7zQIsMR;Im+Zg%1z1KTq*_f|uZt>64|y+AfD^r2!ZofC zSwxYMHH-Dl4Jf7bEq_1S0d;XFe9`CU_SyjtoJ?AP)`TRA4ftU*A09IF#m$E^> zrcSBP4gn%?C9ab<=*fH7CI~NVg#=^EsF_SW+BCzC+GlfDy3I7jx;qD;2%aZLS}EQ% z`+CSfkemu-oO|$0e|ul07;!Ip&Jz643gr)AIM-u%5Tk5tO7imRh72M;ml2dlF*lR@ z12QGnW8X##))!os!yc*pbAav1l^fyp%tR|ITtD(cx89kDj1j~H#vi);zE>Fy?!-H$ z(rsKmiA;qmk4?efgitapYLXSjDt1F^nYVjeK#7CY!GugQhP-*!?;mH??lgF8t{14! zD?V>;6D$()8U5Cs1c;hIweYggnUrf;Qu4nawm;~$$g~Pca4PIALSM{ za9S~x{~7BC%hDj$r?9hqV4oWixFb(<^!nLyk@Xn)*8*A~)=d>Ed9)g1$%4eS=Ga6B z2SV`Rk@E7&ctPd0ZB6N40GvWZ^epIQ1+nH@3=)0>QMcswTL5nGl5ufty}WCAt{4Zm z24cp&>YMT@h3FTJ-=Xx|Ab-s=lma*HnHzk#`tJyAY{Z)DE8$&O^P}^|&Z6|{+xMT; z^iqHODegWTM7B(Dq0CUfBD?gUmT59Ux+zC{p)j zrxUNBds^_AU?1k4bXP1m21GU910Y+LfI40;QM5M~Q6jF70uo43!o4wJhOGVWi{Y4! z6~UV7A(I3~=V|HL-AUOYx^oZTDvmc!2f>S6wd<=Wa7zz6SK0)xDm65~i{QCf?TN{w zU4%kN)UbUdWOpfT?zA!t-YwMvQKq4?HR|~0!8N5`;ClI|X7C4re;fjM|GLlRUkO3~ zzNX*-TRl6#x(~kn`>vP24XV~N|JTQW)8HyImhIXgLv@Knx|-kkC#n3;{`~*V+o}KZ z+m%0BNY=oP#NSn?y!K$E@1eiu!ab$lYY?5zAuUZQDuC}dY!K&D(+|~T;0eusE%wxg z%1w==*q?<%{C1t39Z@DO%SROzBA6exoLf-%cHLM~3w8F?3s+{iyB8x&5)^M=!1s-6cl!y)Gm$iyiisZ z^gtwW8O1l&(TU|O(NF@hUR_mS;dq8?>UXB-kZmyCrYEhvKp}_ygiN}TV=r@;6^YkM zxh^>E0F3Tcok?9V{os`e&>)3#O5~#PcQrrA0w1DZ8P)& z%@tZbmMfAS8pCyjX_A5@ffyp=2lXtbnN~uV? z!ZV%vr#ywTFG@9>yLpT01*GsK8MG{J@L`Ny;5Fmtrh&o$C#}#1Q z5|ph$JV-~B9v=N(4+ry{_w)6{1omrm!^zLe4zk(2qJ1>K9hM%FcoOzq;c%{B{M?oJ zm573tCT9Jr8CN%DT<&$n|4?<-fBC%h=hOYSN8rD^K(ba)w7e(BD%y*k5OXa&aMOis z;b>xphohoZXWk+MVc$Bmwp1!#llTWNWI#>CsVVy}fKvH5nrA!Ee?!iSxqX};e1&j> zOnQ&$Rb25=q6e3;K5$%jr`R)~x1{{@sUX?sp_V|5xb9}_Mx7ZLIG~S1SF);)wt+kkS|XKA+@UlH$%l7MEq- zZ6k5;qEtY)|z&eG?BA_x8b_QPBAi6m3$SH0l*RI!#+$t zHQwDeC+(-|GOpcZj!{cIspE;HaguC*7i}Tjv#|L~sJy;ap^%{_FUz4o?9a{03XBqc z@lf7ReElAf`;JW&E1?hVL5qju=@Enty`w4iK0dJ2@CSX^o%zuRa^QDl55+5!IkL7A zW-gxw$r!(oXLq#Dr{wCAeeL7y{JI*E^a%1c0$R3Xes5(@oZeJ;Zz}%Wmh9vQx1`^B+GxwB*@UV zeO2(ySZ?_H&rgo%pm%ie@8{GeEL~ENVix0J3|gtu4QNDkDvTxF;W3* z67@CZiV9NJF&k?V_GY>~g;>vi5qEXo{t0uFb{Qn7*BNCv#{%l_zWJKDlv_*o-w`Bd z%1NHR;CBgn1uR7q*v&Fvkszd^h#WXwjo6(zi7r@D3N>SjCZH-ZNmMvvDRRE4)jD2y zv1SWh`Q>cGqtW^UOx-v(KZj_~e*-QAd)ml_4aO>2F#QE!eJ%MS>&H#0%HAA~7B#GA zM1Ab)mj+(^20>*}-5a6NJSx?Dr5xpI22;R)?#ATN&86*dp(VOG>lW+!=H^vrENh85s(wuS+8vzn zg8ikjtz@&!sQ!vd5vAn>9kS#ug5r#6wylrqbv1)Hb-d27(Tp1%yeZS#y?J)0Y4&}b zrpP$Db=V2(z^x)!Qs%nPeFeE~a<@KhRj$#;>rH69slbmmOrJ=*l&d#c)S%M#FW|-Sc#~IH{ucO@E;4Nzvprk%DOsdLQ3W$T>OQ2hpzqfHpZOTqZKJILgrjV#I|!EH1DH&S*6*-lcDB z1K*b*Q6xX{KV5%;CDnu=vkB9ft z%D>{9kr+BRsRT13>rGtlPqVZ?`0q3Fbi?O~e%9ZUw#qmO?$Rg*)M0u@EY&cW3$DGuZ#R(d)hM$Lb$l$ zJ?ZDWE+18(Bu}k+sVI7&s~BC5KYmyU=iR99BE~`7J$70J`Kyf-#yg)zATPzodISy$016+E;Br9A$fvSMAHrlc*5Nh zM{GdchlU9A{`GUeT|UbTD@tOu%P7f_^~3~Ul+EU7|Dk05OEq$TUi|+Rf&b>6x?1|w zQzo!FFFOKqR*bVZPGNw}2VtTiSUsAJnI;YT6wkiW=MR_f-^~gBy;i486QFyQZS!T=P_%8syP<#C}7OvuT`S--b+xw2b*vblK*YeVX z`Wg1DUi0QqiFo(sP?dR5YB(?BxlTau_)p_q=r-zAHDOcDnqKohs zkbkNif)4hyzmmM@sp|2v@+uUGU}6s%(0M()`pd>ecKWFDxR=Knx)mS#N&4dWbjd@QF=1o%)_>pTT*1}PJ<)&_wn6@s`T;EdmR~K zE6*-u!At7IAfWZ7b`JK3Y(teUDv)X#T?7~K)2*`H8`zR610ED=1QSUPc*7&s0ke@^xMf?80m7KhPvd zxGE2W3|%5OTWtyr^z^!%9B>nRsTS&yH;Slt$sYXG3f2v<2A?Af@|*o@acr|bZM^q4 zB{BKs%%Kd|A+P?vOTsn=WVi{dv-k_}&07wrz|mmAL`9eM&kxb&e@@OidhL<5IGSKi zEH>1cOco4KO}Tc`sWuxlS(;s@>w#&Dk8+Zk4D=A^Q^WzQUTV$M3CK$taTZK{HcXOd5ua-BqE@-*u-!*nHA;xGL^QI82B{wO6kzX=hNhz{;6v9v&}vnzPBZlo!mc}aj#y{`oiZ${^5LY zkhg0EkWt}j;k31kvye7)JzvWp&R;2G)&S#xNHH{IW__ov#qsSY+Fr)2-izBvySh_& zn4uhG&CpQI4ohXaj?(Fa7YkU^?n_xc+m+x`GD?K6{Ks(rvEmvX9Xf2O7Ahy8%n9`K z{j5|IB&j3&gKb7>aVO%Q+Q+N(3S-ZN9TSq5oo4!Y&ffP=T=n?|dW(v7=s2=xp{P{m zOsM0ZiSpjJZBi?9u#LSFJQ^?Xn6~0h)T9BAFtAV=J8P_RO zLORDq0PpLW*Y${9oyfVRx;%`&9su8UHIXM- zYklPTJddt4MfL)aNlPMRN`+nY3V?R+m`ii^^(xuyc`>Owr4|3L#9c?78OyER%)*LZ z=|bKNQgZ!nvrT~S?xD$))0aadLasVQ9H|0yI<$c!i2=&yb){-Us7+J4NJqz?3=)9hlTH6Ql7TepIWu6_0rKRbs@;_{~lb9!xnkO2oAx z_E6K`{^Yx=Q80KCswB(HnIFa)Mj9i1z^;=y8^wpG^6Wwf>Nv|P{of~>{fDH)e_rID zjlkd4(EU#}zRtgSptk+Zj{bkG6+cSq{=+}FO}!GH3k$6L8RW~O2l>(z2`Py(z*q1) zEt8@@l@6)R7a9O3({A6bm+89<{6V*kXmmnEy|;@-p&V_>#~nH~YY7FtT$38E*^8F- zUorRlTORwmi+1OjAVya8bKb|YwOym!3?r}moZekpe z=iG7al`_hi+QPX3+4P8{14RFFQK+pVceafSy(*$}=At}Pu|C$X*p6Cl({eW%3a0@+ z6eV+2d)f>bSPJKL>HEwjFLYWb*5w++FwFTnf@rHa64PA8?5xxRXEt>^k)P8wed6C8 zyenOdgNV^bQ{FjkiF|oe2hZljmflAQCysF+Xt5T31TjG0y@M#qNozh~3L{wO3J579@mFKAC&nD=;PF3$UHJ*XET>^2W zifR*n5~;k(hUPZk&a{F}nMD@DQaqa2;8HcNGEGxMjD&7mKl}mPl*y31-qH>bXu&qM zGfjw^O`!>5JHGLK2w;7-NoMz3Gh(so%GQVAt1#FQc2ZE^82B;zI|*JLBB(Mu$|h4N z?fGzSbg?aeswi_N*U8#fxamjx)u+2y==NAx;pP#*K5z1pR!$Sm$l%k-M=nMa5>*j@QN|#1Db--A%wEOj=0^GsZNR_pHuDX=Q>a)z9J2TJU>z0UBwrmL#oMA= z$&1siZOJ%6l^{375<51Vh`8*K-!w7>#pL=G8L|aSxC`X%-^>*m7W@8t)@-&4U0K0L zDa{<`%s`8T47SW`1yBDn;GEBo!c4S`2h$bE-W}$hnfau(zD~Z6!e-$Du!=uj+Gq*J z1&Op4SMP)~DSjIPF+^LT&(H58FbSMGihiocyUH>YEQUf1*`hBcs^wR9mAw^JxR(^_ zP+df?HfiQwkmvaeeQ$_G8a3<6K^T|#4mctpOjx55d~=dU{FB4=4I0$gq-*6gu_t$8 z>uQ6N66I9~jP335HckU4q0`bCf{i;1C)nRfM{YemRB=Wwl|83!He`S0D)4K*#Y_f z&Qeg?a<}`qPl$w7o4Iu_&^~6=s1sayyuk7y{!;*vg3Ay(#c+M)7IE*(fc?unf(56a zRZ|U#!-cL}1I(*JgE!Gy@YBx&E)HW7%VBIu>LkqQwD~?0;A0cr5!cs=z&1WCkc?YC zO!LQY8O3I07pfC9W$DQiEOttg%F6*6RhS#27b=xb&W$Xg*sTE}cj&K_5b-&ih5qbG z6nk&HX!CbL72&8jTxlEQ;7CP5!Apnx(qSNoz&+EW?1J%`fpl4 z^kG!}9;4PH%Bc1xVSDq5L6rX&M2u4ZmdQEDz=2$B5(<$Tlo zZ2PV??7JGcg^lZ#_zA~6Tl@e#AP1HMvQ2iuDPTs2s@yrfxRdhT7HHV}EypM;iR^fY zD_?p&;Sul!9U~0je@h6g#U9h0s&W0?9wxfwBj5e5eakY}uWIc1?Fsdw-$v?p+{m9p zgWZy6S}cJBU$ zA~8>pAtm)FzS`S5Gunr6e=1d(Fr#Nacd_HQ7R9N=xEb%m;!P{{HC2DgP`}O~6(>Ox z<>Js}I>Clj6J*-sVb)vFPd1}LY$$=|&7leYl?>E1)ga~5U?PK>9f{FwRx|f9DJF@p zzd6YReuu3p&F0U1W)DqL?pmszplkVIIyvT6v7q9*9z7C;ettTGH9lq zlzH+U=dNC3J<2z>=|31`rdJw{80!rkEzx-on}2dhaT^T+=nfY zl@yaQ38f0~@_^e|T(>TmX$(1sr6mDJ0BGoug@5DOaMdJ`g3?CO9_ zYt4&>d3`Tg-AOVx3d`YD3_^fZ$ww&-4>sS0{8G5B>_d z_zNIa`LUkJblL3t;!?v|-;JToVxAdZE=N&!Y-Ov9^^Cn}RUVr8Th}U}OAr4uA!e-3 zqxYhBhjKoVRwm)r+g*o;IyT|`-WqM5Yp7rQRJL@Bw3s9+lIkFJ7o^*YwstxRg)Fza zNTHKtuIrZ^>ZKloczmJr`sLzf3#4mCtW|!hKOb&E4ec?RuX13;KF|=4+}SBVLZew$ zr0sMKYm#3Y|A7CHJRdYjtP-m;9uo{6i7izB|wgn;kxMGNoPft;VTfU{H8iM-1AiS!`Muy!EqJ0 zi+S!765&VtG)Jd?laj$0E*F|u7=(AYmm&U@vMfUwKcN%o^I#q@pC~Otzx5w>8c}>Oyq)SMr$ffW}B`-#lMBTWv<7T9gTsY zvNU5GN*mXx%)R;-MpSw;`bOku=CY?|iY##mDy@LxqP=IXkZ~k=kH~wf5pvOsp=0%^ z6?T4Wv!|=o2Gb3Jw7hLVS>n8~z9f(RF-IaoWTjK9=YjAB|8H7NgSt#17a?y>52_$& zDaZU6z^dTeMQEv;Px5FYf*%t=Mi5sRClJ_>YXjv=-~~K-uVqTj9#5G8%aXU|jUJs4 zcJrzQ45xQ;ZPPID5l;r`lFc8l3%4_A(~D!QTS>Iv_0DN7CrZDn8yYuydEg-AXaE@g>Y3($0j@$DK>m2! zb#bM6?`{LKcuwi%5Gn9EtlS%~DEH3lok+EbE8lC<Px(scx7-Q zgJHb45Yk~%ocnqrjRrs=X%8*QaZYqGdibX zco>jWpyoiL$MNdfU_h-l;jVUZb-QPV?i1vz*`APKabV^jU#tHr@%!H$ef%GvIsexH z{-=W6-}k}#e=E@aZLI>BR{W2H>IETc`N13saDQs-$*&na8ha|yG2Ph{+lFboOycu0 zO3`tCkIa}|C>NE@YxFf&OMVcc{LIPL4Ec*SS6C_kP!hYcmYQoO?S+h z;cAV7O7RT^h$*QcC;FqT&o>5!7UPtb)0ND+v#ZCBMZGRN#W8*hvhVGj@0R@#exyyY zm3Tf|klw_$ndC8W<%UFz_jl)N=VyKetmY}}^#s_v5PK*HcA68Wgxzs&uwbwV4-Rj^ zS}nHt=0Ywc5JNVC-)yn^CSZ^Fz*GiWso}-Mjn3nPER1lDt1@=o*Lq(qVi)G2_AZZ33_8gI$H$7mhMrSt*Z0 zrLZcl6X)GrZ6Qm&yOAm4mL?-M9 z_MNg;UJZJd%xnZ-c z?`AduFX5p@3Nl^%Yov28N57}F(u=tKY_9=VcqzEJcm`Y2qso_B(Vdt2(3~gHInD(X zaLQ%G4Im)%2~UuCPevHs0BYkbZEiu8Nh#CrO8ynZNq0-#Ar0921c|mEUfQg2itd}! zT{LFu>Box?>ss4r84K?IXzlsfDy2U9!K3ci9WN#oBrve4W4cO3G2;E}G4?V`i!1tA zZO3|^D>!7OzutU=FTAh-YI#>i)V7vW*vX;%)_>Io)KZsi^ciVaz}hv#Sl<+-*hT`T zn2r8Sf#~E^;bU(@G zIJwxG;S2l5S%*WNFXXFr_H?y91~bcp5#p(n?i$3eTp>-@GGo+VOYOlVnN*g>H#x8> zJzp8)j!Y1&lNK)Dt@{h`yhR1htx8X#0j@jVwWHpU3~2_S9U{bP%=_ug`A&Tvn%n!4>+roj1;`Y45_eYbf?Q_{`Zae z{4?T3-N(n&)VKfhgWCVzKhyuLZ}%VP*Zw}E>Yr=v#LCQW%h5%o_(W>|-@V7nDH$$= zwpXMZJM#MYc}HuDK-(3y5{hD!m*ZO7X6+8#xynV_P?{}kv>M8*et%?f{z1L@_}TY1 zMTkcefN$jN`Qs-}+I6rwwc(voP z0MTnfUC>r&@ZhGlbmso} zF(gI|p5nWHpK7mxgk9|AipI9m#ok3qf6j6&n{a@{MCnb6SObOdCY5~B#j_m})yE|- zu~1^hal%(&N}CG3)Xl(6oT_L7l(pRg`oxMxBoT0FNekyVFzlI@yizc-+TmGI8ihP} z!RL&Sw@W^Qcl%tI*j%qtwGR}PCBb@T%2rKD3V`|-me>e)4v8EIZSmytD8tn3gf$wv zfSn5JL`DIDyS}aSai=O^b=CrIxW}`&7F+H?=}*-)Bsb>o$PANI*n zQHjI>=A~x^Rf7qS23Sm-AkO@5@QWw%89fh?W2!fZH^=qVVbnIJX8fBg{6m+eOXn`U@nJ(k- z#xTdY`q`fp?GFNf5cp>z@UNDUmz_K}4fMBjS4BjQi?88Ln}n4=&dJPxRT-mHOeJ*G zXaZ0y{2xtOZ(S^ea#!f$m<9lxwKD{CyF6$+NTg zr4bw-fUM>uCQtRNKhPF)=TC&#(A;m1X(sfe*mlM}*mfzb#RowB3^%YM&*3xW-QE2@ zKE6aNi!7&n>?<8gb>-8&S53lM$EC?&ny?$utNynm>mV>T(F8-X7~TytUA7VLUjQP( z!AoYBZS}YKwM%%`htTFy$FqS_1xbhP;sona1mI$$?nr2amPhTQ4ZnoS&!Bufc4koq zZ$#ijD!4BI-k*~A%Bfof_VWfyx>qRDjbx}Lk7Wnu=d0G#2{k#9O9r{%HpjoqHuy@o zB3x`YW%jutv2l4(NzzDe{ZKSTO&Z(zm}(CTxLiBsa`8LHHl%5smoz()tnkX1gza^h zkP?<>MQCffMQz{*t3VdL?YgD9&D`w20C~Xj4P4W#qNcRbG~PNE#w|MDU_2ufPyjN$ zXXnYRyC%_3z{O78vPcP%?GQ*|2=cXYDfMxNP|%PB`N7)WwB@cUJq0Ee&!hHMl+!;H z+#sunmi9C0S9*t@Z^o~q2VZ^#))W|zytT6a6nC^;OW-jXth$Z7STSBUcA^mQ7bqQJ zTVY=dBb<&cBuu)taG91qa^FnO&Ay%Xb|F~QGqh3{*o=8P*BInf)%s{91)??=AE7eu zSyy52y4ZSbfF&JFDSzTM|5iI(4++2=jU*sohV6x*{zLtsh>>m zv-qo7_<0!=NzT1*}-4zMZ2lxUtuP&oQTYW8>lH@$&w9$oW;H zRULcsc=4M+UV?(2Y6gxZjOIrgSX2$b$>_kC%r_8|XR-$?Fe-YZG44sfNqmu$fxT67$2a!V1zfPC&JnHFV09hSC483Zb< zG-&ygga1L`4+8%L1pf8P3lW&;yy$d~rG*zy&>x$r2l!xQC(gHOA=VD)3tUcD?&Cy*e^e!42l7AF{~DEOds!D6e`qdhD&9p9Hske8`sTh4(JLIEP1>&HIx&F z20}Y2uPD_&3`v~b+Fw>z41T-Z7rf+CtrmP8Chcl8M;47aU-Pzk%h^&>BDTPzSb_VF zBC#oz#hx4f=o5p?f@!Va^IW}n9+)Ay#`=>{+r4xqiI=e0(Rtb~WsG`wq%8#(LXW6; zb5p8O7tB3cuS{9V#KswKw$Yk}Z>`x8%huE^w*xSs-*zj(SF*u%CuIG#oUM7BvH`YRf2R`X)mFk^pKm2i@&mJaslPUhh?!O2haT(o$eVfaj z{bCAC3-x&SHd{3;nA=eMS2u$kN@#VU$2N-`wxAxq){L$-e zatD6V_Ilb`cWDsUo+X!GtJn)jwq4cu@S1DI>Ss3s`_`}|uL^j8k;XK*dGiL4SL~iS z5fYcs+7Y!SRpn%3+?ewh;BjQ_(K$)g>oz!zPWCSVQ<%p}OORUkn5%tDl*=hQR~-iH z#OKFj4oGK&a1Xs6Cg0U3*9o)FOJT}XzZHL-^ps?cuK9V>JMOna#UE9?sg)$FR9<~k zdK-Jt&RMxILGAc4qBYbv0TV6$O%1b9{?rgpB#ZFD>&hVauq~KC+gp$3E z_+>A8I60+xcs1bhERsNTd>I6*Qj4?J%j?s+$|_I3E`C?t$4<5GbJ1^_d19hdgy*g( zdmLZve+(=3_ShKgAKR+P^*7^8Xr4f$iw-p<_eokcXCVsh=m;PL>sPM@W~8wXjIJis z$d|Qylxx`2SkOZVnb>EGJjHwt2lA^yB_KcDQJM<&p-Z8@n^lAKiHA`IRECNZ3S$uPpb7nPDjR39YJrHtU-MGC&CN$824b5wi&==9r~ zxyX%0x_2Z>$ss1nW5UX|AMb6PGagP6PO3IIHSRH7xn}C_6OiNUfdD3bM=~4FSS@bI zMy78xC64yxP&fUiK3ok{&SRlwx-Tr>C!3!bLZ~x=etG9KuwW>HP36W-K)U;=k_$Y2 zC=8Trs$0$D)3l0|1E5Tz<$jF@9?Gl>HcnM@9m5rJ)i!ac`kbWY72i82j`Mn$zHwF7 zA=gG|6#8JPCO?pT5Ih7q2w|k2+!a60EB;K9*n$RCmVRlCx}D<8vM0o=Y%%sFbD_17 zJ@70|G`*T5HeVc&#P1Zt)Ss0KR{Cf@O*d}DmmQ^qCSmM5@cv@t14f?$_JDg32kc9U zQl+s_6+|y#c+K9CEOp%tt0df;b*V|lAtUAUx=$qSLuqW()SzjL=VBHmjaStV!W&z! zkI~IZ!Q#4YMz|q`cc5Z6p^<~zs`Msw#iLrro-)yimZ}_k!?fG256emDq6S!0*Mb#+ zhE37lGU77z>PJ&B)#T+M0pJ7mtmIASki-}D7IxFtTYF@VDg5g{;X424E&d?zKN^95 zJ&iy!`#x@v_B6>9F4Z%PEDwL9a)c;W6R@yci$#u{-^j;P!Sg|I){TpNg;U8PqV1z{ zhbCW_RO{T+m$BYMuiwHYLbop#7QvSTZxChFZQ=1NO>|K$CX#ZM%1=ek@pFV51^#VU zhmmZ1p7vxl&r2fzdIav4wQr}??Sl~h#D}6-9imX0mjuR_GE=Rl!DLHWej;6vMWs(F zh|L2j&Wt&)UzW41d_61r(Vk>fSW-&6+RKVI2^yr%OrNmS2`yH0yve7l#~Q#T@`D8- zQk5co0eE?P6uRo4eIQ&3(%=R<~@=w<|3pd6*hp_zdls?)-Jfzusp&G{eGs zlHVL!5H>gE%nwmNQovekDMi^X##|As?bECuWkeL%z6MO6dT)=dBq$8o@hR(=wGTv%&C2quD z3Wm=Z$@mwWn~=qAVT$Y-G~ZQb(UG-?$OcWSM?&)YU3oEbi!bPa-O0 zpvm6-syZ8WW;rF>r2X8Cx$#?G&Az!t)$^4FcHk>!rV!(6nCnB&bzfNz?-%o)w$v;@ z{ch^SW^Rzw0j{-2pu1=PcfGH3e*tQYb2%qX^8w*VbnKCViX&*e@+vkSF4+V)yTkzW~oR6(W$U4pG~9o~bw#?rU3g9vAmc z8kl1jv_=olr~E$>_4Lz^lf{Gcw49vl6(`do9xl^4WcM$x`@I6_(yRUg*ppK%GTGdZ znvRPdE#yT4Zs|&HQwVJ0K?5Pm zd$WU1J}OmCBuhlh%{c=M*=UmdG672^|6Ttismhgx@h#10MSp zafOciMf-+VPqzCw!}_dQvXdVft+n`&r-Z)fr?k>kk)@VL1=!HT2& zNF1p^Br-7{9yz!0Jk>mfs6eSEKwcBXsXObIuA9O&K^Zq(0sTZjUhk~|ycZG+8$IK4 zHPH6zx}@FzzR~J%61Kk&_IzzUg7@n<(fqZqQ>&M+(Q8G?+KJr0SQN~H`q|@`#eAC5o=@`_(sS1r65wu<~DqfKKiqYC^A{AZ9%{%5fl{zr!#fBsJY2?3+OtIPZ!c`d7^ z!2?w?CxqMPmrZv8y$@>-27g=9oq==zrM!L=Ot%j=&|)o$Behc$c9TPu^K9yxDanj##Um} z8Ath%*K)StU{{`EmwvnudRc(v^bvy^WqWBV9>i18sl5rAFkY($#!+1+&PEN!S*)D3 zN=k}7r+;eJUks`@`QA0{yf9jwjd$Jz^$6q4ZEuLettZV1*(!f97zbIfW)9yUIUmLK z4d&NvZAxCy_4&dMdwr4Dx2)|~$V2A{GTyQ-d4ea^5-ytGwoNTxdlL6p$9K(EQv>T- zqu7Zt8kz@^n*elc29++FJtw|V3qP@owc2ACSl*L)*v8FQwARsvRzG=p%wy}2-x{St zQBfGC;E^jc33I)a2)vmI${Haa?yVrwVmr(>bgiR)`J-z%H2ioB{J?yAcU#f~;_@@5 z5?0=y&eZn!MU5S3rP_TfMon>U1eU+yEm zppWKtid{^o>pc$ceDGLys#_ef-8LOwC)fC8JJiKuefJxgnp^mSBb`x&jXK4u?&2#| z`-$t_{?_FhJmrm@yIc) zDNAt3-N8#dzfzU9ufoDe4ZEMa)+7XX;IP1Fb9I^XVo=I=pO^2Q8k0BL%r!57zh2@t z`j9h3U0n&4Z&ORoqs zdEK!eW0)&(@9`GFX>4XZN-C=6aqu+S!soi$z}g__M9(r z1jf~x-=Yi>RG3CiJWBCQ(TXdYe)8FsL|rDFs#5ZJtSGVQijW;H`BZb43MGm}^JJcd ziTiVXU(sR+k^+6o>NV2v>!y`VbN<1WAf4NAu>Vb-DSxk!Y?dh^PTNZxOOQCTy3Dvg z$aV0b?bJt%ro5sxHNxm>6k54sdE_akrvcchZ5Si?$t^GTTswx^b%~O8{-O&1rCi5H z9jMBT5{*WiyQQ!7RGztlw|Cc_+~dd^zC5m)SndilQ9`S`*%ripk-pl-=^MgT2f}Vj zwW4n`N4y!>@^;Z_3sGxs@d3qji$RuOOoM4aV@5;wj)m z#=Jn%cgnj*cgm}HZ`x6#bPgs1^Wl~umiXyv6BxAhzaA|3XHq8qyw@KD{yQP?uVs2A zFN>Z8pj9aZ)_uYR3Vg@sAjH58oTL$0lsO}g1| zw%!%ZVsCY%qNz!AuE=s}a^n0@N8;DyNg`y!eI|I*faj}3e!jruJ`om^33s-XdesWm z1T88lOe`8?f4j)jS5F%}Ne;x`&Yi)0HkD^xd783JW7G#cJE!F6cAGZMnf-QFmHoWF z=cjESCtBrtza0sU>XP!GD0U3$c+2_pg=apdW#Ba|<))ri=yP4#3pNKb>rCy{K1>3v z_wQ4`n=UDMqiUc<$Ib93?kOsm8xjQNMPPRM?UvJT8S)et;hP#Hj|8(4%MY%3n%1E0DG5zD06o5(aYuag zrqRLXw@|_z@V=hVLNr|bdbPw!j@_`86lkb*h4M{kkt*#>q2*nALL@V-(W(^ZEL*I^ zCkv`%FIQd#!JAl#7zqDj{1>qWO@A~bocT-@K5y96hf&w&JBn59uUK8tnN_{CxS&|V z&f-wVyJY4iw%@&zA(ibYkk2LGh^ZwMw;@AfJ_H7rerA$QB6TI$1zca1AG;KUvFEa@ zx}hp8kKp|jE3>m#8OP-jvk3Hg)QFp|vYz73<@Azv!p3%|s`ua8g!?{gkbi>@-<~Z? zJD|2^z+%{?+NB?EgdNANWX7{E@|U@EfloNbzi^#88a*`?GqILmi^M9egHdSZ>9&L) zU0=W1qd)@<#J6G(S_0O>!+)3%WjVl5IOEst$VTQJ#7jPZ1$E}An!g-{X&#pr?1w_1 zkcGRkj%Sq@cBi(4X{AH%x^4|3?4YuGKhXNYIzouNGBi5F4Juwtu_Xe&*ozu+bLAf7 z@+OWdw(&LkS!|YhMocV8KAs!fY6&OOn(JG+2mAwi{43*3!&1HKc&EsZ=T9f#g#Y5+|r7y?K##om&0;Bm=DO@+_ zsKzeL03fJERk?g)_@k?aF1qia=T{Nu$#-N)K#{E{=* z6KXYp*0kW8tlGr;MEz~V-R0sc-6dTf+1l)9W28cknd_h!n&j?6!6ytwBgz3vX*gZc zRiVZwb-j@VS-Sg>>oMQWVsCNX(N<^FO=(LD@LUbm9iQfuq4zX{Y4)!I|K&NVO_`_{ zPTOYx&mdyOoBc*$5{Jd2l6d*>53|iZ$E4RuAuO`{iKYG@HV}ZJo}BR5Z{<$Mq&g~z2A!*pSMkip z*gLz_;MJ+hUCs;RhxoB^;oYyL$h4~2%>Pa|qd(u}4+8%(2(+JXgqk&;PKNR{Rh0->TB0WL?UoZin{qrKX@hEjNqn zdQVjqWcO>WC2EN_aj@u1z;7dgO^L%#E|JwO9Q6e}N`_SX7CNQ-;?U1iK~YE(b!#po zNMKMf%t^2> z3=Mg9(oQf$m&4scZGVnidr1j+ws%p3$Avr;I#i>6n#X17@dKC!6ip%v4BP9w|29B2 zwYGIWP{N@_;v``F>4;kvD#Mu!IUDS{tB?wIOnL%fWH#@zkJsAQHv8V|btc%=l;X>= zizW?^D*_I9xKEAo50EvVoB1-(t?qUh)P=I9v_r|5s26AAopM_P35gdimOpw>BTP@o z5rj8}W3X!cz71|j#Qjv@x5sutJ1+B>f7!f7o6o*Ri&;`Y0^zPk+w{;&E|(uoWCNfT z65q6)tMNyxVz%TamfRvVv+D@^wpt|q0@vr2Wm=dljWggh=zMym(nMfD`jlW6{p(cS zqw8URZiy?I4N(tddd;H&t@|-F3HbW*!(!jF&)5vcw(k^3%EwPMQKVahFn+59iu31> z3rAwDw3<`&C#;$`i0D+eC-EA87+T)LzT9biNul+vVYX>@LC06XKZXV%PyrP?hCViy zQdPM&!bV2gt?C6_QT#c93%NCp#jPWfXO#mR?kBba03PgBRnTFu|v zP*x>3T>xg4!umY3B; zLB}H9U==X2Nkm43_}y6n2R7{h_!g=AMrC)1hDDsqe_Wt;w@j*aCYP|SEwcYhx`Xkq zBh~jLv8%0zhD?l^p~{}lx15|V-qyYL$l|G;M7K@qahml|DG*#r+^zv8`TU0;kg8D4 zN30u*Lkh5O^!ytY43{OA**cqeut3%TCm(J1|$P8Aja3pQ>Y@jT^UBFpebKSkH!ae4{ zanfa49PH0d;b~?x#YPs~OAaRD$St`}t0j#F{c?M{wp2AMT01U}gJA{9U z_BZd#M$t2Hd1`8|lPmmjBo*@rEkQw{)QRb$o^<{|V2&Y{)1u}-*3R{eH)wMR6rO;7=v#4+4J>_y;5K_d}!qB&hc{1((YICA<8$wSSbV z3*yXcF2T?BL=CmavT#SMy)v;mzE=oiQ+%N%UZSJ&Wna=#Z^2SW?m9bK>H(%d^J$Iv zY5E77smlnFbpCs&Syg^gmq+)&9DNjsikzy*pzRGm&#z-Mx{pCL0bx9!$5;!uM6EOl zL}hY0XI)*Zt1Zt@*kaj(JC6W&2aO#?Rd~t3^7$L{Sb=aTeDWq)Gi=DZHKaoR#=IkR zKs)HnOye&AM96Sv>tQ4e^GnQdrHHhvc=m3=aqP!O^{hlTu6IkGgz9v&uau)r1U@c{ zf45*6#27@?GV>HLSIpUv%DyRq3eXlz;)QSNNTTk{uZI;u@*qPe;A&acxTb;xZbXuU5M6OGp~!_y#2m z#+xy8FIzQ|BRM~>#+K0nMG@^nli7>~wJNsdgw>$2tbq+A1#Hd_966c&-M2sbOuW^< zK*ax6#Q+bn(VOY(F87k2$&X78X_6^fWQ;lZ4IER3Q+G~;XL~LG06zN*koDm#kF!(U zex+`Nn%BnF}Y4<-^xN(V}F3l*%rnguc4w9!rU4x)v?XxzOutE&MY^)qL)lhP zm0<+fv>xF|*u0sFb6W_ObZ2U>lla2E1#4@TrgDxMMhcO;u{VNKOaAPAlW_5_x#}07 zYhO$)9^sY8xYAz$Sn0E>L{+@%7A*sS!34fjz)tDAsV{{KoK~`3>~tfcj}5wHi%7>n z{LALd<(OC9gWXt@vo3-9zEP}j&pKs-O{mIaCr}z5+%M8Z^dvtfR*8Z2!^2CbpgwGG z5DQ7rchd5H3HkWrO!H9%Vr)?o3^b`GpdjU+tj)yc$k?A)xkMkc3) zadS`~;O2MxWk1Mb#btW*YLr?OrGum2_6B6o-uzYgd~mApg>#buQ~5qc6Uox2BF_Uk z$*a#_By~2b3P*tIMe)c3bgS){+0PP$iaFVP0(_%I!Wm?wqw0o z`q7O+nTMB^%RGhVc)|(3P6fl??7^iDil~($y{c$^rE;~UF!U+YJ=i-4c;V=jTUx464*bQR)GU!2t?ksY{n_Hxb_+8W0~8TkTDXr4 zF#GXrnWc$`SB`;5I-`t>oBL?X_9^G+5^(~7bc$jBoBt?RRRXv6i`{u*6=U0^k`uGo z#_U$333}SsBz*dcM9n#QurGBvv1zVUFybt>w|!Q6+A6Kmg+@7AhVjRP##;-gRi|1t zg+?gptF|q+8ktQMlglgx74#|7(d5^Q<^*!(F1tw!UYXAq_B`=LMT9!+H^gk)pkf7b z)CnzSTTjg~(qoD9bF1n7$1wHJ^FIjuk3!&IgfgX~W2XGJF-^NImeDz2P2Y(B6L)b6&&CH+x@Iz* zrJiZZ*WWU*^zry*aCWM^E3Z30j=7u-uB#pc@N#Fzj1LwH&HK(dJKA9s_ZAe%Py)`d z_8V_+o{D3P8BV(sQ|_Dt?0tykgE#EfIwQtNJ7PEYbA{Q>bAKUB2UjLjd#mqaONG;v zyXTb_GT(mia95G{h_OY@stT%{^X`X-&K1k(M1Hp>4(rCmfohnXrKS4{!Sm)flN>B{ zi%D&upG1F5l{?<@l(J^i?~x!>L!(hVpkj}Gt9nz-Hv0{7#(4BI(*1idUszR#TBjr$Cr^wmZ8T*u_VrRV*fyBW|Vq@I5npO0i4wclcib zu9;1Gy@fVH{F^XBA`{7Pir-YIU6T00@#=INO~lwUbHwyB~lC*<_b(S&72*fU86R*^&9S8Ip6k=x0HMiLcisnmy06eCd^1lg2*S=&qsygCD{Rt=&q-D|&9b!2t zUUESRKrwk1m0q;i_GXz;Cp#4@N=Eg40q8CHzTL27Mwi~q?FO99qiJ}_Pz41S+95v2 zSi=^DH&(7fww|W+D?c8mDq!pOJ!Wln91>l&WK#O9Mhe+rB3S>$tiTowL3kGZzBR;* zkqouq!)+xWzobFia~kWh!5Wt^Ob-*nwl=BiuSzNu$>`+eEd|i*qSeCUdsmp7YmrRT zCGa<2%$3PZZ^G$NCe*z)is4?rIeR9H*0#eE^Rj3RI`oWNH1tg@9AjO!D5L$B`rBy2 zp&gZzp<-3XJhulDd%7Z4Rcn+3%a3~a3uXI8*B+}qiOT&ZU3~FvVg8%>FyFof^(GO% zrM|PZGI`j#|J9h=0g_~be}q^MF@@dlGr^>Y;%k5f)#OxCnttC~oE4Z8-=5~~uamo4 z46JSVEtDARQr(=?TXw5EJ7UUds=KQRj*iTn2OiLKF!Z<{llnru3|1MWdJ%1_N=Y*~B@Nne-4+Z~sMshpnBxb|a&FAN6xqkuTJh^w2cs3K( z9;LT|QxC{XxuzW2x}=c1kxNBVB_wGTaR(u&<3h1|#`89i(9GM;b zU?!dS^*T$Ypxs_o*Hi5wFgi~;*SEqZ1v^0Iqu#^kw6s|ElDdA|!-a@O^8GCU-K)4h zd$QyWd_{BV1sMjwPhk<_Y^#LFqF`G`>x`uy@9I`^Ixe)c`PN9tKghRSw#1fj>*@c! zq7$wWuHN3&#PM}&;bdQkg?1Tr% z;G1{kC#0yT$h2869v-SvAZr}o(;KTvovKjZXw!sVR^A##8m;Gke8B1(t~ z>9GBCmD=>tJXxEU<0Qu>jr=1jpR4C+U}js%d`X@|Ei?Hp*FZ~QwHnxscDAjXot4IN zqqwyRda}HhLIUOWP{!nx(3&i$YdOC|Bt|a}A~+>9CK+oVH<7975}v#m;_ym~|IZbM z7aT7eFxWt6bzj;+= z`fKtjWG#H=8SsmtW04K=d3Ax*@#+F&s(vj>c%Vn3F|hOhV(%@0+v>V(K^bC(m>K1m z?UGcz;Bj+vQbCdnwX9W%wuF|%VkGBf4rf74an)6+fmdj6UI-*kUfs$2IS zohw}}>6Z3cd+)V^ZWk@KNi^gD7GEWX{wC2_9$dgMNdUlJ2zie?)N#>ZIrx1gXO_(04Cf=@c za{!)lZf;5{7p%Q_1%7^v7H}FHPmnlKf@tbcqL+A+B=*6}S@IGL^z%)VFeIA3n@ct@ zTL;c8pi-g_JCF$cBnzjzc@jM?%vWv>$D=Ys>pdOfpzJJYp(&`jqu z$7%m@*$75D5g4#P)Z$>P;)$zk`b{xJ@}Ln*Cfz)}l_c!pc}eDM2^8y#I?~zpLgxm4 z)S^q`OMD?H)*=(<%@S1H$P9f-*BxEU~9V&f-+TY ziJ^~|tT^xYffsc7&XL^QRKR32Cu?$%=Vq?)65kS+#ZlP@mwV_~)$kpAm7~44LkEJm|7TyiPI*P4G6K6BuVn9Ok8)x zOl$BOGc++Vhf4QKn5_I11Z<6X4Hlv-xT0;-`v^&uoa3NvzcwnM3b5gUG{>nizk_)L zw7|Rp3|-Gj*PpLp0+iAa&R|qPDss!i!GngVg4tycb;=NBYQNVq`p|spsLUBYJS)6} z=+GiBhwr05wnF1~ODV>Q#P7%sUXxP+67vclCz{Ua!a_E40%P3_NxY3`pdd$WP>KjA0YF9wULmZ1*jH=6!ERVDxB9 zhS>-iIr(LdTe*7+P!E-zHr_RcJak1QYpAoad1l#Ob7slEAVv%oE*Qu_z#dyDUHa+5 zyHtnReuNm{&+@kK7FV{CxzaBT>*S8=r9)lFp;fG?VhFbv_|=-7yXD#S`o+Ec-MD1c z;Vr0Qw3mbqmroUz5+<`wTY>(#GO!Ogdl};)RndX^BP<^ zC`eNmA{b|;4j13arlbk0ypgx~)&>bbdCTmEl^YOV0NcO7Bj`7t6?`BC4Dij2u8(r1 zokNk6T$8MuJos)nQ<`I_MUr!p<<#b*n5?b-CDr*K-)CZC!v5rdWgGP}HYc{$Fq%BJ zkr5bOTSy${KNEr9M=DB3ab9Y%YwhI{4WIlEd>7Im3bxAhN5xVZ2dMQivY`l zkwF{C6V%@7qTmOHDUT*)FlgPthQ-lkTO)y-qJV(f5@KoX6WhON!j$c0Xoimn1GOp*Y_t9(#2gXED>CbY814k+M^rJo?DeDTgV%aFra&BB$Y}2AzlTFMj~b z;`Wjogh<|xEyR^Vc@ekEqjJ=5ZRz$o#Z5aofMCQljrv%Vf3?}Kz z-~?ER!tsI#KqVm!dMg81638ZDYrjN~Zd1!vR)!f;%UV|9BNSnVZNWI9_v%=5xJIu4 z?g!$dMV@nXZSOFwF#r^U`kV~Eu&*m!g@j)Y(WuGce~PPG?+Ek=J~7Y9(U!h(g67HS zV~;i_S(~4Y)j816;O)kmzJ&rR83Dh`sZly&s0aJA7? z$ka08kS_IaoA;IdT~o}@w?~&K)w|ROcR23S=1174*Vvv*62q22)E*hCt3KTk_*Z77|9q^gK55~C4KQWm#Q7nxmdkhw#dQKcf4 zFjzPEO+^Y;q!y@pmKGoA=UCQk$|UF%C*gV`-rIOxW%v`cU8~N|HtU&1AF8dQv8vF8=)QEz(xe!STRJNXJgMK>9Ejw28=}YAttbqxWM5TXFd4QCN>bNSuTz|=BeoKlB8OL< zhiEI;*4B(oaffLo(E!5~ED7^w^K4-3aV|+hzIro;8pCVVmL>RGCQ4NmH1amAr>=b0*@F8q+f0vX4 zy`*}&T7A^caMG5hV#Yyy#V_WZwvrfr%8%(N^=`l%?22EE!{vH|`H63c$pftGS)as&~69K6p#eI6N1xKFpFI97+flH+D65 zDlA+e4XI4U_a;p|EZ_6%jgnDFzZXY?af4QcQJ>7Jf8f<)j^QVWDsX9y-AFWt06 z;6>z1&^_@o=`)>6RvFtDpi6`q0Mjv^2G4YWcLpAQpEB{EL^D!X0^s8vm1=ayZCAn> zV670fHN4sVZH?nJ{BecvLMj0OUmwlIzHF$JfrSi4{SO;cHfj$#rr+MbZ2Y-oG4@77Z%aj)umo?o@cVb*tjv1VIuit)yLi=4r3F26Ec@|)XArylBPHZsg- zETKiB9ljO?lkuH6YoJ6mQBqha)gnQIXzF~Uima={S)@7#hWLkH{*`3WI-t7!Z^~V$zf2`aULLgVJh)G_fsYf9`V^Q0f8bzpAr{8-*qSw8xP z8z?xtV$S_v1@C>6)pR~yOw>syQ0>|IcvcV)08qY35Cbqwnppifc|0rVE)QGkNN-V9Ufp8Pnv60`CvDn5%6tkp%iV95?y>ig&&;@c`+|C?NI zr*V8k{ZW3uBVPQZU!;c;6)G5-gox{^= zp)zgBkfX8MjYMZc+=cfwEfY9Y+pu0lx{!+F8vU(}bE+=3dNU)!wX83=`iV#7L<@1( zb?N2+zl(iltB zb~cEssIaaU8u(8vuxjew!yV=+$Q9NmnmatfGj)+qnNM4^y%icwdW9Ywmw6;k3T4r3 zuF5_)-AC)x|SoP`DMGycQF5sh8;L3(;)C^L(IvL{LQ%TV$$bEV1oQRCsDYO$*XdYz)~XT zCjh;#`{Y%{6D|9zFL@59jLj#v0WCLGy0268#GZbyU07aMRQW)dtiOVs+g2BW8tJ}m zQ1SMjL1a5EJg|}*ueINh`|?J64-lb_VB|P!{7NXh52KVW8UR2Rv81masxp+tFdGj+ zRu+1;t(ZPY+K~^@(HE{21RiT&9`UOwI?%N`?c9Z$I?hrslkD1N>dFRKICQ9A3^>v` z9?;np`|Sli+bF@`g`e14=%Kd{W}yG3X?)-|`25 zawKM0=WIs5W=~^EcscQIhf3wCYU{Oi{hkh z9pgAAI@Ux>BhiLWFrN?_Gkew5_f9K4;)<}fr_Dr6eG|xzM!`&#LZ`(K%t4c)`ASEAG>{Muijm)mBDX!GkbSp*~^$f3(SE+SkL9BkY zJgR`GQm1PdYnGc8P>Tf*UIv*&>^d7Ow7Ab}vBDI7LRR>E>S3p#qJt0grs#*zNuM!A z+fFA#nA?hH?8zSF1GOLB7uQo5vr*MlVG|))_`Y08Oc`K#2Fe&S3pjNU{|Sy_Gzt4O zKq`egf>xk@^^lY`KYo|DysxvJh_g=J(i^jR@CPujKN$rAm$XF5Os~Jr_@}Y=S0HTc zcOs-7Kl|AKC&zOS>t%64kM`%c?>7IDRJjH-BoHjqEU{IQB+ycB2!C1a5qP6S9ar!W zweP1O+V7bNmig+S?!nOM8bJNMvwp77O262ctLRSqcHHUV3+Le?l%NqjiSu|6L*;m0 z$=P%lY~CZD#`0M)OAab%))tAJg*_^SbGLB&mHo{e6l`A;)Di&@_u@`)P4M^8$mN&k z=dYkOHjNAw{|-^`vH`@Plis?b8Om4|NKAkI=o% zH(&E5lrc~|sCP1B@@?PDpl5=bQF@KzxZvnqud8pIYkP5>QXOkD&^UwJo?D0!u`$uU+*|zE&&PQp}}I-;a=bCw&}31_USG zFLD;y5II(sT)o{x5m>_d5IRR?(@Z{YGCF-RwD_&+fp`8O!aX>^?2(5s)G8Ls)3Ti5 z>_y{{RIw?2L2Z%?Wf?Ag2uM63y=!YRq@tJYO_%6F+-m1G;12ofU9NY$sN8)HX?Iv5 zZ(UogBM%gs$U9Ri_ZdwzDvbg`q!yy7}xmOM#20US`EKR*~Q5a!H+^ zpT#aX16N6cgm8V8k(WUCH;8AE%tGzS}5f z_HDIMXUNA?{01iFezQ37hik1*XATg`RzMmyy@0uv^(}@G`L54GH6M56B17XWDwj&_#Zr^epeQJXWu9 zYnPidru#&Gxz7kYmTjv0cGjQgd|CFHrB10Mu(yuptUAy8@<55DPX0tZ{}W)~^DK|R zZ+4NJl+n%C-`N2mm^xs*QtRciDNE4%rxy5J-TAtIhv&;>W{HllH@##?RPiyJS)5pHa4P(6&J$q@VgrL9t9^lc2 zIZ2A5@TxA1?q5f(d^7`+4B1SCtm}n*WYmvqf$9;6$9|h(YTUbv6HnXAH+UWa3Og7FmsnI-`1Wd2%Q^-w`ax%CQlyg;bfWMU$wSqR zTC#rr__&99e5%VlI!Yo#eMP+(ShmVL{{^oNYW*6Qr3{iXZciS=q2nScR6RHMPld3C z-rF#POzTnFwxDkeCfhQIQwHEV~B2$yi3+}8PzjywyMx`L@G;v0Q z5~=A+a`pTQLi4Y zq^e#cf=RW#4K@^4HD7^i{IQyVjd`}_5s9=OCPKSzH&w3{-V{r22k0Tkh1YE@SSMi%98b3d|xCr^6t)=%1h4xO3Fggl+L1ZV!^}+DDs=Sna)Boa@BILym)^?bSiVD=r|M0*)Bf2+F#W8K=%XD4keH?Bzz}$QY}YKa_r{UxLrRdu>i?- zvf-TS4<(c2LiszK3sjNIo*VFlHFXrBZ@6T1k>jE2FHMhzNOp%vN;xc4I0e%?hhFqn z(rx9(Ehlw^F!2Kdm;srK&JkT-MRc?v*5)^UA&jddP`tOk=xh*DX>cl|kvNH$2FD@(xsRlLH%ZvmEwQ1l>qK~w z1gvdSrsD1X>Q&CB4ECtR)eVS66hY-|-=WM+D^p%V%G1b6(d#t+Kwj>tip;}Y6m`f{ z_XTBk-T`cx8tkqREd1Ih`Ccq<_>~jiLGVdXnc! z>drWncGl@dCcbVJe*d6ONfrO8WqyIv<8PvH2ig>TyKw=xAEH`Ib@gP0Wav$#$ACcc zXWyC95}W1dB}W#gB4KS)Pm;~s?KXRd8#zqg{V(wMXsF~Ew;)leEhvaoW@tPC$yZ}^ z`0S*?+uhIpbqq)Rk73$vU3c1y93;gh`2EIKo;qK&C9jJSc;Z6p-9Z)5m+0G zXO%vsjmg6|n^Mlcaa-Px z6&wsa1M~Om+OL1x_J~8$@tcBA_=yuFh&=oKK%HlW1j@ijG>*Gzj=TnJy*VDYl%pMP zJ#1d=w6P|{_0ri*x{yp-_OG0gLcLhBB%#d7v;C~pnulJ#K0R?m4JiA1b6dEqW1K5b z_jQ6p_)$wSuAa7y!`wLG-DcG94ryc2=Md0$_V{)S=k}SHx;XS6SNdj^bEJ_8gpEc|GWYf<- ztw7(?f;S5U>CAShl^qckE^x9stkO6E92bhk@MhEQ!OCOh(AEMhk&fFP5~|>8aU7|e zS(X7}oR+=1>B_hUiit$NV^oXq1-k}nK@Ah{A*#1qDqi^moLS!IiL4jJK9BX%^TX2%sg+fXy^Ft-nG)`FECol%QKervXP23TwPyd=6tOz&Z4W9fw zzcbC#`qu0NjVi%watG5XiyD1->MyM%zmY-iq=Us-nt8X1VP_H=$YhaV)uC;9yA{XZ zXz&47Y&qE9U^ZN3l;zxy!{;ZZtt(38-|P%^2}mBN`JBYU+od{#wEY{!eqoqM^tAwi z1RkI!JcMXdiWJQOssY!6d#x>16ss@5M*yv zF|mT#*L_{Se%WXSbLaTPrB3R~q@|xL@xW^a*W*MIJAN*|{T9sl%cVUW^@K%DW0rQF z=3|ny!B-cf^L$m|zq#R-MYFzj5RrLH7p`M>fHK{ps!oqgx%LN;Ye48|ZPh}S0^Zzo za@99`z#RV`VPDu1w%?re@qSuXwUq>UrYx3HT6^bJW&~PMD^z`6XNfsPjvVgDc8Wj) zpP5ku0oL`#{P1g*)%c_|e5$Uu8XD5KNML1sfr!bI*e$y$$a2+IY7mi$oT1Z_sx!5!-`#d?hNWrO|2oYdeheVA9X@E`9N=>;qBWd+Pi1Zi=JE zSpSms0dgGlY~joaCyptz-4_Z4!{)^5Q_27;(@xqPs7 zzxLwUccf9VDekU)Cy&7ynyEN^2w56MC}07;Vq4YFdz`P6$sN?#Me&#NU8so=kv1=2 z4YbCUrE6s&H?pKG6MdynZq-!bzTY@OFtI~gBldIyw(Tce_Q-T;P+u5zlr{igYSce@3`jjcvIJ{@f`Jp zSYx`U$&4rzzn*b7V^M&zw2pTJ-|DPmS^pNRp6j>gN7lOhz!wOg?TT%-Ni5mh9>4W7|eCvZB`pQ%|hjFL&=y@05M2^0jf$_*9G0)ro0* zm;TP)f-#FkQ?PJ?-+u=C51=xC3mdcfE64&*zJ5~h)u{gzk`q!uY1-)G@8OjULWh<^ z#j}NQH$88^pMYpCZ%1VK%m zB7vrPmPx8VxGI=`icqW}UscoC@?onJKitrWaeyZkZ2x&Pf~x8 zUM2PwvO(s)op`Wwcv}#UJh)eXuooeUc@vqOeEZe$*Z?cEY-8nU*gR~GnpWDlXQ%E| zpRX9MK7o99laWGp@>4T_UZZ>5&a2>~8(A>ZacqIR#c($$+8(IFk2Ba*-`^d>sgL;N z)1>cxren*T*Gt9G1v4({|V;Y|gFva0IWjg5V4 z3X<5Q6rN}MbO@dH>q14`ul19JiSg<${aogVJSko_H%GP_CPx_Z*{QbJqF|dKlUT5J~$?cG7NP;56RO?wYEB zxAF9~0C$YhWALix8W{X05@Xul^tLHyyRAY*=SJ2v>JLCE4|l^mqWdApC+f$!>5}cD zN)TO4Un&v$#NHo(=l5pXiX)$paSj~%_hx4KothmV3cyW?a^!`ArfQ9e3Eem12e23+ z4|V41zMx}+ZEPf5(*^#Z4iKnn<4=mZNAFu6@|IHj#Q#{h&ye)B(oOqPwwsx^&3ac$ z@m*9i_SU=%a2+HR$-yOYK&9xc#jCku!zJ;meSVUZb>GM5T1V-Ox_dmfHrV zoUb=Nu7M;7U8w1w<}cC+286DmO@QShmDS+*-Y!fjoW^x?QqzmFTvp==pO zhAPeLgB-Y+XIuXQsU{Bc-z-h-wt8+5NYuK>??`xz>u!SVvO5PBvVj)wv34vKj~(QV ztiH}L9;0-!jsv!cxfp=Ig2saT+uti1?h+S*%LP{tSvA#GOlZtgn_}Tdw9^sUi|Ycge>$`<9%{DMzO<#MM>+~TI#@7iz)4ia3=nlf+TyQ zBkR7ZzS(ZM2)w_gZkM;3?CIAe64!$Bvr$5;o)pC#kSPGPY z8~R`ptGL<${!45_x85QepP?-6dSMs)PLb27_c9M!Ow~5&9&M{3BnGOY-)RKZxJ+S5wQwu9U|s+d-Vc_uyh~nEa*=MGw}K*DByBJ$ zh3rCIu48AGAEd-93|LsdJ&2FxYopB{?vrRyCS+HIHIF$oSo;A;vZ7U`i+0c&Ryloz z-H0w@dhYnXPAda_>xUTmki!qsmzGoqyt*qY4ebd-h({;Fu$JNLMQoa7vp6R0!66b7 zgKoFsYFdB;hm)5;g&|$Kbt=-{?rZPQ-@wV;-!>Fly_L!Jx#+T;45Y+G$fM?FuQI?o z>?Lerf~ZURx_z_1i{~5;vTn9zcbZzvzK{dD{YCGdn6b&Nl~?6#1CF!{IA8nt#2;(& z&a;rau=_yi8{fL=$vwX(YfrfpDQ3p$Iy|7xU%Ui+ywO%JmY~IJsXRly9xzFF;n0b= z8rN%XY;MG0D~NkNL?`vlB@pf8N$3_?HfGg_2A`WN7b1I8tzc=8 zHSsvygp&~DxXL|Qb^%fu1}6>f8tM!P7_<)orxwP;raJ1QHrdy9=zJ){TfDC*K5n$3 zL?gL;Y~75xrmx+bWgq{+7}KP*F^Rlh1`~~MfYe~q=T}X#_A9b$kA{7f&y!vhpUL(` zY%)}r48BKZnW^QOMAuzW5?Qn+=#%*=X+BzT!r^~hdsaLnSIjKX`+i;DlByTb{luat zl_d+aa`v7WT*fX3EfvX_YVRnQhoTh~P^Ln3@gM%HWUw3D*Bn;fHON&prcUqT1iwby z4e?F2~?5VU+L!MWBx`vChub=2S^JX5ZHh;D*vk>`QJHy`>*`j z|8rgRiY?GeGB)9o{FzYEP#FL&R$!@v0WEP``k${Q%7XgRD6hl#ApCjqpSrX~;01X! zV)goy262UXP$o%RJxNv)697*(oeNcL*L9IaI28^a&Fcsdi$)il1=RszVxlp#F@>Za zMd5he^xmxbiBbpXDFMd9fKo;%+ef^(3~Q{?f5l zlf&b7B;2Y1W`3LV@j}JjSJ}>NHS4X&k~j&@<~t|OD2rwsxmlnPGy${K2-9)g-O({v zWO`6ZVhqW;DGyKQEn9JCo6>4+%1Rks?O}3Wewb3p;jVb5AX@h>{X06JtRi=_hw3Sw zKY;e8kax60^Toh#47E2e>ap9|lTAb@c~Ym>UI$d4bz_dz?jJ*H((~KnHgxB}rRygw zEgu&ykUy+yD41%s@MqhXnuSR)YsGFHlx^uZN4T>rx#MA|il`L4@;GmY6-vNH^J95^ zQ5*jNnCxa=KH8nol}B#?5nl*AL>1UsY4uXDoi~68$4U#j+n=!pco%YSB9r$`6gOD0 zw>J+i%AbH0)}DN>>k2k(2aU_b;g-mPJtU%Dy!Z>6&3ti><8>b{aW0W2A@u1!FZ8o? z&0M^V2?%-oJep^1k^}ww8`y<#&w6P!t)5{D2-$Ff&%$n>7yP-L}M>2#2F% z-4TGwtQHo?C|oYU0j{S|eXOoub*!x7K2(Wo&WmCx!$3C|gHJa-+qE68{#G|Cx<$q5 z@CqAD6WwqV{z7pqp1p*hR~9a0xig-=qV;H_X2GbYj6puTXK%=O5ce?^Wy}E^`8z?NYLkJ~k~DrfofKBbuI0uh zNN+e`%@IFs=9`&&8sCIQ#Dhn*sCe&%-o_BmY>_k>l5*<^u+*QqBrTrRwK%yA@Kr!9 zC#xwe3jTF7*IunE`CL2}IQ@1Yh3~sns)RMF|2NkuH<9gnNGRgrjkR)_sC5gFcTKwxM}1=WfwmhE3D8QHS!d) zvPaDNEG4YI?2&C_jxOV(9y%NMlVI%5w5KgMu9aZzmrP;voA5;p{T&2i z=yf$nYU{Ute{uEQFtGL-tmAgP#-RF{=U#Q_5+0QE%rX^fL=GeHrrc$Vx!G3Ri(>gt z9agWvHGRv~7nute2GfJtx6KYdn)IM;f$CjtP^zT~*BDtabjT-Nd49&%B5JNJeY2Ig z`vw#)zRah}s=!LtWiY^(-%UD7DtD~t2r`qI(G;E@fwgfNxoQVkCbb*LA_R3i*$O|( zTTMp`m+vO@7tlo@5qnI+sC2Y}EQS!vC=KRcPVXwz!vS&OO*E+tVQ*qXfiWR4B{BcL z)4W9o)1l1#58ZzN;-73+M%-bY>fwoPSid^LPsZr3P0D|iXSVGjL)GxEfeJ>^!lVlN}d^PWfI9bIUi zuqI#pR;x+drQ-}=?7M!;{Z;?xDmA4U1BMNka&42T}g|4hsy-Wh?>jW>re$}L}Bce-3Qu#&jDi`;x z0N7-5n{~#wNGYy+{1dFI*ItLM_hL^-yl?FK22{$BM!-k zc(9*?vaIeY%m>BYYktRQw2vlun6~=Z^KF^+SxE*;wMRJlrb!#f0owS?^{TcPm{55< zj6l)V)mZ7Oa_6et+GcH%8xstmiC2ND+}WzMwp$mrm{LoixX!_h3J9kRsO2J(B*}yN z1VEX_apo(`Eb-1+8)ZYJlEgC8Dny0l-T1qbl4wx`(h^9FWwYh~0I8ymBCUD%pHC|8 z9lV?`&eeRTUPYAz^DCWC~{7Q8q|-Pgo{JCsuB=5 z9!t8uf7V^Dniu^$s|l{KrCb zhKq4^13d+A5y%`HELGP=uNufY|F380BLjgY|GZ&iW0Mr^=NswTw%S%Y6@Enp`oBMv zwhy%QaJKbC-1Uf)*chJkSiJGHBy4t4l(yQ%e2V>7PtRvHXxhf#<^BQCUVne~uXzc& z9(sZ`ttI+wymT%80Z91lyf6d~cRKz7e75}qC=j)|6Mm625PxaS-?!E3*|=ange_yo z{s6djyRcxVX(_=xH7xJ5mO+)vl}Sg-c5mzL-_P*B9FGF3yhfhQfK z*q8s;8CmF6zKPl1+bMyyTi;U^Ud?w7k9VAZ5?^xK7%dGsTHm!;_bQ?KE#SDPrasS0 zLyeL5>Yq32FAKAN8!V6SXUAR+a$27nG{hl|8~md$Q;nD6tq1Hdu$7?p{Noqj8`(rA zPrQHR|KPQIE%k2rUv7CC8ymlbA=Uw~ieaj>xcva3WEmM{xz~Ur1&wI6+Pu%qWXINY zFWaArUvw(#Y&`IO?0xXJr^{cJRfoNtCYxffw_mvmqe*cLyiim_ zjkO0oQfJNxRa!y7DHXpFGvBmPeUtZ_mc(vBVn6_antYutN*=e1DqYwwL(sEz^&>BVX)cjoIyJ zLE02(8b8ABt9K*S_VSSZ0vM~t$3a#O&?KxXG%z?SbZ0&n1nku{!oB!t@t3`sA1or% zJGE{OXBBpx%gUP&G~DL|2nUnL%}+CjDOUoNIRH>C5lc*Px-;UIytR{_2OKl8>HWb~ z)x$v25bY1Nod8d4pOW&Ug*H9On?W;K*RNAD^Y2H`bemF-o-Iu@Nb+Ro983g;txdR90LuuEn42&K0a-|Iz%Esd5T4P&Y`b zy_#dz-sBb4I|KUUB7wtl*xPw%wzXa;fz9n#Y zvg8d0-5Fx){?d=bJq#y72onYNU2MCQZvUS`JzRgj4u2}aK-If|8ge1&DdGr*k*9+%z{X=>ec-a zwaJT3zH($}{)O;Dl01gMg6+2TMA-6JTWk8g2fB8@igc3Jn}sV(SB8yjIv+wcVo)9YTJ|pewb=zh{#?VbJDER$57ooZByC7n$UjQj z#3*q!E$Q8*>9p4$fXMtXI5x(HR%Wc4YeEXv5Wrh~vlx97Z7D0w(&;@^UIB?U=M`mN z=a0v$&R#(X4w8~SQ2lH}dos!cw9BqvI#lBVrm_~K2&aq2apo^HR9_vpZ`|F6Wx9r} z0iJ9{wOj!@L=1oixThmQFlf`LL5bXM4S|ECtcT*_C@O-E=-rV1f!NEuh@8_~e8xZl z{+&O7qY9sxs4JnP3fOOe5oTm^xZtVKWoxzey+fMeygJrVA?p`iTK322guc?P>*_TV z^m`R5rHmr*2YTlgt-L3x@N;)zGnC$8DZ^gd7dCI@Sko*6u87OJqa zZkiWOaEO&fU9XXjG(ssCCZusiIKwO`xS>$J^FE9lS-js_Tte4K?U+KNG=(e6pQcx7 z3lQVf^KIH^@*V9lEA2vfW0e>vj^-iNgpu{@Z~k_0Xk(N058@50a~-APRloYlks7>} zMZ4&m%4@0giH+#^iuQgO>z(Lxr8pB}xJImBFNYp^;$hY{JG3}NtEnw4@qDy)Gv0J6 zzQlF*9b3b;I^$X;*O`HL(+vA*`a$g6mH6cMPde9_8m2MJN8E$P`m~nJ&Pxw3ydt`( zqxMLe)NYl!rTK1Wkvxw-LAzlAGCiYw!Z?P(#cQHB$=2@FXqWH7-EJc!;3y)l_i;Y; zp;|o0;q9{^Fgajm5G-++#Jnf_H#IWG|3?o{xnWmFckJzp=RW?MmhYQ>WtK^bsX<}tdzImzpH?>FWB6H?R)?eKY%dD61$=08c&}S{Qy1Nz!!q9)CMY>&b8xK=X-n5 zXN+H5zffJ#4SC0~)ca9@mip&8K5qZ9;WDopLjzUwX= zt$_=$h?|>GahSLJh<)~N&)jQ_M=|gU=bh*MarixepBBtdAc~~~@By%-&4|#rZxb)7 zFTO2(zFUoISyjIh6oLB%0PD$|g)K&y!p!lyvTt`>n0K7jrgqPe3*f75#Id%c5cm?k z1zO|xHB#a;;<|%4YzB+YP8bWgt%;JOGhWPfWbPw=qaYytJCkpNMLvj?;WrWe);(D= zD9{{ zx`x7Ztg>+$hQVhMn{~zqVbzB_nwkFy*L4yW&cXuav zaA@2uxO;#Q0twJ~fZz~Z8gE>K2My9V!QI_6{mtCpoZ07|z31M2=E&^x{pyb{O6aQf zR=vH}dY<*H*3nYG3t$Sme@sIfw&o)7YQl1?h!^Gf=!&z-Kze<((TH>XU_k8LIAZLf z`&908z;D1Q__C9e(y{G=`=+*Oe9H9-9C8c1H%pC!toVO@m^ZfDYrKbcKC%XJ4i7Ax zh>ZjtR`OmhJ~`6-*#8aaCbbJ<1F>F zTB-%dFw@*d&X>@c#o5Kv8*tn$aKSerG|+p;>g2K2^ZA$;=4?H(I*Sf=wYOZ<+&r5! z!x$gLPmWg41Pxi6=BCU3a}U1(97`~e0Q3wQ^2>sV0zWBE{~8srNoS6vYZ2hSb$=4o zqhI%#J~AYy)ehmCPkCEJm$*AOur_dcWSkngs114R6{Gq%Woz*5YobcN73C+&DM&y= z+;0GtBjx=K>{FUxe?jswv?AnSMe+$zul((Az&UcwZ$K5yX}HAEW7#tPyJp|NZ_fVn ztUoRAS1j=F#hFVVDJjhQP(n80n;}8hCBnC<@gh zTEIrB(ww?1x!gVa7P{G#A}uU5m+x~T`}i>9xD3A{&NW`9cB)JYm$XB6|J=SGg1me0 zl3~x*@T&aXc|3&;Edn_*oX~+$i}}aiQ9NGmwMGn_TVU$PiuwmfC7ar2L5|HAWHAL+ zxdRS%k=~BOMl+3#p19!{iewEAMO8s!g78ck=IEKB&=$1z;+k1Ln$)lqdKFIM0a>84 z(scb~Be_yHZHMSj3X2RYkv@*YW zN#Yk zI!MTtXMWzYNDPuQ%sPUJ*o>Wlegn|C2#}V+?VAj&C5ejn@yqS|qDYhV8`q?Gt}n*9 z^EPurv^CndV&a)5Y0S?7fAni+CQnIl{Jil}21~7%@l5C$61ff*SzXGQMp|P*u)5`b zVig327Fqbez?JzStevS_0+aGFpfw-tp$h9R%O;tUJ37f=Ny3m6R3mOhi&m_Ekn6ue z8IH@b3}U`o)RU8wcDJ(kT03SpPkY~GPm3XSpYA9h>oHgn)F;hf26&KYy`v*FbI6YEO^9 zvp&Rn{Muy27G?PWbP@MGqvWpBB6Vo&(dQucs@&KorlcX`9i8PiZ2}Ev$hjl(_rnF_ zPohy2#0mk*$E(?*pSAR3HT!j-;G$+{Z|`fjN{||UH4%RnGQ^er!Jt+NKQAg~)jUB9 znq~jrlZ`yD4XoZS*%%YhPLs&MAyP!7@7B`Ng394huLydL-ibzFYT5=)(U%FiQ;cx$ z z@2&|W?!W!O52Tgk8mIdWP}cJpPbI@zW+7*M6Q)%6Qo40!KJqTk^XN^ShQYC2lxp?Y z0+xhquH4HTaZfE{CblB_rxgifpik&J>dC?$M^UUWP|FHPEkDK`XBttz71*kXyPTB| zeA@1E^WVt%k(Yu%8b)IgwS!X0YQ89Sk+wo_FA%=eBj&wv0hUB_)bk~oZt%+536 zh7LcPMy!MeD1EgOxx0a&;dqkA&jRUWtQU-6f_7J)C8Gg8|3#Vz;v29~fq|vct9*bQ z0lP$$bxp-jF&Canv>cdv>L+>m-5%t`=3!T~udYCaCTui;RrvFu7*E5LGdebM;!1^Cr9xyO_2_qc5GKy$y`Xow+-t;oygJ>)gu)R*D7cLfc*CF{SdT9v% z`Ucb4olvHUG!&)8V#$p)bGnVO9J5A$$d#q3N4Yo8%q$g#(eT$R7$6TJRCN-V>5{ai z{07+78oe8AHTCH8xnJ`i*mkV^d5D}=Xk_5}Ycp4CrMTU(hcX#$AoGYErsQsSjVXd| zviMlo?W(;}Ii37>TcuKc{mtmBh~!rVv#2qb-LBi;nk6H7`AkW4Op}ah1gDTX)1x+@ z=|+C~*=q)eW#B!vVjH8=4}iVFa$M<>!a00}A3T2acb}15fs& zs(<027UezG0+?*ks08E4QH8-GY_gJW3o`gDd5>Dfcbmi8P-Ex%$c%h;WebZWc4^wC z*58dS^ksFP7LX3ie1)}b&5f$>U^;W4DZ33p%LfD{nxgCgQ~M~`U+bD-uj!a$*Q#_q z?oZz4_mxDIuRXGW;I5VYD6=>p=}kFdC!;(ot3LB}o+kTsKEJ2DV-~!f{0;c73)7Cc z2ZU@E{Myicq;xK;ie9-sQEzHA3Z+#vB-BA_9+0>)>%LwnyKO&lfmK6$H{tSy=h;XZ zWqO|c)e~yYU|`%q$o+%a6WoE-y696nic^6TY^ zv(eCu*MzkxQ7L!zeJM)Lk20fovh5I8M%^jt1V77y{+|5;iV&84FLnIA^hJN*IiKf- z#G8DK;Hb{u~G0@qoDReI&yf zK|78iXbxCe%7~A|A95xp{lJHv{_-cWDho4+7mwrr;mnT?v{lXthDnf@%Sh*ev8vSS8s zsJY;r{H|+*#T^zwgL7_`+st4XD#CBT`=pQ-|yo;F+`~iu#6FSJe2TFdX zKcoDNfqWvD&5L~0p<59q49hxBw&QO5=bpATVxWzZCwQQCW8g`S{eRuo%o!eE>a1*u zyh)OUKh zvwtkv@3bRdw>WiS9@a8TYCN)arf!~g0+JM z_o4N1+m4Mte*-o#t$qVc&;EELTZ1T{rJ#~mCzQ`0SpV6+O5GpL|M%c+e*&NXxEAo= z>65%_aeb%_IfU%w;Qt0d_d1V*wEmn$yZ=UqPXxwe|^S#0%>DhU?GJnQK)M)(vW}q+{()lGxwcT3+tRDlM9PK8+dbK0aC8c{=vZX_yafm&{epsm8P! z;k-QMT6@Uxx@2~%U&mMCwrn<(-v|}{wM}D+E)O!1sPvfo6~gJL%32biVZD+QDsWv6 zA*SQS?>AnWm`eQ3tAlKo zOH^pOzH=*mLbaNNlTI*4TY8VYZDT^dzx9hI3aM^JiY(SI6ObT7)@gnI7cc40o*EfJ!)y-m+a!I^(t&wiPWLt@M4jFQ-Xe)U-yiEHcjko zh^+T@S+BG4W78w|Rq?VCSIfIPcv29>a(U9Mt2feYrQj%!mp7Vr6)e*`gp)Ykv5Oc(E^3al#@hoOsGA91}*c44TEU11oVfaO`{*wio=U z4Wjto`m4dnUzKx#=6v>q%RSVD?N$7!6C)M@>d=yVDS^9!2i-7h}F?XV%xeR5BU?neHmg7I zNKVm9*o6C%=)pTC=b7UBis7Dzd_JwZE4!m!&zIVQlz`YA3#GI&Ru>zuRAaT2R9#lp z%K#@~nx`7p0D@qvaWIju&Y0=K!zAkyb-QDt)(;ts*hNQ?gEv!oH9i8T{WK=)s`)Gm z!sCX{SwEJ$=r!~*Uj~xU$;5_8hRxTm z(xI*LTydOw7;gx?Dou<+bwiMs=b*W#Thb5{o>G@ zl}N@ANVz^ArsMi1R_s|#R&xV(#_N40n0nysQSv%%LB|Ra0~X{q7CR)4dr^O7#>{-l z9hF@G^-`g-ou*5Ct$88BAYpa1m+k}8XV_LLw$q$R4oqj|QO7ez*F>sdb}54|MR5oC zY?pS_a3<13@PEdFBa3GOb7DHozA_Ck>9~sYJ)uoFbl8py16|d*+LG{3MTk6F&ol($hbF8M`q9u#RUb%_-MQKJ9C=kr~rl%xUO zimc`_AVg%JPxNNmNo)_FwVc4oi*Y*G+p5Qrhi+g0t28O^+|6^col0%u_rTAxb&K!7 z6dMy436JIS4F}49C*1c}(uV);|E7AFWyruhsZd@^~9*L#^CR5y-%NyL|@7$+qs|%Hiamx z^%oQLM|quhsXFzBA8W>X-X%(M8s?vFXh&_xiTIl{G+4^qFbxPl4pF9peF*0CEadQE z=(t9;E6=!wF1^oJ<9pBKXJ^yEGnwwTVR31loefW5> zu-W#^+*^_Y!~O)MB`0(Ocsa`N8zsL1;x#4_p=ELv_AgEPyJ|G(G#@!c;w}~m{^es0 zbqfu3mWNwi;v z4M^lwlyhuiAyL_zn$oe^B|nM*a`A)VM*}OY@bwIL>wBsK?6$PEQb&47Bqr6kT= zLt*a90@cz9F_XBy2g){50OjXHBdrNn=a(+Npf3~^!Uz}e?by9O?4@r78$Jt)_A6rp z$wKc!jpKU6wS-2;l35WG7Dqu8f@Bt-W<&31X-~&>VtG9amAmPDF04>u28b-KR)Zmf zkGwOqprt)6ST3n;u6x71pD4(W5*&c}$MR1s+<%dptK%DwUM_sJcsxozd@#Zj@-=-SHX^l4WfRwhKY9F1vvq1sGgTn7NSp5zIeRWY7diQRho zu<>V%3T*m&i~!&|J3ej&x8?k3yI32YtA@JrEhcOYa{-W_@9~oF&qG)))Bjj5C?f+t z`ig+tGo45eOMDaNGsuFag1xsTaj#I1y^R8e<^<<8PH5`{gdX2N9*hLj@8c}(H^9`p zU2F4J6l|T`@8Irc^ZpXLG7X0zfP*9h&+Qwq=SVJq@Gn3w4_MUnpJmGU6JYx@>PF*c?)Be?hLiHq5N$3$N~~yyoqN(pxk!NTRf9 zj5D^p%xhd!NZ(NGb))B_k*3(?&DJhrmr2?G+4|h_d`5Z$uz4EfnEM;>(M%a5Q3cLN zif3d8Gfs2zV{20sEv`CEK0qLX%3amYj>wix{I1WTo(qSh-hE9-O2_@Oyd@;s@@I%= z(9WB<4|!b50axOz>dGB&btkn(GM!JN#-aQNs^sfRJ*`S(bEnF2+M|x0QAdgI78YH@ ztKZ~!HFj`6?zwvV7@*CUaf-Eau9PS8H3|&TXO?74^Q6jnpbp;a$>9(2DbbLXmzW2| z&{5ETasZFG)Y@Tb*eD#N4@dVV$iLQ05P4GplnG-&6}lRgXqZW!;EC6T>F@M8=d*rm z`5&uN*QfklnLWf`FD8m(tRfGW2xYj&ffL9U2<0{z>2oBn6Z6J&64@cBb z;Kq9Zg~-ots43u5}g@$Am15+1SIAl_h{E=EV;v#(UxoSJ<{~ zDOL25*NVbvv_oc*FxGGI0jPWS0 zr4_1bV+6!et1hP1i7)s+VTBd$rrL*W*V3FltjHvF2_N0gh>A-b-!E@LA`3oyCj9Kk zhA5MYMkb^S&6OOSJoC4bMY5no&P-q02qRZ2*q7Sh+w`tkha1C2d2T1irRS_P5q;Q5 z*aj{iLndKjWA^cWYJWD|wZLe*F(qWV<+1GO(STgiFhj1{!o$*ELY@p+{HX-oZuv~k zKq+n^pREn?>y{`;|Jm6b+b_u_U!)*JUU&8F+XSSpv_3(FNJ+!xE&%7vlsBHA0XcjN z=xid26|euN(YPl;>td159cbgj{9yNhX|lS}7y9qt7i9re42vbr_b#R2z3YI2$H?7D3tP)CziUjXER}d2Kj`pHHsZ6q8eo1y#IZ9}J zd6XH5mi=Swe4lhU0s>PRew)^l@LV1HL-Z^?^8K9*`G^`SWzSW`K@P4eavmE)_v2Sm zTnP`wD}<{O_8qKJF{~inPlzO_EOWH!a~~X?VZ9&XoF){72A?AeqzpA$mFPC!o*XHv z2hWIdq3TxyFm@(e#tDq)_DHNumkbodK(7^15$M9EpWq5q1+HJbPa~$q9JUL)tfV+L z(S8k5qE)Vm1OOy|3{+k6_ZgVQ=q|}R(In`($BK`@$3AY(q^+}4bM=MyyEO~JSYKGT zAH*#MVl`@(Ih3R`39bm*i_GTt8e(|ltvkgyPFj%E?Yv#>;Sx03$ECC11DEaf9V1I+ zJ)O(^HT$+wco6F0PIAD;b)63$!$^}QTFpyB>uGE?-FQl@Wi+4^tM{yEXMDRo=)zk~ zjHG4&4#~79R)Kxdx6yPwVjyf|0Ltx8vS>C(8nS$huzgm~@2{kLp%HWdj;Ah=ztJ$5g$ zE89p}yI>ObC2`3l3|m++NYf2)WuV;UHNlSH%TiEPiR3prfh?)%Jgs*F#sN81gz*#_QrX_&0zlBs{~B(o*6eSTfmd(ofLnXJvK> z_OC7M(>iCXLxpA6f%9VY5XvQ6VMZM04R>zr8F~BfzXooIW0iW9{g&RyUTLPgHJ&~7 zTP3>VW((rIeA2OP5My3jJYv{iztC!b#XFX#hAt>OU9$-t2p1C#Dp)o)bYMSwf)21D4p&5tjzLSR3rHS% zRW8B(lf(Onh1EzQlAIA2kD1Dx6|3uhCWmM@RpdIE0G!_&n-uxtOFS3BC-WeO<4_BZ zv~LUZYJ-z9O_m?y{TV?s%kbY#c;z6qSqTN;6=_yPTzRGafTpIM#;7*7S@U09qIqj(7Xp|_=-$zFuZS-)l7GokB z926Iw6|cNC(@!o*|6;V<`scTLf=#Z|pkJ$8+k%3XC38%IQN)r|uJ(HisoIQ*{uAl~MAdVt$C zaDQ>_Dv7)A9kQ&1uQqQ?!-%&W;iEqquB24n_2kiA4HQ>bjkqde!zV`s=90r z2mcgJQEm3tbj{~EYnl_guT#6n1k>;!Jt^>xM_bwvob z^^&2m;{}69MI_PvlC)bQoUODp$y_LH&zHjHvFrR@q!07a z)#ARd&`lrbW#~_TQ6T*^-_}TVtr=c<-@1QN9)G4DrdI6xlvku&ZEF1rNQXk34=+M_ z(dR+Id}=>^{2NfuHv~i<`LN&ne$&fbLH60NQnpV5QzYxxbwnCVw|~5eemq$$2z57P zoFIoFHO|_sPJALt%=vo|q`^C?m%*sjlJWB%3Q zxEDAQZvi@2Nj>`#uQefNj~1hLs27U|oz2}NIF2D|gS_XrW`KMM|}2)8P(`fA&p zR$yNw*&A#sk(o1A(`J^dkf*&=#-hLhRoNWF)_*f8R}vA+#FF%+(#TSra>4ln;bU z8U(Vj1zwN)A3B8IZp{%D92G1;mZ6Y1dS*jj*)+4#fPEg3v=Kl0%%G$%pV5pHHgtdatedcE^s~hAn{e*75Gd-0 zAL2_Mg$?aJ;LoROUYjUToR!MjFFe(9v!HY%uQ&4gwRqDwi)U4@vzd;m{cK1R`iXS! zAeiC>js|wB16$?+!9lDR5F`91F8&`c;JQ(AB~6pFigyWp1SJCa5``XG2k#unMHYI- zc#PB-Y>t}9A^h#j?L!ubL)4~uYVJU96M1o-sc~-(rG+vq9*3=?h57DnE;gkugyzzN zudXCZ(jnLj?3NE?l`oSS1ME8U$^`DaQ#7hx>R$85l{=)0=#F5sxewW}ejetK&6AA9 z^lI7u#+OC0eOvt1!Z`ej?!Bb8p&srXSN+#96y@e}vk=?8y5+-PTUF>V!tj%-hs3Kh z$aNuD!6pb_j&V>vMv=))y^1!f&_0B&?(-%B<`j8(#_4ZBROJceHKa9`r*@NBN?IHA zth)k@@gcw*IWv-(RAuRH7vw(C9A%LswkM;y;?v6-viIcqG33fVxWhol*?L}S!oHIt zqe-F`d=82m%WbnZQhJlXS6&}RQfj#}N5GKCZ-%vdfS3VZ*_P#)gk#}CSyjK)J%~JZ zvQV5pk6g<1EWR*kEwOWGn|FU=<5***i0c+?HkIF48MG!Ec_W znn1&u&P%*G$}4AtNgRb_stBBEguy@|M6!@@r~L*_zt5Jm9(G~B_$|wJPZ!_5S%2Q9 zYfc3)2``90Loh$O?G=WP4Aw~qjkz&_14kvMG<5`VVAU+uLPGaZo$iiEbJ{G(12UKd zY5r?Q_C?sP%%N^@QmvoXM(xbruI4M*_rUeqe(UN6U9-6Elj)S2P_9lZsJ4SA}xwBD0#r8?tPG*W^ zq_*13asZam`FaDG)7iyEhQEw9r)2;oXE3-EZws7Iw zvh-#`<1$^BavADG3Y*Xo;2 zA405n)u|JVA6HW~GfR%9!)|qM@a!AXB8bxp=xpL>Dbgx9iI)b%Q|G=Vv~7C(7`w@1 z6F*8nvQjLnPH+4yZf&XcL#ps+d3-gHd!Y1`#!IW(1;V(Peq!0qDKn82vPNrC( z2Ccr}5{X?e_Y*YfG{cbwP_H*TCsJunp=4fd!1@Qgh*+h;B3mm*e|^SK>|fbUHi;6Y zUEhxnKqion1WdE}p)Pp-yKeo>?2)Q;~m0*4v?oCZxfIhju# z^%8ccaCsmII|X}wV5-xaGe}IaA#!?riyHC=;F-7@# z3}I;ZPE%>Pi8MnU=yZCP&kIDQFxeDjq(kVg7>+};Pkq(D2bD5t(c4%hR4P8CX{`%c|+0(ip<$-xM!q9YWJ%S^dn%lknY- z4{D@F*_E^=b@aHRqy4EZrMy{T8OP>O7w3ZTNO(CG#UuRy==%lGff!Gjgg~{+ruwhTqqK>%zkETx)T6@D0vv zi7Qisu9 zfc2&>%h~w^=Z>H}zX5)mi6MA;KIxk~6RuESo#+wcL`t{q=Ez6M684@lxor+5>?RM% z{$%Mgm^$y=U@&Z1V{W$x$vZ-8A3**HP$JgqEj zLmKXrat1k0z%}bfS3$kd0C^}#DsPI(KO1^f^ur%LKQc1{yQ;ot)&*0``(=0O4{iGaJuq}G}akj7WVO3Q82?=RdZ*ew!s)pt0d&Xy!YyPQhjkO}e#@?UQ;!`C-n z=|(qV4q?k06v^DknYzk1fTXVrjYu_o_xRQ-qK!z(L6h+}59QgJtdLmn!gY?PHwbC&d8H?eNbv=DuS5}@SCLw4!JdCN=Xs00WjMh( z&la!zYkxLLV|iK5#*u#Hs6nV-A1aWxvCMM6Gni~$zvLFjA&^r_E}7b{K$wpaSHC6x zqCy3m$bQ!IPU{8xxf_^)E4aW5j49iba?@zLs(kiptBV-rk%bk^z#2KyI*%T-iek9S zDxym5MYO2Fb9X}AtI~+zzQLGY24yW`ZsFXnJ&guoWN(K}MQt-Ii$9neD^I@JK&(c% zLmSK9HVR8aWvp~JINj4Q+PA$OJb$yV%^6okgfN--c0mr#I1fdb{2789k4?*Od;Wrh zMmNeQJUPrpN_fEPNUy$Jpra!e7bc1+iU$J%lWF`j6GY>cDp7SmQ#%v6LFO`}q5B=)*rC+dx>{*=a%)nb7vEpJ7`*63SJwb68)W zbbH$U%ffF5)j z1fl1~&WgI|UNntP23}99i1%zjGka0ow{wD$%R+nU$E;c{EPt{;{bo zhCFVh?Yh&4+L}|_^$AfGVWgidv7{gS{425n(3D%5QoQf#_ou5&6dP5026k$hO7#G} zhxtl^NAfcExk#YE=bWqVe!#tVmG%cth83Yk!nr(;fJ_WvOH3JxF@|35ABBwyiHrCg z>hFMhk*?n$H8@JBU&rGKVgUmoTRQxKUn1sc`{{Vx;PwUrg52>6Jc$OiGDv+frym}G z$fs~<@9jDq+DE@FwP8&vcBtIFXYOKLDNKLWm}H%g`QA_b^I*wL7)DegaiZT%uSYX& z!V(!tNu%kt}|c)sTnN&|=07hoS9Gsg@krosL1dkYpP6(6li} zLrax1C>bFN)1UraWak^~gW+|MK3aD=T9{JdvPvhnm3*#F%NRCv6gvZ{oZAcVNu@FB zh9!MpO2u-aukN6bmjxM^M-9b0$7DX{Vl{zPj7U{@`k@O8O_B1Rp`&J=a8+H)E^~+6g9Dp@IBI8N|=F z@{6tH4g;U6iaY8&-7k0qmT&uE>Fz?u|DdSqU#a%`=jnf1;Qx9H{F@_Q|0)Z8DwOpW zYJj9z3OZ@cU6r`(5JGFm^n6naYYt#_N?lWshqS7VC>xnXfhDH?rCIT!+lc)j8KGhw z?jMV8`AD9VWex$>B$=#aeP2p{)Ls#^`rDCjT38QKZQ@MpsT=(CGwm19I$`^|{{lcc z*{rH-UDM|G>e!sXQdle`)-eNmm9Z!~om*bN#NfsSxrWKHeC%^exmhdR^tS(V^RX`@|4csW3$`fPy*EP!8#-N7`+w z03=YyumpDt6GW5H3e_cOB-oc)ePS(|s<3NlZEa~uqYKO`MlEmxP>Am@xBoC=iyw6* z+Z}m<6h|~JBT*mULVN7j8znE0C})C7q5GAPNUMpKc!*qK?LPfLOgPZq-Q{%A=qWRA zn8DXh{9>h%Z}>pw8=@vc()XmSUootsWFB-bYld&aQQB>9JB0yq;|ft0z|zrv%ZGUU2o0canVApIkYE zH-23UH#qJv4`#B6UEwBU!a%63Fv_v41>X$~RuS2h3t)+0&*mF|M___SK$oXDsBPo$$w!{u+IgrPyTd5dt~f2BT!5N{O);PE~5R z)$C-6U#nj8B-n!Rs{wza6z0$*I#H5*!2@@|CFig|CImu%Go zykYUiUPZTB*E@Q($yZ3R7I;SrEWDZ4XLq`R&Ujwhrh8|iY>5_Y2n-UB0kI+2!4Ol$ z>|-1;)By}xsvSyoht9&c)%M!UN^pZ&p@3Ko&;B6c(nW=7Ai)C|hfRhd~@$Z%*&j z@T+&NxJp}b{Ne!=84eAG?*#TAzH_czjY~pXGKA#XE8LNn@V&22{&n-t+1`3SIW}Tv3$QFN{c<5Pu?^dkrG4AMpwh0b#3T z!QTCY{bHIXu+cgh@J5gj3bZxJNU*i*S`r#t^58u&NS*E5NF?$oINL@TH+46Tt=mAN zd|O`^Ud!_#=ypeJ)#`p0+rE6o$f2|(s7Ra3ZLF_W-!48#0iqQO!U9tchb=T4iS8Y7 zYV8Xfi&Von^}Z<3c2D&)ilmYXQ@KMKrQk!gZZnCmS73A#UAtzkHe7`#s92cEbH;ig zyUdn3pm+R6Fk$HO9twU7-TGHKxBu8t?|<&V;t$*F@{@~9=`;x3OtAg56B0AO{~(M% zWT~X>6h)*PxJc?OgMhgtqkoY}F)&Rrlff9y*9%c}wiM`A64p-iJjjN`RCSX>5>+sa z%U3h6B|2kSC3fZ3j`%|C7Mav}w-J-3y%|pl64sfF)ao0jDk^P5DS6)P&iRVLja$rK zRz#~bgLtSVTj|K#%fi2z_LVt1_XCWUYT5VJ0Mnx^0i$<`(0m)Wp$Y1pWTY9T7-kvGv z`^*l|E;R0sI%&!g^esKUt_#y~^sY}h`gvWyc6USVKKSSm?VwnjJf|EMGyj}CJicM{ zwrT;+=>)A|ipeE7*Q$8KK=S3)-gYaAF{5*dZo=o^fRvA*xSoXDX_4d}-aD1==6ELt z9egv}DH{(Gv<*uv&9t{e#FXJ4z?++t_#uM!k%<8gbVSS6n#(elJ~iFX68+63E_R^? z7U_yT-l)B{6?-jitxuPSE2hU%VvSY&_(_OvsLKbVF1NltW(%ho<&o(^zUD~8wpTk4 z(Fb&m^ot+$^(neO-+4+G<2)xUi$W<@5y+gRh4v=Pe%<;!|I&8UH&3*&`r@r!AGRN; ztFkCu!T-XUejp3|P!vmAVp@dH17Ff#O{n6a+}`Pgt|5uS$+iAScxR!#RSmB)%YkFe z1yPY+^2pro3cTCsuhe;BZg*e`wq_s%uCH!eklf{*Smi(~ruGB=jDr7)1^%;#ZvT%* zSlwpl>2JT*8tJ6M76z9kQnr)G(vX)^(}X~E>Nidhw_fT(GvPuqN0y09WgR}w(6%9; zz8%6{)Rg$zH0r1|=~w!W3w((NK6atyEuz{l2TxId*3hl1VT$KpJCluI2zt5Vf)G7Z zOjZ}%D4n3TRH0>1SwNiOp+jvV(Mvb!)q5SO)el(MV0Ld`31&qXkY@LG?KJyVl1pV^!UNT<+b34g?T&{ zI9vDZ_Hdj1fXtMsJB_ka9u+ii2m4n8n$;>1QZb1YC~xAFo2@*rTW!8nPEEuGuv{7Z zUGDT>c~Jgw5;}k0;QtQ`R1Ce7E$9ikCgxl>0v*jMt={8JQ1<5uaz%>OQDN@s&_U~W zPb3C!+#@eyIzCN_^QxXvfLe~fTst$SEQaN82wCh);>nG`uj8h5?_)AF7FJdj^VbB$ z#1GDmp;oxQ@Dsiioo#DrQnFdlB;;~~+gB3bM;E1a*;)=2>VmI*6kbc6bjjXcw@y?o z)YTvZ1YVeUn^Y#I55U*9NqdVIM%5#h`Y!1E5LM*hn2g(-gI$zUg9-;MqSjp`$QQ@U z_znVwoKD($@#s7!1VRdJcPkb?774?VzGB;G_YhI|G^^9;c3%5BKhVNMU7UBJh5ZjU zVd)lWtHvnqM<9^JCw9r3U}D%2V{Rp_tE;=u$9I7utHj@Sdx;x_q(xLZP)tCPw`M}~ z{FBP@HK>nRF^dN+z)S4hb5uY@h@5-tz59Cw0`i?tUc=??IFQ#&;hIw)I#bR1+jcY> zT2CRfbCqwjQOvl}xObI^_l_oOT-iV#;XcIu9->j(-=;a=QBzlE(KN>le9h1Y*~O{5 zBsZ>65(%|^Qf+Dsr%L#2fJaqMvBQ`%nMJVKK&VwA>%ZT8_!e@OwV}i^jBf{PlL(X# zmVJ_gH8JGXYnmx|5TQF&p|)wl#cX0M-Qh>dTtC+VQT`ItP}tG0-fIkq&83OPIAh_Fr*v|I;0&27Evrud^0LYj4$?QxglQKe&UNITu`5^G&sd){DVQ3$ z*vd42114V}0&@CVVS-pIb%!569Ql0U3U)1SccUPVy7c=%NpFp0b~BF>ev)$mZOOrN z6tOZcClx*hN>KP2b5hFAUKZ^O5~lJ3%G+mSx*K*D^C}k&rjNbm9_KEvo5JNydfz)g z3iJEQocTn%yA!6pP3q1`;bnPGbZ^I)bfQ;L3I1MylNT2YcXdDGEZP?koadGDqqdLs zw6&{SA`Q7Ez1^t(|HK&o&%yq4<-ddlw6taOxpi&myaAU*pX4pee&~Ax&RKblW)Jz8 z5>8AS71ahOY%<6lgpX+(0nK8U2w=?_?`TyTKF1=b&gIv{R$6-`?b&s9*BQ*{RHFJ^ zJYgFhs>wVrH1bO`3=$2-9S#wPDi*)@MO|T&UbA;;5PUvRG?! z7H=bjz|8*DnSs6&=xc8rEyuctV%Io%S@_dZ{2*-{0w__=ql5Jfb0x!{9GpEZicnX( znVHlzKH0XSL}EaqAZ$k;dBsrof-W&2eQkL`!OkkhQc2?Kn1p@F_g=hkL@uh$>co3+ zb?1zr9U+ohp17O*VCJ&*OY4;L4yZ;?`le+k@5$jx3Ap<$O4&N0phQa!tGL7>-ab)2 zv0v-8p6raiH3@UIQIAWwd8C#05E^&76XEG1wLXpF^tlYkw$1d!2xr6`Pr_6sWo2N8E<7}ZC&dhYptiQ_sLpSVUM+h^|0y{Yc?DA$LFCGj1A z!v(^LThh^vF(0HeV+x+d-E~gLIkq%k3}bQmjd}*tHbZ7la!vS$$8rZC{S%Ly<~akE zEMEfnxa2LGqi+t9ld^SNW5rn+4Cs{7)RB~kz{`@~(>?4hoac!IYcBS;-Q_`Osh*UyMUr){16k`NG?LGBVJEqN&o{m2y?oN%9TTq zMa@lJ%!{0L3J0nP>l{AVMPTlMdIgS`286{sw#e(rdBM7g!`gq~?;Cpmmw3_sp2-;| zNUr|mlJR_)^T7VsU4$U#-vA0&Z@DJPWMAm7vjgmhusgl~WuuDLbp7A+?SH|I{NIn! zm)RIPlUkalON(^aD-_-ET8{$6Yg`=V#IfC>v>v$y%GIp4v%d!-w{@6I^1ggo^s1WZ z=uOJ{%3PTW&1=*<-3hCaqB`m_j*ce#0i(Pkvb7E=L6!Nutt`Z(J~2!KjY!i8YS+^lObZk-fanXE?2Ax zi!}B0>O72ZHars24L2m$@H^8L!XKM9sNw{LiLS{*k}c%&m+n?2W~73jJ@0h{HHnXG zKEA~GtVd^!0RrOsd6;1z_?#~f)UnB8ekJ9Q4ZSq7% z1z4BJMCTUC?WU&G^TV;FcAJ7X)2^fHK zN#X!HH_roT3u{Bv(S#h;ick+cT)8P#dQYz)@W3MA!&T3vo9tB8ADligJcOiW^JeZ;OlcU&B& zBiwjrJ6L)3PoJ~Ehk_F+ez$r+mac#x7UrXGITV(gu^)9Y)#Ynvd`O$3d4izYVR6b^ zkFsm7ml;J>DBl3<8j>iZXz5+r{f)f-NEcxxxD^r{YvJySnidEmFr>U38c{mI!Gh{ zME$U|>Unjb|4e+=>PV7X(*jX6)Oh0rQXC}i9M+XH3cxW)$Br5+EBGLm)t)fhIJ;-QC>@jYDvE3GOa=nE5a@Q}x{Q z)YN;=J2UtEKXuhPr@DKu)4k7Hdtd8!h5qc3sflCoEda^IpS(C7hK0JH?5r?y|LEHm z>)f1u-8v}B6gpf5W2Jk8(&w+O#s;fGC{FF$Lb&cf27&|FC%iCe;Ma;=1=!;j=R#D) zf#U4)1QfqQ=t;tJ)ii@hL`S5Fqp^!9u?Ub%q)maw22;Z~V*6tw!Tfa>M(i$i0~D2h z>>RffOI{pG<{DiBm4eas#<#)0n{0_9GQ*tOHmOO#vdgtsa2=acN$Y9)x()IrGMd?+ z=i5@5*}z`AWO7iTY=KIDX1No@rVSiYxF1{gvsE;Qsm6r|ALeg6Q=39nV69bear46r zNq;JGE>xgUl_M>Oph9}GOc9#lV5jdD@fvznl0fs$7U3X4trEokUQ&9fyDy=h+F*+_HJiKzuke z-n;Ot61T|`1eKmF0-Rs}^cpKA$;8B0&6W0!OBPEraWFw@!hH)OYvd{SSEf{WYThoCv7XM$mw6wtb?fL*;mYsqj7|7J2Nl zm~F2(9)TSza|>0Q&3diT{t7#w7^t* zx+}7nHp1ejXnenMujlXSf?*!j^$F}I(PKXlO`U=Rj7OV{Q9RQf(B&!b{QYWk@nYOr z{iNH4g&5NE7WLovZ3{}etdPy-{mON!83|SZDQqH6`OH-GvQYj39D3KKO}U8mQ&?yr ze%cOm8b9~EE~;aywo4*rrLqQGp6Cg2oI3~&6)q}4`ZOd4%hC#<1eXFCgN^N9DGO(r zCYohhUCPDbF-m{?W(AL?BrD%UJT{hytE4gn_JFr6xthqnxmjg}aV4NbS#=Yg0*bhn zf3L7kTz$xt1(}7NO@Sh+0S+Rtn3-DgwlI~#3B_e-V!gm!;_ z&tD=lJXA@cA_5Hd^KsEzAv;|ZUzeV^Qq2(j)(o^98fATTg+DfvUthQQ`BTa*SmvcR zClY77b*p_6u^crW&U_mE(RH=Sc?8!5)uXvt#I>eN5XC$Jz>gso_^K_=!QfMFa0P1? z5~WYu$`d@U+`p*kED8z1XB({B5Ws+CRHu71xJ#3m1%c&*+ z-g55+GFqB&;tXTGb6tXRkdW#S24KYXz9EdFdYp5P8O)-Y+R}%};I)`xDt(~Mg^df1 zEdmV6?$!evzII~_cR_v~ebt*2M_BA(f_Klf2fAUFbiA;Mip5)F z+2Eya`_|?8p?>GpAA7f&ipE;n!K*Jqs7Ro#ipWyr;aJ9xW2&YjE%HGrS^jTny>a*2 zC`EGhlS5{9m|0<3uae-fL}7Zr+er=*A^V65$s*U!s#bk~Iq8Ci7hJYtqYK@47%(d4 zCB4)tbD>$UOz08chKdG$jN3ahiqLag{A+^!+Y$KN>l%MYv#P)S@&EDqTXBIy=VX0R zx|svyn7h@-7jR!fsQhleZ9c^%l+G#Y#TDa9ZKe?)p9E+EKtPn!)6@G~dC_0T|BArB zi9qQpCf9-$ciE?wBT9}M2V-BKO)i`PC(w?-F$}+$Z_=PV&UMD8_4F%frTzA%&vxVy zZ%Wv^nl9M5-&C zU2daSQ%8)qQr4003Xa%Cx1vBUt_D)zz(aBn(Rk|i)RubD8pj)MPHhh9 z=}_UgN@%3@hhYLJsQhY6~c&B%5=YGX(Otl%* zh012Drw2wk1$tJjdoJxU4z`ZrC>RC9G=Xba&NhdW*1r}W%y2&G8kRDv*=Km7 z1`ETDd>m4duQz9FReP0I29~bEwkABvFCC2q>0+3dczaLZH|c%US?M>MH;bk|tNJOK=UJ#7FtC@y5qzrE6W8lP z`^}n$89U45Ihhh>U2QPpna|orK~aa2`lgC7jA zNkar(CXxbJgGZ%C@uOI#rg;)w1Km&b?w|JOtZuMQ-fqj$2^grC_jxf&4LERJy)pkMV%?Y!{U1QtWWaL#AArS+%TU2{gW?~6yYdsX z<-pg!Y2V@Z4cA3%&wz!KTuzZ@&pgZ|YU_iyIzq0XpQIecevqxx0)bqBSjEgybC3F%Ud(QELg|j0N^!|YiCsS zMkwWlSm0MJgA0aH4)$)%&Pq>?#CM*$ws+6UIXyST{YHvW6zBFmHfJYKE5ynO5BIFC zZBx%O!j(b`7#25=JP0r$9F=Hg_h*_8<^E&1cqa|Y4&E>Dvs%{_@|UeW*zi2^bN{jJ zFf&(~+CFk4tD9&W$|`f%lP7Z^Sei#- zM29~ZD_W*M_Rdy2vN_8AvqrNr&z&bc7Q4UkY>Zx--De$@Y+S=8(z@Jd(y0}sW7B$T zss#89W_IDXy|}ZKywn(jSVGkHUr`Q~ZDMRq(hA^;U~(fEN|;NMx(<6co&GLMG1zb232=BK?e8dXh0?Wa`V%+A-Ti)UkcF}peQeR z^kK5@8qPMY3z~_C4=nxLJ z`Lfl1dJ;*~eA+gXLSRw!Mu-a4gKY#UvJ!>aSK+F zC*ZGkQvH4bZ^6MD_`+&%^0rHX*hf{UUW4(IyaLY&IPIAeN|?Vy%!0tv($Z(XH_55) zv#b(T!;2dE&IR?(Ugax0#*_=XRT)euX1_VK^8%RZd@XI_BM&0p=Eeu|QqLl~lo}f% z<64JMooWD>E+aguOQ9wP_-*7Z`lA4x|h3N=QwbWDz*re zl63C6d-lC_5^$=uUDbv^Xi@6(4A6mvGM>C$m8js?-p6$j;3Sq5|sVdRgU=%SDNJ3nHBPxu|RQThT1vr6|D6d)r7`*M5RYhf02 zJLGFomYKXGD)HSO#-`9Khce3=Fl_<}G1QIp8<@`}J%P7|LOy~D7~F#xf2++!_tk-P z;6!K^`#Mg_ki;OZ(;cqivSbDK%ssz3FX78@^#ZR9E^DA2$t!pl7>SG8f{`2zm|avn z6pn-e_VrQLm*vQA3rJIg6RPro2vIa-HpcsVG|f|B)h)o10s> z-W0s)5y$C##A6R8A~R3}0xfOfV#F09JwGpR%R%hdj!c}?CX3}FcQ%0f*35K{aT0k+ zF(&k-UD!3m0CpSR9qsp-64Z~NkYBI zFfv^r!%jnKPmW$`HAURgItH=jS)nh-hLwOgaSMeN?Kh$vrk0TLkQK<+pt?dbrCKZ20Yj`gqjENaJGg24OwdfwTVUZdcI7 zsAZkR_u$@kfKiNh-5&Dhx93{qLQUg?ghX7JdWccndYj1sF5|frmd>JW(D>PuCf6xi zY;)qm1viw)v!Q=JLn#SXC&l?&qUG7nlHW)3)7lfKv0o0m-Ezx7|KdNNb<=ciCj5}y zy15ZKbbWwJ|Byi++|F0;MFs`3XpJQiz=pu>S3S3nvezwAz=sOPz?Hif1>x}NbaX;p zx4}O2g=McMq;vi@>V_O_D}>T$nsHNwrwTLPTPeMD>8>i4_9}c6+k7k|(+`N*xZ=tw zxkdVkGx8KY?qeYZ<*CP->vPiHo_yAV(hZt9;_sB$q@!8k?(8i1{i3WX)%SNW(59fd znrc81nB=Uc{g@==>yOw_r_RpL`q@DLutP zo&0OyLi4WkN_FwHb!Y!}>=i=2YruP>#r?N8iONhZWGg_Iprbv#XWFxow&U;i zwTX?9*7%6dEsm*MaOrKrs8h4r`=>`=Dowq3U^qDyp4CKU0$l~1zXlKC*bEzuZh7yu zaTh`l=IcrfhG!1DB5`S)86qHJDJX0Pmm|(cUam9w2?= z?a|9MK8+T4C|{8#jt`S~-w)Sn8K~rSwbNk#nao17(GkV`9Sj0&9Ir$A1E|dYs@*K% zCC(6GjN0#sm;p;J!3)W)PF-5wk7mvQp@zPGJj}F&e-iF5eknNFs1oCXqMSvDWc`kh}qVqk3&IY`yexgAw;i#!?YDtbZJsg9x z4=m=I^ZS(=1VVT!$Z&gzYv-W5Z!MSUJW1y*SYGIIunu4J?Twxn z_Bbu?Us;s(`m`&%y;RBb&Hr_kk?56RQ|_c4%@oM@hYUVZQc5OH@*yTl!&`>sr$<6x zQuD2gPR;$7q+YEDw}GjZC_E5pKIQg@n{ZM|Wwik2cQu5SCq=KZ@QO*bX(P{HvGyoo zynA~0c2fKJT7&Fs>msV-Qz+PDA2D^B1WdHEtl9OH^F*%|!`=nGw3axdw|NjcycnH* z+y<@HYBs`FmSogd0qMZJu!#@hjv@YhQ|{hqDHd+mDLy9E*EA7}mxy_G(~%SZNHhH3 ze=Pq)ni~JTH12M~U7D5$!AD-7ME9KbL$%rk^!-zGiVK7X*RA6VnQ4x}WgTJBUnrR5P z=Q2cQs>n$&+DDc974(JLjrf_1y1DcnS;7b3>h(zQtMVCuTiUBPgLj{I$WlnE`C@i8 zdy{H;E)q!1pqzzEHhX4{p2AvF~lh$Qy8mfXE-bD5a|4N}1 z^P&ceT<_g!v4nyuj@WKquL4LkM2oUnM7dhchx zB-f{rtkw9;;clfO4Mpuv9;gxIpjY;1wQ*>ZsaDHp+1;p=?na09x$J5E^Mb|uI@!(> zCHJ|gxlsfst&*7fGnxnEthj`14?rKbBl4m53@TsSK$BW8_UFFN4<)1m)2urtfTgU17Pm1NE+>WCM!K}-B{pP%=kX?4l^hWdVadHf07_KV=VM_V{$ zTcTyI3My+#OrGenvEg~DDDEu41$g-Y*NUk-Nh0)xHKI9{k#$)jL_EDVbr>v_&r9)2 z@1tF+JMW3~H?EeHWl|y^s}&?Op6^ue6FN~c9G>fR?Io7FDJ9rKi345YG<=;wi3;JJ z!{n_+d3E3XY7hI7P|hLzGiafv zbidqWcN(;GHfZVJnuh^)5zk9g-Z6pfALoe+?T%1%Cx%t)gEI6odY*94IGTYF zOcgX30q;#26L*)#LIgVYDH8XQC8Gz+I=GRGIPt8Hb#SpNR?wD3{wGD2vNJEL!&F?9DH*vLwZ#rMoLUXkEtT7s$C2KRxdf0Ju*%)z42D!+Sr zO_r3r|4Ve|CVAYG?bu}J!UeL6Rnc}-poMY9oXg{xhGb~hSU-XwjH{pS%T*ZG=Vfh^ zO?H-%%bgmQ+9PGj56s!n#Zdx$hqjPI8+uA8s>2%(pQ~bSckX*#RVU(DuMn!8gP1&T z?O?*4n;xh7+dIe@|YT zfH^Skaaa%_h~WSBV>1fM(UrB$)Ymn}IF9`i`#x^6V2!8j14dqTZfa~J*SxiR48qGk zq1#uta4K-l-V-qRx&PPN6g6bJJe8gnW6S>CqTt!y{S0c&qJ!Jl;bTlNTk=rtX=rHK z{Za&to77TVJuA9h`V~SE+&6mo*hb#B>}~j$#Z|fAXyqzVZmRh-SFv){tR{MY6KqjX z;O`F8ZU5A-nYmIV9C!@3Blg=Rl~oJCkP0C5>n-xY2pXphTOyD`vn5lT;QGM%*^H(% zpJj!wR8k+{5|ZwDg%eWPy6{{gc&WlD&x}m4M*M2FLr$lAQLG9|M*>W(nqYA&N;@)r zg}}{E{6To8MftT-va7UW?CXi9SQ-9QG`35eoZMB4fSLpboh{<4@{^u>`QO)ypb5s5 z3@;O;U4YqT`Aj`f>wo>@@c)rv{FVFtl{)^9ivTb=9>&M%9aqD`@~(0i+sow1CDmu6 zIOP_;h24A8ou`!U>@zg6Z>lx*<0SKHELpkB?M9tjv<*zMLAN4rfp{H9N;jsm=yN&` zHs@D@y%Rk`XpwbZXZ7+xEn9aWyZ&!_*Jz_5-8ybu_7fHNN~$-?jd@|_Y1~rI{10k- z%?ag2pwG4O&LLs&CPg0Y;QBoA&!88_@MZSvYIz)!y*9l_=U>cGtK0sA2ZyZ zL9k@Du}0E_uU%)E;48LVY{Qe|Wy99`!GAN3eg;R+{MIMaq zWE3hdFR7ZSIhoyt3o+_Ugx?%uel?c&X)J55?aYkVa3mgss$#az#e?s2teatsZ&m+M zJi>Zv`pT-@cAYZHZ}0XZ!7aU3C(Mf%qtFtwsjIsH!X}!uS_rPJrpUt}(&i&W+s)U@ zXt8_!@H+erlnX^dzErcKgP(?k++hM@ankmbGBe>+2n4K)(hB!aHp-L}TYy}W5Hk8* z96M`ogxa`Vv~v4#^7D*$wXS0oGqoAr_f(7w_pa1p9}A4qpF5)QfEM0}J#*elk2+eFR@@Izze@*B)NUeAmPNPVe(BA_zXHB^bZYGvIS^(Ih62cJ zew!pPlY^~B)g)01rZD4lnpZdJf)sdDL_z8k%E7e@e zC@(vv0oHMOVDac@U3`aOx3XxV?!mrgs_HOKNtLYcsIh{|-pSO_7+xl3`2A)4@<7`U zP<~G})?z$zXX&OB)gk*l9fgH2l?(cV-lK!XE>~B%KvX+(hRA@nKn7m&$-^z%0m_Cj zrSpWh0elzH9x%PIY*HfOOMbs4PCJPx!7+#K8fGqJn!$qprlXn0Xycy^iCx(*>ISK= z9&eD*q?`T#-uBi_)+v70f}r?(%m2JEVNyBPLUy1+tv2dhJnAy=2XHW>*#^xj>Nmpw z+-N#y*(LB0uLWuqEb1!e%FRM;>AVp5RlD0nJt3FRA49+FvHLLXuZ(gaY-9ew8<6t! z`7Gol`C0}uCz#WHWiS)>iswLoA4vsy0ItlNQ*Pibuzp7q&`@+$y4WVvqZGEO1|H|E zl$r>I-z0zE`J17rx-KYr{S(m?F7a7L9{YJf_57GqgGU09(5eCIMr-|ndfS;$i7n1$ z$wfbvVqDB}7`fBZGi?oRvVI~#54R+V@Y@UiHn9uCwoqmQ-K2Uc9FfKN$v=Qu!>f)j zW|iaHwe_(>j-%A3 zKN4o37ai4O)NBE3G$loFsfjbX2+)6H2Sw*bX+P4F;N(_or1w-`LSRQzieGG#_Zr|< z{@!Tvh@#df1O3h!sR=so?1=m@bhM91CfDEGP}_{4&pklqh2RonKselfQESgRl)G5c zom(T))oyS_)p#Gk&~Pk#uP$BztjwFcG=UoXmHe2lPG_>Wy8rcSxwx@BVS{l6y^*M7 z1qFyqZ~M}b0_KH}+Y@l+Ah*1)XkS~;ua@dTPTw^uMPQs?@!$f94-HRrG zF*C}qSwdB*;ofIc&^G0?J*P;|DDSdCZ2&HQN9tIAn6&lgrgZKKp&=DZ z7j}j^ZHzR(58>NqH{zaOiZyd*G$-LW8uPxqr3K%QSVY7mGMxHEE=5W-2J#V?L>}T> z$c(b{MP9s{Xmu{eW-RTmMqRITYDAaYwvdc8O=)-|DEWE8(8zcXd{(MD=s_tX%h2o8 zh-!(`u@(-hsQtFcq^W1C5ho!zz(W;Z`{0|aF}XkTI+qbwC)Ia#%%4Uf z*a31xx-8){{&KuIK@KG_OUk23675@6Qr2s}w`;(F2~dArAB>OE6h_-~wkC9HfJ%C> zp|y~or*OmYw=(8cV~o34-wP0EsTTi6!;^xL*H3K}hs$_9wr zlN3t(&*{+kdrFu8E59MmZU*)rz|3C9@yn!GJ^OFO-T%7J{r}p+{x1hL>X_vJD@XAk zmy7@PZU0+DfGt;ooWB_Q8@bQAbUzot1bHuhCen}Z->;HW8b4Rlp1sTEmcl9@EU@MI z{mx8zgI@+T{Kniuy)M-i0b2JxOr>)RGuHN#j0I`|c~pq}OM3WE^6GCJFAmA;2Rd6& z+GJf>%sw;@aBg}}+}#wOHqb6Gt}#8h`f3?Og1byAGh8hrBY{W&Q%@d@>En>2jMdGi z0_dVZg)_(oYsd_om~nCD__oe%j=mqcSfZZbN9T$>z*!RPmbnPgoGF#MHNB~usvqx@ z{{F2`6cFs^l6s-k*p{%pOjMo!O7+NqJG^d0i3wzt-3WmXtqgB{EvQ^>`>U3}siM}F zza(^#u?pc+l*_^GRhbM)ZV*WrG6C0uH80nv+R*p zQUZ1jMlmmS`Wvs|3^Y)RItBAOuH;2yd@D;nThudGJPBlUJ1d?0o3NKYPdR}v@eN+? z;!5YWPy2nn4!VNt3y?IO^K`ne&faDQt=*!M0dw)y_sy)Ujj~m}+Wjzg_O=-B`7^1| zk0ozf^w>`VF)+b!aNy@wjdtYIqrMPp$5~LmZE%wW?^^_?u?kvf_k7u}hPb8{v}w9f zSKZogUeQDJP*e6|XKd8x*69gJVmjC>7llsz2w$l3&79a~4%F z49CN$0rH|Is|)HLp?u6t1$Dv4zzQ}Em~sif&I>G-&{IY`wp%}x`jw=m&&X_EIQmvQ zrBCCA`uMq^Cj^+$wDe{Y5anciQUoJqrz#WMKtf{oSw*i%uQ1|f@!j$FLbT%LE5Qdx z^p9#-+a))n$`vg3>q2wT`S+4U{7) zDAO;+9l9lB=kVWL4BV7^=r-w=!#l;-3xwif$ln}b__pNH6Fb2Bd~#+G2PPh0%48w3D8D4ePRK5hvv z6|Eyd+18aR(bt>}uE1v3wPn8cTa^c4@EXqBWi&tAnsHyN`{;zl75(fnP}bMP*m7i; z?`HB>9jtw1;?!5T;;(-D%sk~Sk45Hz>2WAMd7Yd3CRY(V$jUy6XD-?{-&xbxesVlK ztST23lp)w#ok>(s zc6UBftIFPxrg;HbuWTH70&T{5&p8nOa{^k16Ic7u*KNV@jixc|gOhxGsv0PV&|&`< zggd_{hS_MdDXtVn7WLp2?K;TPd&1W%`sqsktu8Oto!zn%^qR6_ZHPQv@f*3V(@d8S zx1ZuNJw-ocrbT)G0Vw$u8zXztFq6w48PThJ9n^1l6?oiJVKi0m{_49dD^)u7&HYq5 z@oBLl*7Ck;e}TIi?i%aWr`gl;=am&AoiAFaFjTW_sVtGlH&(^Ps0^W`f!b!MY%fjE z!JN^El9B-Q?nXhI1$jd?w?sN0JgqClW4!<}%71alPSL*Yy80SM7LGrrlm1yVB zf2Pj~mT<^^YzAc~W|&6IdlQ>EG#+JiXVtXXdWBR+AaP z{7HbnhQq4}bb~XAzLBy5^O>`_A+I-ig%bR$S9|Xz0uGq3Q8#Ib$kKU=2a5Fg?RC9w z5K2~z7U%k?o8pI}(pn-RuWqOcHq{PH46G905r#0&zu&p}g z^sd(5qPGakb?FfBWKBC&nptVP+Z1zH%&}7fQon@hxyXGZh^?GIP=Mag06E+*3g$b< zexOOOgX^}R>6?V8XC9wED7t^cP$le>r$ra=Hov+lO{ss-c*t7XBmrz`zuG_^Z7RN7 z@Pw|zo4{-EC!Sv3+>|M!dQz!;Q?2djpoe#2{Vtu_L=}tvNnKAzhDMgUJJ1Dum6hqK zcFw=(Gt^y0ZAXoqTJ>^4p5%u9Btn~a++1TMJFjS<(zt9fVXBP7)`A%6i%@v3X_lF> zoH@sk(u-+Lm@zI2u20Fcv<$D8u+)2`vY{~{Zd()gAwmp!51bJ#Z^>BMYE8suunF?y z8rJvxs-9s0His(6e;^exRfpfif3BO{Pu7H{xvKGx#r{fx z)?k(?zaD&Liy6lK4y$T`zg)@be)^r|&x<|Ci=nk0Mt2AY-}aBV_&`q*8)UoE1q%q% zXd}UrC*7WbaO8P8Ec0RaHCEE1k8Yj_qQ<3s?I96^$W~w>@ zBl`BTa8a!Rvjs$6*bnbkdc@t!=U?DFb3OWO0v|XO-Lp`I7|uljl)VnQu-s-~6$-AB zD)J<5NSUoicw;jmU%Jozu>xgyxM^t^0f{t;qzOBw&~OA!Y66Q7f`BHBHl@y3_w4|i z9(9$1ccI5Ie{pqbkzQx(&Lx%9s=RMfBTSK*Xed!eJR~)JZ8K)DVXZCtFsCsID%g{n z{ye2|ojB7_+_zv6@1VjL)14cPf9C?uJU%E6 z{6c^L`f2fQzr_fp1ME~;%;wfkXVSyBBsiu^=w;^WNrLVqi2HQR^~5UP~4e=94{ zS#Lf%4L9djjGY%fDpnZS2;tSY{7Tl4-8tU| z_7sr0#!bF7wQkCLx$5=HV>0Bf$2h4U@TH9wWb4c3+~pi$-8nRbssC06`Ai!l?l%ys zHY%Fld`#gka<^od|IL3XoTokM29Gwt_=TCB+j}aRnh+d;Jyv3XSUcZ{@pF*)iQ{E- zWeykz5ZOFY=1}J-y4VsW1m8*onXcddb!lFCJ*Xqj)L0^i?uGm4S;csSyi;_toGFD{ z!5*gOu=`dw#EU{-wsJ$s-JVh-#;W2Iu$BSq?iQei>>OowbkpT!d5CO% zNA*bCa`&U!cfdQ)tx`rl%CPz%Fi8rIcgP0r#|7g9Zy--+g@SouPq78zyXA9 zg1sLH3BA-P&i4BVT%Jeym)@*slI50#}Na8)>=0dBw*e#QbTVG6BHlJJ6@+(Oci zsAa#tZxQaf+UpVhrivOS+#+9T>>BfK%UT)Z7<$)p4Eq2pBKfIN^M!3 zQJzcTC+~|G(r+_ojNMzjF)=kU>x)5?X9p*;g?AY?_II$;Ct&E=*9CA3c`C1xK!#%D z(e2FlkMaf9GxDA^wgt*;n{-x^V}t0vYns4D(+0K^ZFbhBp;Q2{_ViMl%?di;E*r}t z#;WN9cdG@?*&suEQ}1)cGS;jDkPQDg&gbkD_Zuk1e<1rw+REQz6T}gv$%0{QjhpEB z?SQ3rsq&3T)cdIW^bWZL*JZKA7dcIesf4-T43XRe=0Yt+I`8Hx(d&-E`Z7BC6mux2 zs}`BttS!1JeVokzJZfCedDlEq*wwbq>Q1Z>9{p~BBfJ$R(@=oNYy=L3^Cx*hIwQ16F z7r4hd!_1kbHoZpN*nq*af)4jcaSJ%jiFRbaBjFZ1FAL z4B7WOnmjM_!{WG>Ma}e5@O(G9Qc!Hf@O8k}Etp1LF*k-uL~VYzpq`<OXYD*$Suak(`|IQiXP8Z)rSxRCEqpn`BuYWo8<8K=)(}Zfo#*>uQHQM3oP`*WecAce2tz1Gh(Mi>`=a8`I6uf?h#lSs zoYAShJ+L!N!KUGz0ejJ!5jh>i7Z)%6y{5#vD*NC~pc(ngP37Sj)(6I3p(`^yUsUQR+hiG~^&Fz?GN)?w1>Glx%YOb+<}4C;%@0m<(Mb zQKrk2la^ZqP2><87JDZfNvJDV+Bz_$2t*sbyn>N5M0{j-_GFX)m2-?Se0T)k;M^rd z(!qGl>^0CBKX7cQ&4a8{D{Tt;i=l2n!7{psQ-j^MS>Z)-RH&a7Ya!|={A3)>u&iHQ zgq2TJz@@dTR$ZH9+SnXPPn2B)utoVfF6UsHW;QIv3Rid%KlQ8`SKhm`PL1*%7!|Zq z3YL=8sIRZCsgK-LmyIg#%>@P}kc|3Ws7StRYo%w}=r;swu`PKg>n%-PAyqtbJ8@N- z@@T2CYR9F{By3L@T4N2MIdxJsZmAk5TzlZ13fATNo0szt0L%m+%5p#JqVv);5tQ{t z&)&4o>M;%D{)(W91r#d@hiiD(SCv>pPpzRhREPCvNW%Sua0o^&gu6`+b>%7t~SsqpHWJqPo0nscdp?j&ctyjyo3(U*)Hq4MRMIY5THV z&+Fy^Y?1oeQH~h#nC>Kxon=w%!Q3bekzIsh+dlyNS=W|#Vn`&DwuoR>rJ0wjls;6C zALpD3?%SxN;CMlGw4Al!y}>!c6X8coFQu~sBkmhgzEzq_mKHLxCgu09C4WaSPjtksz~5{Uc6n0?f?aLqav~aG`M^X{dFIohqG{@Gj0z zS>TF6ZRy9>awU?l1cGC7J2YloJ(a?K5d&QAxE>LNGQ%J#U1UV-%l_gO+Nt~ru9#}; z)5V2WjBJ6k?yM~u1f@fBwBXR(x(Hu^s21D>Er#w_>FN= zB8cUi3$+!DQFvJTV;Ib66G#V|+mSM~Y^)Sho00qZ4Z-G4oT|;0Js=fl!GrSCg0DxH z`k3TV--`OoKkp-2T*EHPA>B*juc7#HflTQu(;3-TGYcA^UlbQK)KE~COXp*WcbQh% ze_BcU-(uV2?=!mp{`QIgK*9b8tBwD^4F^YCrP0=hegP|gO{$NY?u3;X)z6049O)~( zL)QZGz70_WgkLuO%`!vZPrT`>&I;1t2em2YbpCHG5m;ss38`z~Ha;!XS$25m2{yVa zuH9tOju|e=V6S89Fs-L&olUE)7@IjTehw^=u$*U2^L6hP^=y`?g{>W4e?z2T4xkv| zq_;pekGrqku4$!3mDVG$g+5wl-7b=}umy)NK06zk-cWlmLSqk#{zCgvW`17BcNlO* zJ(HIAGdwJyhC-}RCuAx}(&;Kh60{%pG0)bTIlbM^NeXi#ocKkB(D7V6*%r0j1Gf^HRQc!VS;1it@4LZBRcii46&#TUCo0k5k_SVDBog{F}jIK&CuR zX}W39j&JHQ97Onjy28K1Zt_QG*Vh&N6Y$i4xsIt95x9)92%{hHI5KYBr(LHn(jt&;=j zUU6Nu0h+jj1=nvE2~p%C(CEJ1e6b(;eA4B9A&#;)t1q>pr~HFQV3#tTbXH;_cRX7= z17eZE@9)otZue@oPOYUS63M(u)ADdE)#as}m707nJ2^8=+%)jnlnr9@rCu8|u#eW< zVu?f}R=Uixt+nvg+)tS$d^Dtz={Lnl6p$6{ZkfeFu)hg9)dgp|YcxSr+MtpX_)2d` zxpwk#th$#H>*Ed}_5ITaK zRO5A%p9-5NjWIbP`KUhHbi4#5>EypoETs;kXj0QR$T3N+ zWt6e;?(>ZXn1F>g36v^z_CBG9*pUytyOt?uE`Nn3f%;B{XwQBr3-AL#cg$T1)Gh;B<5k`=b6I zK*`ta>${kuooG`pg=kV45vRyqCQx4cnf-LxjKar(<`3a_E^!xR>SJVj#lsSDQiY@E z{ZuMBO{H^Y7v?Cjf+M#3dck|+xzR-Z=CM*)e##Y@8?{_9Xp6sRgN~rm;7@7iChOJC zFH*IJB8j+azqFffDQ<)pF9955g$dUO>)qyhO3S1nJnE0DDr^%uHItdp8F4$rMM8xi zUAz`B6+X+W!kj1reS7g{HxOQvfqMz`TI+gpgc0+Lb)^9dAa0ekPB*V6%!}!G8^WKr zP6c<{Ml^Xh!T5^u`!In4dV(4BolBhKd-+ zp+L_)x%lQ+Ngs)97eehpD{BGdljhqmINebu+Hz*bjD3K&Qmj1LO`ZZk$HZduWMK+@ z{xA>B%TlXaU9C0TINwT_xT#V%gz-!InF-XeGk`W6$@BMSXo?flqCD1#Hq4Rj zCyoVe-RaLLCjrsp{JY%qK5jBKzh;dM&&w2@4YySwYHS^$IJUFQ9$`tp^C2Rm6WBJb zeX5|T%_kmMFf8XgH^;9f`E#mfskUW~_%V|-I9*0X9s{ryzff;>I-0bBWM3!WpR2nq zGGS+*Z7e3;*tLn%f>Y|j`jc}|A}X^UjolW_5Kdu+TK|s9(R_Ey4jI9dZl7(UnZ7hT zUdfjFHBXekUMQKXS!4|-kf}xdMl;b0X0rh1jN}4yy$uIwk^peV_Ew0KNAEzO{D@Cq zPWlC%u6_fVCuJZ+2~cgtaF?zFrO$8kegtNk$B(PmIpABb&TAJaHb-vkQ{R~}I68|^ zn|}%%^}H;ZT#@(`KV;z!mxU8r=$SlV$DE?v_=27_PH^-S=^rz&6!6yOS{uW%fgI4A zAJ<6oI3$`FmSV?B0WOQ7cv#3^%!u>_00{3s-8%FVru7xqPs&;9iaU1t=Drr)+|#Yc z(^lW-&^Njp^P_IV$LhWJ(Xu~{ZGwn73tb;iTlVm6=*zn~6!-5rpC7N1)6wBC9!06y zdP`pHt9HjC#0EBMt2rx+k6|y;>H;(cs@6z3RBqEJUVLODeNcF*LsiJ*@mB`~A6#B= zYYU#O{Od!i#Y@FrY^8Qfsr7tlyDv;JL&D1XdS%|KT7aT!tRm^RZ2f2{$96Ra4z~fA z=)sG0th7u_o&`&XNZJxEbOcVO5v>s61l0PriRmgX*mEts*U%CF!|p;Z9UKqcoHqYVbs;%s zFguX_Mz_6cD%(eXK5C@ewIjT^YvHB<1z3uE{ zNtaq~0kZ-HCzZjQcFH!$`LV?Ec%_I?$m?$<*<NihXDMlhxJGtH%0_2F>5x5k!R z0215srYRQFQ@An+BfEZM=mKgKoncSosMn*Sq5R0bky)=@S>@VBLtThho~K3+ZvVn}E8V(*Y%I)&dwK<-+I8l>o#LJw4 z{^=}wnR^GSy&>uO)uOoqJOlcx#iGZ?$NlH;U+|nA(FX-b+>BlaO&^1j>iasSGXDTD z5AFl>9Kfze3b&EK-}d=U4RcunbxaK;(jm>SZ|P$and3JH#-7-GU* zuV!n*SuSrw#sYh4*j*CtLms0YRhMiCB_?}_;}_(5%!`q=URz%U-Z*#OS$ zE$RLFKL3wnYA@3m5X=RFJSt_|SoU`R_y404>EG9&{jWLuZ(xFdvpehm!WR5jyurL8 zo4;tg|7(6Rf1m#q1QL0%N7L~&s>JKH-{GPavq zMIJ>OQpa4dgL3EF6Q3@WR>YxuIDDuZ!Kn1&MO(wck83I`I6_%mNFhSNyQaZmApYvL zF3yY2R4j*k8GhPlL~69Ru(=6L*%^y}E-8!5(HY+dHjC#cvff1`n@An+ZyY4`cA9|> zUZWgHHU~vSM08P+Ad^kROP`S>Kr6^n-piuweDpnS+T=NA*hz-Rd$moV_Hpw-C*E`L zCkq5AZY!#OHZhZQ&4VmKx4jlRxVM(%IOusp60O-7bhKl&g-`x$+AL>LGn|NI|1HAh z{O};zj)`(GpR((sZ>3giv@Lr{?A(}{l$(DGuK4}z>4@>v<(=8*CVUICrmt>vWjJXC z#>?}g!3)w|pB`FIVSAeP%$C8D7%e6ZOJj(q*)4^f4?yb`PbTt6?T690K-j*d>j5DU zS3^Zv?Tsi-LMc{l<)S!o^i7(|>j1*MvRe=3S&7frA6LmxC_mz(E9bHMSRJ#q62F+{ z7-~ya{~z|=I;aizUGolBC{QRAic2As0)-ZLE5+R%in|vtE(MB)65L%9iUfCeC=y(Q zL!fxk@8;~j^ZvIxJM-?Gch5P${F6y0nR)WW?&rR)`*Y2O29#6z1{3N}| z9%#7mEIsfZTR_7vm>{TFZ!T~w)X?9@OF;sEBkMyMdc`5%xUUsci~LS1`D*Mf zSRCn2k$uzzMMD|pHqj(PAACK}3Sk>~QbYJ2%)>1AUz$n=aT!PJ6Y*8n06l5A_4qQ{ zTG-onW;vtDrGp726amj4j%1aZ9DFTVxqm)1lNLfv#WIT~y(uHV0^W&o$(w-dcTC~#UYesmL2R63X*a7{z4_8%3(VyoDS&Z{D1xOAr z$rFKefyOv~zGBa75AK%5rZIP7bgit}(^}WQBuvBM5mzA~zrrQ`%}?(O;@BZ}R@Nt_Km9(cZ=M=ZoQjaP8W~lp+bt7kvUmqYk# zMXF!8SFx6q$nhYRS!{<`Y2JhVT49l5sp0alF!WZIGrqz<_EMm9U5__IW^CgWxk7to z{yM2ZCk0)Qf-MVqT?|9zb$*tAJzxjjaSGcbADpKS)aJ`{b8)PCGNcHQN6^km>C6Sy zK23B;t;oCH6KO?I`~!GfItTMtmiML+)%o^Cxyc!0rj!A6uO4&4)oIgxywgnTYZg1g zK?!_?T+$KY7hNMe%O$S#yoC<4F`>iJB8T-Pie$p>sqw*e>^X9-hdT-kt4<< zmYTqPeR)Af>|~T58s1!!1l6nLpx#T4+2l=lVZv8`F3BV84CE-@#??yB$@aU`b#bz? z%eO1g2-%LLr#1REA@NiK!ErjKw+sJ5YNAJ=>;u5H z*9a#35QNiRH<@1n_iJiuA`&s=511yfHyAr(M{e0eUV`Ni61Z6A+=Hf*ysElQO@`AU zt3(n@XafQJ&}5kmyUQ}B=E=0tVbK8}@f7>Tu{{;}(uq$X)iM;uo7FL%F)q8~FjU#h zr%!sopG%s5bHTFGPPxD$;e`2TsYIk`RxNx7lJe> zsO1mp8jI69x`?wX%E(i#m+?;#@f-L2*`w1&jZjz0?~`ltbg&)@FSB92bsp=89w!fbPI}} z^>b^!%ye1n0aok15$Z_P{`TlKQbi3&r zTF8+icJyWLFW|UqO=2OZ#V+;?sr#MItT$C7SLEKVAjZLgB2!Os`BlC@Po_+c^Y+CBmI2NU#|)H z`MIJUM+S@Gqwa8GmyPJ%oGI1vV9`+-xK=^?r)tfFwmfqPvR~2DhNfKaCs) zUj8yp1O5(rVyzcC_S$A7l#n`*v~=^w>oZLE(a2EpC*y65)IrHh1kb1_J5-VQUd@$2 zDC6)_*~&A`Ko7Yct+``ujY%XK?Tw18=g6y(PpYp-PP&cNPOFzON==bJ25gng z9iM!tQhMDevp9_HGvRiS=u`FlYe-+wDhq)W_DK@3W1!NOgxm#;N1_{@2%Dcj4~MkJDv1I43J~y)iS>)_idMRC>9I zTfK$tkPvwJvi(cDrDq3K@N>apCbhDKkJ!MCttJ|kYh@SBbxa2GKd;}BJk$ULG017$ z?VN20`iGVZ3*zT~2v1hqSAY{SU!3>j28=8kz~ZfiV2fFO3vP~_mf zu3G%sE?-((*w}|nBnfEk@vWxsh*+zOLFG|U8&oIe56T)g9BDd zOSc2A6mdN1Gb~S>m>5zLfG{rT-<_%8wkXRDI^jh;H3~eTw%g!w$w)3KHrDBgaRHSXRh4FigBF`PyF` zH=aM10QCvrm`L>;fy*b)eKDFd?f9kwQtNn>pD(V9DqfInk^$xfMKn=K_97DfAK9vY zi%8j3qEXf;2ItiZ44i6+Is{h1`~Sz@kz(zv@TpNi>Ffzahc8JR|N$# zSHi~9ddtmBf#<)vNk$EaoVz*s6s|D8#IW{3NZ!pTXp@qFy$~2jnly#)T|WwCIL0T* zOv`nv%jidUp|`;m%c?nceVAYE46kq}Pa4rW{A|{_mjWqT#Z$p67w!4{LSNT6$x9rf zZ3kj!%r*Z8hRG`4QoO;5C?O`sp%4%U3sgE4k6z2|l52(<8^%9Sg13AiPHVm^K)1ND z!Cw_%T?5{MT%6V!gZ)$X_9qSO1_HN1DMhR~SUm&F8?J`qnhDO!3cP2z)l2=JqB+}p zqg?o$6i3|$#;d7|MZcm93LtfFNzpgZ4q{eA3PG5*gG0R`F;j9D@kIExmE=^PLV~f2 z-7b8n6lR>C*4=!W7m0dtaL-k(Cf)vOQ&b_59z3-~tBU)-@=U4fm0@Jg&O|wj81SvZh7%&wv@NUN|5)DIyU<~h*YK08d%{R7_2KDB!N6 zKXnGj!F!~Db{N074cW)^e*k+qXRYVu@#V#4P_f_7*}5ISts^AcYr9j&6W9j){{h6Q z>U&d)mcWEf7S{k(K|XwY*q8m9;GP$sG~HZW0nEEp{=W@g>l$c&Zuz$ZgRV84njbMW zSLs_7gU6F(gw}}Z`451Sjd1XF=i(Y=qFuul4_I1%QATCL|IUV#X0^==4!QQ}IicsI zO$kBBbG-Uw1`yWo`Wf=u_o=Cx6SEwX7-wJFdOo|7>!YQ4RVjT~=nKdFI(ZRLx|Izp z0FY7bnlG`GXWeRwWn#3xVl2A4Qd=*VPnw0m78H2E=AHX%@R9fL?!{{nnGs0=E*o;% zr_Kn&sbw-tGF;q0&kr36!Bc$2Skr*2n2}@gDC#eCgBdPSPD?LL90zNtPmJ&}2LH|q zRmrd&h$&YLvW0Y4YX9~zKr+vSV~r{jV$$}CXtxu<$D=d9@U!~mtZh|5Gi@6oUT3`(Q)Pp$EW&V@gynLT8gRHIiZQ?}47?k}&FX)jyeuNg1uxR&)Ms&M(M>x!sW8cB2Ob*&kY+%5zla;2844)4OJ#8Wq&7x?H zw)r%Yy;$CfUXI4+EoWSBlNiifd){J}!OX+)v)fMMz`6ipJuCyXr)D@!Y}=40&c?E> z{UiF@4?F86)6q}MvYH`Tx||kueL``tA|)pdGTFNmOx=Mhi2ntnPcZc zfrE@1@h~3$w5?bDjBNpy1d=Wmx)w9+h-gjBq`A7`sz+6+nmH?ZjzaT&zDONLRJyc_ z7@d)nKQQ!!WM&r3vA9-w5bGX93E{}Gu=Zf6Vg#wGv8AVWlRf(zwHIXo9p^0T0E zC~lbl>hFsdKaOx|-Nl&)`sH}Yzb-YS&IGUi%;ArqfG(0Su*Po6JafpML$FsEjdK9G zn!kEXs6DRuTaf0KwZafJ=33|9=lr#8-xoa71@rh?dc7Y*1KNaKVvypga+m9-EYRlF z@ER8-h2S4cUh&$u(BU40tgI_l@li|Xe4mAvMB6lX%+QOo+=@OPIFy2MtOv76+?@{E zCHoyk)Q98tA@P}!Zqi~|67->5gyco^FC{6k%b=Z)!5z{{`gP6T~ z>$5Q8VIzOfDVDC6Bx-p~OrBw*)jizl+7ufd8t8_8;YWkpxF?;`alM%kPa2%n`8hLb zSIp8)Fkb)afuZq%jb_%5n$3%xPo?Ye({%;qXWdtaiI2H%uA4vn@XwX47J#)TgF7~^ zU*s=oj?5dsnQeLhd2nSy`EC0a8OKnwD)Tgc@JHlg11_ngHA*AK>sz|F9VV6{9_DFg z3|duV>7YBqe`5UY;?>8{7NM~DycrgnlhkM_}>}|ajoULJtyp0#qzCU_b?U=Dr5t5J?a)Yxk zJ6-R&Ds``d>@bJ-QW1T}NKiF_@#Xu>>ta?mg2na^bb#@Kx{hVgj#w_%^gmd0+pa51 zlO!zfR?bEfHFFlI&nE5LT}nsU+caJSeht4ciZ9(L1z!%E@u>IgG0~w?%rKI49_&h_ z3w`V-*lb7ncH-r4hpALe%b&EFuChK##MZ>!cKhY_qdR@*)qy9r1mcOU4quo^#??5-j)Nt;3!{3YRdo;$76zwMp7utlEu z>4ktj_IT_)Lll4t0|?~PWyBVlImwWQrRP%><0ug743Z+?87KL6l}yKtuM`>NTcnes zS10Mio}?|_Pt-+|gTMHW`=&r)Wd`4fRQxFskQvY~Nv%37Vje7}7Vk@v>Vu#v|{ zw;BZy()gholIfT->_VT;3sS#)I;BHN?sMAT`Sw zlePG)m78MDfJxG<;QEUEV5S<7gIX_1Lx+4UPCDN)V=(6oX)9Uq7OXbPaOI>`hN;N2 zr->f_iwzBSb&v^v>2%n#qwM2`;4n4i&~%$1?N2i>#kb+9hHJCnu8=_#aC7fSL4?^) zEtvT<17IdTQ+CoUxBWk(Ed3V{ z;{GMt{)b~o{)bzz!Y2vLOA@>dcb{5;Oo@5gX4x5xJ#<(2haI94Q3HJJwKitTxjw*zm#ZPGl^sgoT{kk z4!JxqdYf$)^jMA0DaEP+LU%86>^=;rT&nbHSNXRuJ!?s+AJ07nhr$V4FxhMlE8b{` zgX4KpD-Cu#>8N~=#};vea}BWh1BFqrD}m(Ea=STfY^h|~OG|fRn~|PYh>|7l#m)Cb z#T0oPtY>0Qd0%oJFCXnngtJ+sw%JBg$oBx|F^!mwfxs!)3o&^>THYcDXO2f$Lg|sN zBgfm$h71av_kdmty4)Qt*bly0p4K0FD~v-;8h$o{PbpotpSETUKTkiUeKOT>4eB_8&2KSnH0SbC;>J!Ii0cH@EJSNey zL}E`qBZ_vgpsQT90)zfAqs?W6v<2@BVj`$t{mmVLT|JK#3aB+`#Yv+}a}Am_0F~)c zT={#?6lJAq?)F#;ZoA3^qQGno1BUwMQ{FJ{yb$595KPa*lF)tLHU8kyaC~ z4(>f?E^pSz956yzuS_eowArN;>u1$|M4c~mc?wwFQHE|!dkfwgj4Ctz#LiYtLGk|9W5txhsCz+Kf$e+kuNEisaJO?W>u zoHfj5c|R@w<9(!8udv7dvmk?trJbe&>hFK(5C$pLR^FsnwUcN}R5hqs-pcEX;xPuj z$&mnmbxi>#a+Y4A?n~Y``)=^05LO8RplblzSm5M#vy75lm6YYT<<`BmW(vG?axw2c zOge+mwTZPQUbzDG#tuQ|*<4dU-vm3?G1AnUQ#oC%u$MTg&}3dy)S^v=YbWPTx$F=rMe4O*PacWgq*E^Mj)w zbNgy?bGn&oR5(s(Z!}#&;Pz+O{0ik{&T}WLMf#=QM9D{oB8Zb1UYx%&h=Td8qW3d? zkDR=1x{+#wevVSxv$k9wC0uTg`I>^->f@amXj~gKq9^i}O4L4kt{x1!btlTQVf88b zu5la^Vl^Nk5<9ELI_mm^4Jl*R9KA1`5zj=(`rVO5q^7vieYM-Mwzsr0BANAkNYchs zg0t(<-X=#NY|x(SMX&G%m^Np&ew9+rV$NyF^L^{UM?c!Zi=vq&p7yArqPSl2V}DB4 z@$0xU8U<}VT8R^dCOQ_(yX$i3*ZV@RjnCuF(fC4vU$sm=a4y*|eRBfFiGc5|$Vali zx)|;8TIhviMJ1kedPbEx?&Xjqcn{)elcn{tNMnd0Td$vaXjugZJ2t^hdzjwJEoo?6XLi#+ zPH2mKo3k#T>icwyIN{&7^D?toM@H>^Q8=-rOJpK8D_f11KHo-tPPVa(L*4|49%1yX z(Z;Ng%ocmu$qAFhVjXgyq6W6&cMtKj*4YF+kE5cmb@F@S@^R`gyIXa z&vN-Wr>-mGlpF8va;~(m6y4(DI>oYdbRZCDI&0~|iOtA~b)1+sok2!`zuav7RHvPl z;?Ji8Cb3Q_<(7^*&?~$V*|hdZuSff^#JrbgOmsBW}flHy1_O!WX1~bUu&ngguw|1__sQ8*Dr>5TZ{; zrmJ~52|vCoHU0~FW&9u&h4SrhdcCO*_4bn%^&o;z`E>`*bDBD}p zHquWhrF!Jk2?AQ-4OZWALcPcIr zF8Z2AX79#ady8|JeT8tO`4O)_rX;BDfK!UhoQbI;JZbVke>jLVk%3B^s&h%aysS+W zvf`kjGVYipNeg&0EO-fN{lP9i;DvYn6GV~c_)Lbv=!rTkVHzq0G^nDmdbjWD>Zjdq zAq|5wd~RBSP%zODGQyj)1JA{AKP}P5Oap5HH~jdL9!XuaSwTWEoBbJS^h3;G3wX1! z`GE+^ZY8AdnKW>)XK_It54hZ_`Nrws;T?E@DLrq-CAc5#0Sdw`ytd=$&>KW`OBB9$ zbC15Re9K@HjbMklP3T z+l3(>Cr9piABgT}LSD7p>?u}cw&=vvI3;nJ3O~QuEgac5ipkFS5ruj@M=P2&IKc4|fUDKEaB&Xeb=5_1`0V3_sr2K+$ub+9j*>>ph|^R9Al; z3sD|Ikeje@X&2&t^8JdBtZj*_hWo50TuF*z@~G%gw_Il!#vqTumz4C! z=K5MB?4eS)EK76Q3DNAt$P}fVPHohs2(;f4qrd6f5euA-> zS2ZcG-U^V+4M;i%UtU#eio|^Cr``P$Xf2?9cLSZ5KO;6>fwU!}m=?Ekj-f#uYb(KHKS;@8dZoK9VJI38emIH6Vn>DvO-@n%{em#=aAG!cK^-E4H&k^e59WNN&=mzzJ8H>u;aTh+Ubnk zRqVHWD}~$O&fWAxI==kZ-1bzvC>@`5acBbZS7XC@;#q)05i}D+uxk<`>+!L%0c`%9 zDBm3W8~%)SqBP!+3~CZ0fW;y0*BS3@OSs!uQd*H~tJW8&vRO$pmz;r?PC`prH=RPf z@8^&5EEG7!QP-q%@NJE%-@uxz!eGhk;z+F=5-|5qS8_W0%7C(^gTs{%k3L#%8HDDB5#N< zHEt@w$B8!QyKCN22ZMl)T)#JwUV&&|dVf~YC@itbKQtA67?2Z{)&~Z=ayf`uNvD9B zQ9d+I^2rX?JV6Xb?CH!`!k-}wj_zE#g58(n_KWs(4xuz}cbQ&t;>HJ`=rD(!4nNvI zw$JBMdvO)qS_W6OOJ0@EOBUG@9n{tDTJWUxJhV!XmRpoCWkpK8@fyG;?G$|yHHQIMl2xn52Kvt(H!q4V)JSN$Mg9SNM;LvTyWlEd zBy)}t6ogu7XZPg2m+u02R+cF;fHjI@QW!| zM@GoCT+y=k8|2+xU;KWn>2Ut_3`&Q_ryc(?;y; zj{gymB<-}%=V!s&cCmuymKCKR$wf$-U)*awAP7vo$@>$2mLVZIA*?B)303rP5RJ@2 z;yEwPI1fl1Vwb{;JLV#HBbmOSb$>(IFAE_Ct0s2Sl2+nLY(ZpYmy<3KcN5B*j#=p5 zLc)hdgt*ThHe*Q6UhYUG`PJrClfp_t4WWkz1r$qJ;K*=i-jdu2(}jrT3@k#*}p=LdiK z&R)lKpy^0ra91cAcIXS=!Prw6(;08iD9>-41z$A1V6qxIgx0Bv`qwT{Uci*VR@+|R zjqYf*QuX_qsG;-F8o&p0HiJ9GVd=_4d?loFc-XAlikB!#t}iT4f6U8&%DORjkS=zDNCFKgy+%U&2hf`QddIQS310Q%pvZPd8r zks1cJ1~xgV;O(KJ6A5ar0NH;4(A?Llwsp@FUrk6%XlckYD1;z9zdB2?4DoWbmWidC z?S87T^j_i;piv1@@GAZa_gY4%>1WzbN-7%d<_1`Z&ek3NdgIFfT}=j&6CwMo*2&qa zD1EyFLAo}?a8LUmND_8 z#6na%l;SpsvEMqI;9V)P{MXNb?Kglxs!GB7#jIO*Q@QK*EmkhseCHw^-6$SBtXi^$ z*RIk)qh#C&EJ71(?4yH>OdXg^-NhSeDixKrt&;BwHK7O2^spEfZH3Snvan;e?7A6< zr{cZ8Lzj|tbFY&qq2I_%X#2M9KLF1rk*#FSX{4u~23s+cdLw!%W*~1nf)KBxv}AFx z1P5_X&YJWpSPRivjRA39TcR+ReEn=9Y^kFKh?K19f4fH#4LcQ+;G{k z1$cLZ0p!jlFM>OCMi7+&%Y5%Zxr&Y}x8l7`EI7axBp<)r4JWi?`_Vd_`7I`k^{fk} zQi;IRo4+eby$Zb6*)H3E_vbq9F~R&yINB>}qBZ}+s4sF@-t*l9t&b5cs^z(Ih8=CyV#}BVj9E@j0cJlv zEtDFswLd6iq8|%(i8=&n%yrhD=H2Ve)ut%i1`}&PKGw*WEE%o0ap@1fkrCAYlvs!j z4I-S*YpiPqX`Lh7CJE!QJM6Duv;3~_;pfj8onwXPmA7cz4Ozs7o%4j>8c00|Ufw{Q zbk+U6&lxb>jKHEDDWY!kWS@Xssw}vwz^rXJ`T841rs(Yp?Nb+&S)SWifiZ#J@(z9z zhN3AaW4zJWM}J06IO+HgAjd&fiiP?d+qJ3-(0n8fn>M;29e*h?mv*<{ysd0v5u~ThEmpQEwVQ9rQZ46>N+k9|$@s4Rhy$Ja zn{ed@#2cZ#11*^Hvl*?n(5>yxlSo|X$6mp1Xo-A%T%eS5+0+VM^BV~~iw6G$3jZxxT5VIkM@w0OqzOy zisR{5SxF%=BlDj7^>*xiNo|ov+bkFW?|%SdR~}uR*0aSK$0&-?M>HHnqN~U4Y>Q}y zZ;7>I%U2_}Nx$UJ@C~?zbtHdmpTwK9sY~&E$Ugwm#Z}Y4UMFj#Z>=$K3Ge`tjAx{! zJ_$^9D0kEKRq+`qtbaG!4GXGN^$Z}ef`*we(o~JMzf?p^Z+7QZX_9h9}d8o_8 z?;`B4HjUVG@HVH8HeipD0&3VpX`N3{TQ`jPON#QB`=PFvEJjY-#uMoQtkQVrKZ7mu z-_2h9FD%l(uuK0QJ3u<()8YTKDTaQcVTZfB%X(4}m27MbvPyaRnuv6gW}H-}A9;C? z)d70L^gA(1JAr&~ZE&79g}sJv^i8U*^}v|BZ1r^6;cJhztv4P!hQIn^DlhmEhAp%$ zE@Me66*6Z~WY?zL!CJ$m|C~?Y#7G%~dRPw5flgio`-YswM4wtAFIiN5W}J*1lpS|C zVoBGP<|1jioe~z?=nU%q-h-fcv{z`|d=xSk>Ia#N3_A5;B(_|| zW_5~<3({%_PKIh3g|5`Pl9UNzQG-OkP!?WwJ-_Q#zFhZ)>-~s^NNcy;E*a@f z;G>7pK(RA>H4Ujk{*E0hjbhJI_DK?Eg93Cy-bXLO(W_e|C`qwoxqT{Si-I#z=n87n z$8c{D>Y&W4z7;6}ybb1X%Xy9ndt@v4qx0P(|G=FI@gbWhbA)9`GS&hWA!6YSKYRHP zz&GAiSbGxBj{AU*AQk9a+p(3m{ek@9$oXj*3Dz_`S>EM9yX0jb6Sr;jsf+8SfK*%@ zSmgAaC4!PaugX@JLG|H^b|WC2&!W=oU*WW@5d{oaxB>&X_>m}U&O!sckmV_{SkGHM z>$ky$z0TP|TwvVpXEH{b+W~Q*cBU0YYiF%f)ahp}W_0?Sj7zn2+Z5({c7%%WC9le$ z4nG+>3nO0ZNuq5P8`A#dY?WEO$>OOJ@Fl=+Z>^5Q^Z5w{Sn9qua92oO`?NoUN73B) z_s_bv?};8g2)aFnba!^`mxkOmBB0gw3Wk}0qIc5gUY0c?CzAle$;(oML#_iquY5Ph z^#c=wDg_zY_h>G?ynN{b!7AOq?gQ2Tp4&qkJ>2Yym3b>p(#$ktH$W%>HdPYNi)RSC z4>(R$mvi0hx>YT!(1p&c?pn3|zA+Smf9{}ewN>tGiLG0$*xRMHnery}-r}8Gg+-+P zoM<>@dF3Rq8j@JLaB-{CzcP>K;DldP75CccE{9wDK{>$N&Y`$yxq**zgAl<;$MWcq z6}rV1vHB{K=4`2EQD1e8)0hdI?@%V&F>U!1z#TU7C~M!q%PuoLVC%>sQW7Z1C~)Uz z1Fu>#8Fj3R8gvNERj{rk-G&phhi_5bMY3hx`v=&chVnjtkl=9gL}9}K1fZtdRMA@T zNzzKYA5+8b1rlwcelfj&tRcRE&h)Jw?_!_nGA88=wAvz_5TCUod7s||s`s1fnML!y}+= zwVmKDSm779*q-Wz{%+G%aIjtWz4d3-S{@r}t}BxM=?|qTlvG}vBN&AVtzkoZFrW^Z zYTe5Qa&)T3z0a6wYB?yj2wQ)HvGOcW9L_gE^5Ls~;1-cj*K5wm+XRoJ7*MO?&+|*W zpgJ`c&Rx+k3PjmW|5wdWo}2r3dp5%j8D(WS(s;?qA@L4BWX7d?Z@!%u_wj~$ zO&lPhI%@J1w~>A^p2}qosg3<9MV$Rv;VRFnE#<9#tyhsP=g!5AMA=@rAUcm;m&iME(jMCovns9A86<^2; zZ24q`S4PG|F$xI4pTgOURHYh+4_x|-l8iJy7;8`+-56(!mZnHK4r0AePCbn>lOloI ziU#R6V7_>5pB@C@Qhbj@hnfbE+e;?{F7&hO0i)N*DKrWlmE`I+3w@wiKUNxk#30|GT!EY0|2TnRzIXvGa4-o^ivI~Ir(yW>D6UUB3gw`gyfkX+o|IVet zz;_n!RjKOSdtFCnDMkCo;i;I7N#vzw7v)B@1l$RMFhvd12pkT4HMuS6R4&y6*5Si0jOFk-dpC@?w9a4iAI z7gHIfyv;hcP)4fn_&6n&m=%<=X?R1L)t)p*=bb>MPJ8yiCRDRIf zTQ$TcWKLIU#~E1XYBncjx^lfNeat$$(@LGDaCvL)aX}p*tgfwwen{Y|DsZG$h7~M0 z;I9EM`0JrzH)f(yMmI2iJ~ub*yks1Ko^blApSP9-31|?&+<2wm=*{Khq2B0!Z$m(s zRt9bUBE0=7B-h}pPOZK%BkF~^qvyaPAU<(Cz0~eYBS4o-JF9)d8quQy(7NK;NeWFD zS1<6rE+bXqSz>_X7A&T}>=RWAtJ?`n^^5PZJlkm$3_fX48GaT7{at)YWu7fx3s#|| zqw(UzUea9+>W?;KZIykh!g8-%^hj2An*Gw>ItG2rXlJ#h24av={~t~``Tu>vY?ERv zD9qSQ&}#BO0IUD0R(J~OM8N58u+O1)%TJPhv>rfRy*@&avOm(s-&Ynm8P@DA2w#{e@OE7 z{XSi-ufYJiiW8K^?6hfDA9|7fv0IYP%hAZ=1JV@Qep(AwOEZ_Vq%^ zy_XPKk3p>NMgb74F7q`TLEbu-uc=c^V~+Zk6vIcCP*gfyN0>%U+as8gV-#>NV!0`3g3wM}dHQ(uvy!+P zr7&W21mW&gv7cZ)_{eHNz(5O(cG8-y0M@M&Jt%wO1uk?zeZCs|s`769XI+aT?n&2@ z#F4Z{vPxU+pRqN&#TIRAC6oZCx|c9g*%P?E?G)!Sb*|N^!cKX9#r}C9LR24Z7xDBcfr1mD79Jv5L z@x~>2yL{1>JJ9I;GS8v&%miCFPIYtK$8&REI9X56OYz=Y@MqN0N^W za|%$kSZ*gLD9mK29%qXQOnpzx!hf=a@0hJR+{}ZW@MxL@ZDNU1F2&>7gq&1%w^jyx zx`!bT!MYqK=ACWo1cUh>nsxQ+_^w>_VTFZ&a%Cd32m*6L( zHliI&|3~svhbNPrYtypR8}tsj?D2V!-p^x23#@bP9@%VdsTZ&Re26ydrl5MVYQLq3 z*kG-YT+f|J1;u8Wn(HmsTL}e)0=8e9?!-i%Osw5uyqHydZ&UM>lQN!ZETpfoE(>!A z=!+HJ^Fwd)I`4jl#pds0(6y@{+<8JX@uY3Yz*XL~^1}~yUto&Nw=FGhjvbv zUBLd8a=L;&Al$3m7hqLSndRldN!9XtFmLixfJ19zsIY@iUX=O&6Bp{j8ZJ@Ug@IoG zCi+bTL#!Fmr|KzPnl0rI$GRrp6uw2rdpyD~p+UN}_abZN7(30AQ+Jw&;j}DmQ*aVa zSzhozfGp}$qdRKEWvN|DSXYiZRU-ApETMh>Jj}5D%|?@6A$T}`Sm+dcxlzHn;I~-5 zV5m5U=pLTd$p}^XH`r2ZJeFfvQqmgi8*h=Jz0UcHQ34nBPc^94Hm#p|Jx{jaTN&`C z6w9VekU<1f=r!|s(fj-uP8n87?4+b|>%&ne+ENjn9}Vv^t+`Ygt4;li%Veo_K|nLR`{afNr+;i z%9b30dAj)@z^lJ8PON%)^`Gr%yz82VFo0kD|2P=RoXLMz3g*ZrvE1$$r04Gx6i?>` z`c)W|#jsfc0*?8(4O*DCZ|=1C`8f;Z=_knYX%b*csTo$iE9=k{bbF%ewCcD;K@9I% z8=k9KbFg5sp{M?D?+>oNN2)^FA+7hOiwqYR+4sJa)$?o3t2!qZ`14GI8NTl&ws-3= zC`wnE$Mm*yQeJm@@`4lG+?)?V^yKfkS8&6WvWyHVyee7+Kh%GDeU!lwH2*4?O8~v% zufmRVeGtDbak|l%!~lwW^bl2P|2G+T!L!$GHYiKWf)QU}FU%1AGgrM~ho@SfKQ-c( z@;Bvf5iT99d3lE3 zQkQB7>7?DA_C!%FRX}d*fD|8h#t5u7<5=*A%kVriiT3h10S&*rLnch8-TL_}{OsT> zx#CXQ9x8Tqv)R&E2I^l*4YHflSq(MIYd9y{Y8CFEws&KK zqN*TBjxE06sNz_G?x6?qNk6Yj7m`=`!Dckx1n%OvY+0=hISxa>cuC6si)S>F6;mS& zmG*973{Z^Z9DQzXhyj$zFl{-PW2!O#+;m#W+h|2 z1L7Y|eDH1xH%R@AiAI;=u0B)_Sqh3Z9U2fIFnm*TNX&k& zD7yTdKQ>k5NoxSC|8{L`Z2KZ3RU>lXNExh{^y-rZ`|cN4&hBT`2N}GNJ1JE)l2JRc zFZ+T4l^9AbNgR+Wg zw2**u@#qiiYoa~OoCW-7FX>YM42s>B^S;DK?h?0(Y16J&CN|E&>bBTM~C zv3zLcctPOletNEf;dmJtRM`Tqyk-|a{w;UlCiYR65pO+rI=N@*8v0GrhHROGgV>Ig zR*<@BdF5gTBkNu#qPTC}PO?3Xvm0|SX7@q7@=#X4Apyy)R4X7W>)z|)F`2jb2Rh&1 z@rst-$0l+^%8mH=Hc~rP@Ife}-Ph(CR0V+r4~ia@pV57nsXZH~?qMmU3N%aXg}7(X zMI+ljp6f|FI-6anW{M$;6O=0M+*U4-+UzIr*|eS$qw~chqwD1A?!3CiL9`t4kGILr z-4-+SenB~GGlfZ3(Lkx6t`w?eycIa~L{syA@wGD0@2p)f1Ej^k+5a92&wno2^Ex0=bcXtaA2oz8R2<}cnaEG9UJB4fE?oQ>_H}m6uy{5bG z++N*t*Ua3tf7GdWpR-ENS*LbA?<01mzspv$3Ldo?HgQP1ii~(-lbYL_)$4!HYdWc@ z1(b8a@z)5gTqbZ*5bohq{~+X8J<)YAjlps#Kf>D!;XW1PcMATRnGkVN<7YpGr%U}n z!gKD$qek{+-5)lr-H8#xyCUCVg${Sxz0-Cz%RMe%SqmC5l!-1gZfJl>jT^>qlq!ea zUibZK04+1}n~%B80|m9R+RK8>ZQdB)i+UwBZml#5muf|I8O}8J+cwIM-_8! z)jG$jw-nf%%KVh!CVv6lKK9(-9Rr6(y@UvXvy?9DFj`p1oz}l~#Mg0{L*QiFVf&lG zhVk=0>L3^-ZmQPmyrI*2)nhekYsuw;6sy^uu3^`WU4G^-0d1O+8=l;aY!ayoLrmr-(NTA|S~R9D z4~qVlmV?9cO7mU{HbJfY=WM&;VK28jUZg*}(||++$ai4_cd*Q(=$uOO$*#3Ma>#fQ z5`A+Vu6NVEUffMUWj5=|*u*>-gTuOpR&Mh|h^TC)69Zl_6q!;&M%HA$lU5R$g z)^2nX>m9mb>YDM+eYX74upN|peN&m%V!L0SHrJ-Z?O?m6%p2&GQMPK8-sq?U{3V)I zKWewiLc&|`R7@(yH>T;tHM-QuBBMyDqPJ6j`t~)cUgLCZcKR+;W9rWFQZh{uPBB=0 z#@~s?aL_K-=EC+h_e z=3Mpm^!(GpWP0Swblr~Lfkf^K8qXbzo$^IF87+Ih6OPBU; zQ(7v2(STmzX|aOw-q((gX3HxCy*==J5ZOK{FGAl6{jB-J?z61@<)zhB(GS+S**Js{q&v6n}8^{?+$f3 zz6wf5Tf{1Js%}5szzF-Z$?e{aKx8Ks6H5E#g*&sB&6=iRV6*#Q`y`AY9{xd@l*^?U zb$y87O8o0m>-2Xs`F#%=)Bl6#a{+Je6KG(hKgqQJl`SpL)+$|V-fsN{y0!0{%DR66 z53ARim4{Hh*T0jGqr5ix6O)7BjqO%;E!W9m_H?aghIc^P?QFz~`qZHKiGJ~V{nC8} zt4*}S00hr}(X4W(RDamDv)@AO9pK+g_bmO=HTx{xZZ2$e+MV<4;oeo@AqJzl;O<7i z>19^6wY?>S&>~vWv+YhIY<|1#t#4m$_0P=KK_Q910BZ{E>+&>}IP>H)-NUyL z{c4`4w8v=AIX=eFR;Hu!nhImQyX5Jfv#K<#IeaV#Mh3o!+!6D;-axv#vnS@AFrMBp zj_zWVI5j_vzH`bb2VpUc`+jjsKZB zEwaDkefI6yUZ6wi7D4nU$YW33)W5zj)8id{?ve61VKKQ$S4)63O{viJ)d0T$nR%gc z5ar@ufcbNcklGw36NIi-uC7u-01b^M#=ZO$_Go+*)F5N4w_&NX)nLP$G{8W$zU2Z; zn%rxNyJ<*t3fT5Ba%T8?Jii5OqWGVxy4jwv);ZFkz%n#P%bOY|n_}JoU9@>CHeqTVPEwT8~UX!Ro9TNS7yg( zB@SBcP;BRutZ~o@7W^?hcv^JnXzYh*SDL80xUi0-Z##^}sLjc3P}cC&l>qn{p1*F) z`k61**L|ewK0vjEvjAt*pH!?08d6+Ph6%XjI~ik9pTZLEEHrL6htRWq`)ly}XGvo< zW?9tLrnl?7KG0Hnds1y_lUvzO>Z9LAa#Z@7Q~xIA1*nr;r$C_Kj$N#E8lZ z*q95;4lt7^Wkw7vulzK~@2uXdC3<4LA=Ki8D%CeVQS+KLs?b`buwHtg(cY5)HuJwgnZHw z9FIuU_Dn5eAJGqe=e!imF0-Kny(zaL3gPt3AiJio-J&Z%Sn6qIr4;uS>C#ouMOV>Q zANXb49@UY*up`a&aNUnDV5;$w@Z%%ajikmlm$F*lc&4JN(tWR` zx_iq6Sw}j|BrQU%hl{aY-pV8Pz&kQzEv64`q}IozMkCW@>@Gy4njP(~jL;Tg=Z1S) zPKrNR7^OkQJtY-#ZcTlpk@JEas)+3kchWv^B^MX>vIQAoW8IU7jSd=7*(-rY*i zYC)Huny`!?7fPrnP2%K8ypv5STu;CeuD)AyWcDAaTda@MoM;nLQ9a{IugiiXBS$R; z$lgS6lgi9g)b~$cEv0{pP<-=&7QpeO1EX#w)mP%`#ns(UIcsy%@5N`_w14|&SAhIH z>)LZ_hZII$SN+q{fSK_ydcU$w$jZ;s;pL#~8I>v%c||~gZ*LnZ4dZm?uz7Inp-4}~+VupD?sK-^qV%wj;fUgP`Q3J+8On(8YKpdEY z2TwC`!*}V0Bey=&3eLkhh38^Z7Y8TYitZzhj4VLk zTi!LtU;U?zw!8dbAwxl-R~%+3=Rz6S{leSloP+nqiA*dO>$8-F?Ue^2~Q%yE%sWlf3;dQ2$mKoI+%ge5<2ou4!E|_qS zn1aO*Q@7P5WVQn+;Vs<5Z!F^4dl2b|5>f^;_S7>Dif=5iw%8AFz$#w$DbByP28ah} zs`JWiNN5H1+kVg}(7bN_-j6nwTRoj$`_-gxImQC<>tJviRd>(ol(rr}Io2DtOtD#0 z|3f=sYe-q`Q^JvT8ASb!O{t|Dt|{^Ja#^4SrSe}up`;T^@((9Yi}&|r9>(W+C5m`# z+Ytg}y(+>tm)X)!hY zaAO0Pl4IY0POfcr4n~cgA!Fyife38YR|IlIuMLzz$cYE;}9NYW@2nNbNGZ}t7#+x^2L5{KHKV6Py2BcON(q*(} zDF_9r>!yON?tfJuCX2!3LNeAZZu4I4)MdzFh106fs@#v_4btMH#YX==dCjW)P`|sV zv}DloTd3o92(3tj$+%&;4(^n;beV-isiPhR$)u?C=M=iYQM`6@e7&tcG7oi~bljYHgi#*AJemz*V6dKiRzhe$7wL*g-cJ z8y7K_UsR*8lccjnRx%zmeOb`bbaCuI|6O36#IdzJT?yy7fu+Xh8%kh~RlCo4_|?Gm zv)@3g7e-g18xgYb)y<)9asuX$*h*y_2EyLGx34PfC^U!JwNRx$0LJgMtMgc_OM+jj z2BVqkn*i7aF|KC6L=#FABJ%$T4)i?!zlAXV`{>et&aBUFpV)Pa(_>B0QT=W{zStp4FV@W(D_Q*Oe6Ev{9%>dI0tOAR`XrV*+Xq zAL7ie);@iNq1PWqv064h3LmuoL|Z2FIua;3{wUQ$z+B$~^qu{PbC)b)=G6$Iq&MOF zuoB<%WLI47(fMC+uCqln6xp_Ol4Fvc!|!^k%X^KK>`_fyjKu0kK%NzO_#4WobV147DS}$D7z* zb5DW3|6bHWiwoLPZGV9L1>_<$a<_0P(zS}92NLK%*Ug`}r*icf>jiMR$9DJ!DLuq@ zDb<4HA$K-~k`3y%u2eReBQ7A(gOD-N0(er~*|dDPebLO8__WY37;V5Taz=pMGjn|w z(TR!oSMl?KLR}Tf*PgXK`iI6kNr1exy)%S2hJ- zk*qBna#@*efs1Pw{bQ(0QrGfpVy*d94?ax)8{4zGJh7bgOKkaHWA~;@#Ky`7D{K;Y z7d%M0ouo4*;2v5^=891b!zBy(zT5{mmsP^&y$!f_TE1BcZjP569~t5g{s_i@BJ)D1 zhsK3nSih0XW)&A0EVYd&3~mYj#xB5xkdFn)UyhS0n4;EOmF~aeJ>|l`@WHM?E=A#h zxMfeZ^D?4T)Bc#8VMkmiX7LL6vhC!baQgqbJkmkO7=*u+7gMWGg}mmbo$4>I2B&M3 zBL3PSj@GeL#d-I$@JYcjnsd}X7$;!VD7?TX-iE|oEBOE*&c{yH>ScanH&GYM`I<&K zdTK=+9d!pe#qlU$)c0*dkwk&bq@I2W5@7j{>+wolbNQ@qm3G>l6I@>N_Sv)PFJL0p zrwSoa*+Sqqb9Qmh=yU&a(pU2z!}%|OKuf)NzGin2|98-BH3U9tAl66tbN_~=`Qmd0 z_}JbBD|R1{^zpQJ|?JS4sgEY5m9 zbn0HrN_ER$)L&4-Z*yU)6F~YIGES}{@(5iEuNUFmm8om2Nu%@L)US!{ki`eC@~?p;m#U-i6|9=kk*?$pj+{ohD2ik<#>s9FT)}Fu@M~f_Pv@6K5XY z$eK1jADKRA#4XIAi+*LmR8!hrCE7elsdsT_RX6wK#{B^9Wwd%_7OIU=QRZAeUg;_W zZR;RD=J^XC8x<{g!O-p2$w}%iXUbr@Jj_VAt`ax2laaz`$&4-`0w@RLK--Z#z~lr> z2cArb1s!6ot#|bjTIc?yymY=SBp786!RAgmKipdc0v%1-Ye5&NhFJzfBSXsQe}sW< zciJCebCdPtWXoNCR&-lft-{SosXsIDf_=yk&0S+iRlcpDKy&ELn8EDlsR3uHIYE{2~R!oT{S>ldI*fNp6{HV<<%r+RkA*X$dIyaJ>y@p4~Z@u6~Qc@{?gfEpfKpw54iiqZ z;7Jpo3jYEuY;Mk&@Pb_-r6S`yb`@uiDIN|5+$5pwk^Hi>2F`)!G#zq#0n8N>*DBT4 z>gD&gzn>ZoV=Q3&@XlDfdE+`g{~~E(WtwrfeM1&i85BD*jtZA5Db-*xj8e-@p?Sxi z<$V6Zru^&Cb1-S_l^E6*-6`jds&N3!|I8S}zrYFjjk0Ll>h8Sq69Wpa> zyUJ|mzd#NHeP94&tH{GPXTVQ0uNZatoY^H%YucJ|Pbhl^sGtTE)YJ6I{95Z3Q@rD> zGoj$U0H#4&kldiif#3`>GIF7#wcelKH7&98_^5jtw~l`SQ+0JqhUM@aeVp6pXHLP=qPT>6;;t={(1Uj^HQveE zjb29Nf~Dnro(N8Q`ry{F=Kc3BM>MfmIrmV31wvT>mQ^RYtt=ia1YZ}chADVVnvc>z z>ojsSD=qI}=Y$93(OqnqkMP4_iY7h$Auz0;SKa^0G~N$t_ZB=UK_2m1Fj2AK3G5{Clv@@e+;U*>svHDVUeUYup0eX+Q47b0q!(wekW!BI> zgLs8@Xay&_h?L-}XJe|xmsjN9516h!e6dTdJh*yZr|AtypC%HS-eW9Ie>6#x1`2tE z4~!23t--I2h%32Uzk|n5iM@BLgyVW$v`+Iu`T}YnJ7g%}Guk{G$AZ^98VMVq57R=! zrGOasU9kDSRtlzXu8V2R`BZfO`HWKjs${bKWS`*MwXE1I$*h%5L7-(VpChYI zX5I;Ab_Nqti;H-1rvUxVq>5YjTX4*+4-5vQoXqBg|{75Wh&Mk6a>AI||TSLo1h zPyNVzMLft;y|JQ(xx%)fc;BRfesFyG7CCX;SZ}}TNq}Y%JlG_a^q#`nt&6Fny+y0w z;gf9C`=89;NG}m|5opEuw&aD&1*4-f8pU%tu)3+a#Zuk&PlNJNY!lgEz{jzg0x88O zSak37^-}X@RHb_<44+?>`uWeR^{i{KWvQXAa!nsIF8_z#W14A#N4sWl0X9%s>{}K( zc(Iv$HEq&h6&zmQQN*q$WmJ$QOCD!ryEkL*=VNg(C6sV0dz!7{Ta>2s%`3=OLH67I z*jf8?V0}iOezRdge6_|`E6+Uu1IRX%C51C1*>h1@?uyV+Ms_kwT0@GM1p>fOVEo4S zW!c=d|KKlx@h~iVUk>?Rz2F-@UPvxE*^8J8^Z}|}>O7W_?S&{;ueT>EV4f|j_yL_0 z27w|7J_I)o0K6wxakVQvOSVYfl%vsR4yCgke&P4EqRSWRS!>`Ue}ay2B>3i3)8og^ z^pT#$moZt*ad9P6TOsUxSJfYkN#7X`HTmi~+Y(Rt7!K7J+CU3Ew0~X^BbkWU)mKl~ z-9)VJ@aHFevuiL2I1z+FTm1Zl91`TK%h^>UAx45TWnD9z8DV-XlBHj%kOmp*b-gsy z#=0~-Xg8C7eHg|?E9J>_0?!*7JZCWiFS|hns1GFIxA_jj9cpry6sXc%3I-O*S;_?t z7MbttjyZQjP7NNrYL2?6KJ=J$coS%d(jk9t>C#FTkiN4(-ZtIOR#i8g=jJ-=%6HUk zFQ?R>+n9vwQYCF!j#%I%#X(zwP*;}YFzNbBj>-z7g|06igA4)*Ypvb`dxej~jdl@i zs9MAEyn>YUA6&fMEAlspP#04>K91D`R~5`;nSMSjb7aEs62+8vakTdG4IEh+sB?a0j{@BM(ybVSmR#TC4{o?Gsj$NX4iVBpU(~LdRx-KAud?pKn4T$ZIjPeo9)a z0fG*KJ0I464Ab)(tg~-fl^&`}_z^UK^yH)P9fV}ZjeYfcWUQAtIkd>p`FYYRh8EN` zfa_B;P~J8Bw&*o)6?T{lai1WaOp=xyimeLU%1cf;{13weU8=r`6WY2J5QpMBAFx~O zAL{dAT(|LU{l;!m2ijh64}c(U^HkxYC9H?Bi*l-|l-w+4VAmEK418%)#8Dp}VKvas6L_{2rTlu$j}oI}`CH3@rD@0|sTm&VCQUU9Rw@R9z6 z5fhegzE<1W+&(5cil*;UefsRhearKiFeHMYcq8Yd*sL3 zC8v7JgMQV(9Z6@L!TmT}X0zd5X!SyO3cdEP57=D@gO4l0Ue(2tfYT2~-Ul|7Iy3~t zCj1mdjqfw*V`*c1XktQzpi-5!LC-l^=E+zairz$9HWjM`QFzrXur`%DW(PSe6Q zS+feaWy_6>Q8k}R!+#+@9?SyGqpX)~@54VzmUUXhKmSknUXxsaMD!H)M6 z;kuZ4k;H1sT-r>pwRL|_uIoA#$bzr>yK!52l7gp!L06)PAl;U2IWF2{gOUz6gis7B@FFvPkxnB4LN?w^s2t0ADwm3yJbbMO#fi5v1xzM{M3BWE1 ztMl8Oc8vD9%}EojL~$H`n4YeZ@F&^@l3E~o~7xndRy!BU;gA`DS_pV1$8%Ewc{vF4yR*kMrEJ!D&UK7%54@$ zuYPdb&LKY3TMSGzFFN7%+~j;#mSZ^Q$yb|Q{9i8j`+wmZXuI6ZepumqUJ$#{yBv=W zI&@#VMJTow?EZyv@n6#={!=Zv@PEgb{I8T~jwA3y5&Rhgqh<_`z4-=>|5?h~o4NP4 zG3B)2pxFHY8)6zUbggy!7l5osfpIQ!y#2T@Sh^w@Ry}W8)2`Y67w`uf8H$jg`2;>F zmw74Y9C;GiS7|y0o$-KSYxe|Bfe69%o}Jz*;*V5<^z9)fJfi-eC%!x(Of(bpK0Y88 z&SSZE2pE;R&iH!vFCg>N*(F@cy7RCc@saiqiz~C*U|pRTq6cDfomU)2F6(Relpd4? zYY#+K{AW&2aV>FEPS04{`E&mm=})SE0r9bM&#q3pMo#B4a7kXV>%q=UpUytYd-Qn( zA%-52(h19{l0o2&{CmtJr^PeER+~0;^K4kWUy@;BA2j)wWzO+k?&nh1R7$+Zd#7!F z8xYy&vi1jlDRXIx+oXkER&jM?e>u;NDw;NFVqcKdk9o?~fVoii%0YJZRiF5#x{+8{ zV0^BH8iD#s4l-wJ3g{?}UNI{}Nvih_JoS`40W4JbE2~^yOqw#=b#ojr6&zjffFtOb z?(zs(Q&LDjM~n%Mx&b6sSj2MUwpL%j%8lK1Qwxo#pCedA$&G^h>^dcKb<}=4bEIoH zN{7sxs2_06b0FyT6y^|g7#;N9D8D#pfOihwRD41BPh|r%VCR;{bmw7)@dHPB@cG;m zu*K`y?Z;n$G>+}*PF(u3UB0Gb{4A_bDF)=m(Y^LlFSYGqWpG2{fJxWY9^>3+j)H5SVNDdma?WZji{NSHZK-*$1iB{1ir zSBsZs*aUlBhAeDJEXhYE++D#zUq3}gl>03YEX_JUOJ*@?;+583c&RHVd3t%#T z0om!NkH=znX<{1&2z$uO%YeUtfMSHuOXi6=mJ%N5)OlL>7qE$#3U|r>3m6~0eZI+l zQgYf$6X}RDzfsUmf~^{J;wr05;K;8{a{C3~LynTGm?3Q)7E~G8ITT5qdDb@%ba4x; zM%r;c{sR&MV&WpqVRw$garlNkRK207#=U}DLD}-_aMC!M-W2Bn4yurpbL?iy-Ki$A z2-v_m-7+VZES2Tcr%IXot16{`j7p(Rw+|Dn6EK$+HMB{61dw=_F~p7O&cE#VGvrDO z(LjN9FmT4cSi$BaEu;z{?Ch`ie}}jqWJW&5BAOs1vuDOn(Vmnie29xqyDR8|aXzSN z?Ew$mvIK6q9DjQUaxmJn_NCt^T(XFx)K$5afS;D0ULEdcv7XZ|wogo4DNQjq3{r^J zWS@jRuc@!?&y5S~Z_3sC<>v0C7ic?xgaa(aR?r?hiaV#lKYpN{$%HTN%(JBZJgOcP zUH*QV7?RaP8TC}|CpkfL2K{B=0D3TZWeHh9z z+`3FLb?yadqxgF6H2BNpkJI$f{RH$zH4euh1iAu{jv)C+t=|cYyp3@m${Wpq+2J0u zL%OL>9>|t2>qe=_J#qODGp?TOc!lH&h0F~{R|1$h*Yu(TR4cSrH=H?ZxGk7{Z(xUd zYWsPwK%_qgsDD0R3jxPV0ddfb8#95o0#>nP&)Wh*LGPnyO4}?6*5fXN}fS)2n2Dwusd|b=QnqJ8R zXe9(n&4llS&C0!XE9#k_rLXf;M7D>@0rJIFchJtX*Dc+604>%gY#-zpgfC`2w!lha zq9Lsd^|Bl_m+On9m5Z>eBVY9or3X4z$*U5yZ00VLDPbx=nW&!rJ{>OtRi3W2^$|_v zH;Ex#B4Z+Zpii_NrWiyQ-s=ee-#s^#Y#q_}vnU+;Nlmf5_UiF>475SvBDrZcY2sM1 z%#Pi_tl5yfgEL-miB2me|FY2gtqA0pK%#P|99#L4!0}92j_Jj-ja5Bp$>djmc)z$6 z06H_Lb#wIA1{RYQ@(XJ=)=Z|hx?yVTvsWx%ZN?eG8Z~-z5X=_ng;En1zOTu8b^1{w z>v8l?3aP3fUI;WhZ_%bD2@w;*t^I2t(M5owr+7n;#Q$xI3pYz4L;t5;u=R{!Llj#Q zhHOn;*;e^)gO~fj=lzsLw*~1+vL^coDsuv@#&0C+izBqlrYJr>*Sm-enwwBd_Q6h3 zBk2)%Q(mIZIa!1DsFM?cJx%|P|Ap^)w8FFQ@rj};b!NMQS6-FAifN@D(SAEzJyfJJk#)5H(C^!428Nycp8?E2)4QkTJjK07o9#~=Wf3g;;hF0}?xh|pb1aTN zY*gO!!$0INbyJ3>Jsg*F~t+@Eg6K z|Eio9cS(f{vw4K0ESZ)~?xqxM9xaES^@#*2BLoqqq}VJgysVpQB<$Ai>~^PasdEco zZ)Fqv?xvt|YT^T?3(+^{MHB5Mqa6C4J>=Fz@v9HP^7c>5N@1-~W3k^N`@O!tOPFY9 z7f@{Y=WC-i6!u;r1`U~*=Bk{l-#_>WU5i-#(7-Y5ZGQgYg!Ik!>n;vF2Bcu&_<-`7 zvzaqU$-TNH?VGrd3>jYgcNm7$vQwuP--+aTFBjV}Qo*U~vlcPP`1yhR>e~QM8iG+J zLnqqU6kFtr5+b4-f|#-c%^5_-b4-|SrEcbr=S|nZdUYZgQ6G9PF4GnAiV0~ z$M@nQN*Xm0+O09v8RajO1>FpV-?1Cyc+<%d#Ds#=4Aqtw543R;Gb?>Wx^~ii@FGrh z2FFOj3WnMdi^Nyn8(wm{ujoiYZ5b~5X+rt)dGD}>GpdTY=cHUBzw}JR%s#6PtnFVc zF{7}cPA^gIR2VHi7tL;>*@(Pm^|?y8cjLD2VOo+SXHKzPcrPh~h7zr(---9Bq>Y!O zT!y@4fmF!tqdlxqImr{QPKyhnP;_DAIy}|!b&6tjs%Hd~3rBux;G%JTT8}fIjy55t z@_m>^8tijaFZe`CXm8=sg?6u2^V6QpYJMj=Te;_dp(Qgt(s`o@&c>f}<>J-x88xrgoZFN57eJ+Lw1%2>QOYa#VJ z1~=&y71IQ5kLE3%O*{d-Y5^wSY{Z_o76Xy)}#(3l_~--V|RB@vfYP&2XO-?w7S=VTyGi_o5-Y4+5uM$a z81{5|r^&oR`D&-@JqzwTEpmNIUYsJg=wHh;q6D}&ZihPW;8G&mGZFZ4?3XZ=Uz5xIpzZ|Fj>^sX0cjE=`w^LU#({+K#&EZdKS9 zJ87m8A!{f;GOVdT;5T*<)9f_&jr60`z$+p(X_Z5zmip+Ek8JK(?ang(c-hcq?L@y|i?4FLFT}eBpO~Xky5^uohK?*|X0Po2v&t#UTAKdMNhR zhBF6bPAth?3%4@2c+e6{o^AGN*8Ir?%VFHHo~{ftUAr+>{1@+8P#c;Pw8a&PD*hEY zPNDu;_RqA6vbirlT^K&hOv%ERe?+Ra>g+tDw-==~g*=a!U-9U%N)P#kIT6l${BRJx zo`wJ+547eM*AfX7Y3nJ}hg&0d`61N|X^Cn{7TMNlj9sGjJ>FvvnC;rbKu(Hf9|!ut z+XfM!UD4FhSi-u%?iBe>mUYa=(E3(L2DUHGjAlvV;PgT+U24S9W@Fu>)6p8b0&m(e z?G$7uf;kK%gkx&Z7+O!R^rV1d<03rBDC*jp{L*Dm0PdmIud6f_Jo4vxUC9kj*wN|W zNuXPpWWg|s)f>t&GgqP2=gayZV^;nD`>6k|asDqrmHr=d?*2bt=w~LQAcC<1#&)n$ z1}Yd?)w6VQ-FEfM%!L@tt~L|SY@k@0tBJe%!U?;*^PW9ih$1*quso}s+OWAG>&(Mo zhpf#DA!l?e=i;TA>_XWe?XxnzyR5l3C+6QgJdyVg>?Ya-O70Y-lQtV#oP?jiZk&|a z^JFGCLv`T7+Ti%OOLvwYu%|EGZ6tRy2|pW7S|B2aEYK+z@}kwk(p_5`3(xvYg#U9?h}f?emE*AX7avk#Ek3`n@u*Ey zb^w)LUA!qIRnd+-bKfB^+JHD_rwMhw>u(->=Y-bN6=Jpf_(%?W%|nc2(5k%#7Db$2 zJ&V|7o+%afmv5%WdTet1siFy+Z{2dBXhMN!)ldI-3gVp7KOuXDeE-B#{AbKAa%1-? zdNOEE8}5PAGk#B)hX9qTp8f&kwfJ7fq5cI7JeEAuBdBG4K9I1Wt$>-p#GpO1e*yBv z8U17YTln1$f^HVHMPvRKV32-4mk$hWCvzwK2dKwd*X)FPH?+0Vy@R&gOs;6}O5}S* z3HEU)9QNkx*QP&t$1b`(aA0m8-!qrYgC~@%4RjS z&vq0?yw+bp^v91LcHisuOB8u8PZTV~Rt$o)4zH`k&ZV|bz%FmgJzhnu1`OIqK9RR!yOFdi$wFpDPbsX zZ2ylXn)Yto^uzk;ljB1FDQ(3Itki4^( zoPKljgWBp9HxSqI9WevtYIz6!In$l5-+R{PH0IdH=muiX|jRI@xSy`IKC63Lrzg|1cV=9Q~%y!zCDl!o?fKNECsosC}QINfaoxUb`(k>Hd#sa z+WkUL;YoI2jO$2)LX2tk_c=V)1z=YUnlj#$mUhWD%Q?SSGxiss1jFbT3R?q*zH0>1 zRX@ej-S)mFS><#{({DT~376T_ryQ92S9WvUn;R|N=e?~6<>EY9#C93|9~YdBXzE(d zI-^tc>SOBbrrzBQJ`MzM4I(=4>uoYN9veY~Z1r};b~Xhcj}vd>AFM!P53^m~z^TJ- zIFq3*loL8FYrMp8`u=L1Ht4#%FbtB0_4^fV6X+eyuSI&=Kq|b4IR&&hZekOBITu&j#t3Tw_kw4hB{1E9I}uY|i`nL`7sA0BpbxFsFYXmLu7LG+x@y7Txd zbC`E7|G_E4(@+G9R{i&1fJ^5U&A<0d#~4po!o{jqzF{QL-ETXdLLMYNc~1s?*Q-%KE{*?9^`)Kx;nsqfj);J z@{U!%(MWW{iFI{#XXrvF!!#jZ5(h3~i23w+(onu0ypLAGfq``qI2@XCB_+=8rQ&6t@icHgrhBWh8niJEnWxL<_F~D_GjH@(iMA}AVAmDIXSGCJLC!stQvIuoY&}j@cM(`jAcvtp z1Ts)yVzG{SMdLia5p$_W0!{b8&KLUi3ql*7EG}xK;5uuW&#`2arfIdD zX-rmLrKVkZQEdarLgOnkfPY{&I-t3?hl$9oisYE zeJ_&Z<%V+KoAW#;Gt^BV2w@;})Ul9i-2ssr6n8dsAQ)4tCsnHCgsXN8RAFc=rf(I) zx?|;5xb)+hTcrD<#s0?H;9A6Yvkq)8)t0_+OA!{MEs^VO=w9CA!3xQwLZQM8=9-xm z!&h+U@j;0>D3AMyhD>z8{6 zTWEs)F@G~Hi|Q_GwiCQvIKE7d9~opj3Zqofc&1xer)@wQE_@fRyJmMVjA$G34nGW#O9`q@QnlT$2R zQ_5-jC(Ep8&(oN22r2I}eqVGy{TRPRv|5WdR1bsB;eiJJ`H0AQW_5WyI})V&B=gJn zLpf+DCeCYMz1nDU$sYUfS?(rz30)9d(`c>_Db|;O{*$|_3QPy46T8_)J0F6R9#9j8 zPzF?|w%G>D4)gSi=3$c2GAf#F7cwNGB-E$6c;`y$JxQCU0CGKYsn#YJxu;ArYM!2g zu4vO0xeR*}9Y!N^P+)n_F$y@pzA}s~x&#Bp#NKE)R9eK@PspCvvVCIg>Z;iC3crZa zj3h>_Ur%ymg{5-7mBQkhL-|^6u*%8PLuf<7-v+hc1$Fs6DajGAI#WoSLug?FyMF;y zx`G{TEe4?O-`xHDD9x{@S?iGRXYa0wPgssOJ9H||Uf> zlBRJEtY6p2r{#IE#-3$Frv^ZJwYv@Bhr54rzDWu7Muke#eZqn*<^r`s*X4-1)c*+2 zJ{++=NffQ{lD9Re##;h#{y+<%&G@|}L?m)te@uf#4vO;KMCy=^kJB~=`zM!{XChUm zoEC~M@)EK^11kY{JX+)`%1usJOHTA!{Iph-dikiGB=IR{at`#)uEh@e2%!IWOkte_ z($*y?h-4|6+3o{zJc~<6^RMhfy(#&GUQ17mXil9h4kNEJn~6_lM&;c^ZDYMng95Y& zw(ZUd8_FEM+#67Ml=^YnNbQt+X^v0wDszbQ$c~IbB1k3hS7l0978!}K>q=XAL9=Q- zM{Lv_#ZF`0up1e*_Tx!@oc!0w9GU!OgQ1$Ym&$lSEvf5%X_IqTNu3Qg@1vBaU^aU? zzG}9f)I0c-IhT>2rfH@c>}Vuw)0JBJyn6Mz%d~GJNu!RV^NYIZ3U>Umm-~?@Ir?Ez z9$!Yu&ZFLYxFfVpQVtMm*$|is0;ZAjDUK>kQGuQu=hS(9`IL{95J7x(J@CYw(cT#K z(E^hSv$e?AtIusQ?)yj9lM}E5_aK`dnVH6*YCH}V_L^Q|j}cA;eC)NcYAZ+gh;AO! zJ5SQbg*}&j?PL3VYb_g##mp{5@N+C?53(wdRr{O??_o({vy!(2_+02Q8r~q~5cpGv!SO)S*mymMq)U;5u_zyAgM(tGPHS z^!rUl*y&YSlpPBQeb!a(Per*g{(hr0L9~p`wn-jtq_gOAQ?U}q4>b^En3eW(6uR(x z%CRUDYp++{ru|5#YHT%$hm;F9)!aqG_ZRMXe*saWbhD@P`Dml=Y?7HV5;Vdegzm;wJiMeTJb^J3Ba{=(Hf#*2c07Jo$#&nq(b= z5@H4i;|S~BD9i9!m`jqS0BXJa+ed1HQQrbxfUL@+)MHT@3xtH2{%RyG1e{9Pq1!n$QWo1}hz`_yUB?QRn)!&LqO z(bfCSO6#e@+M`&2wh5_7kl^+P=(H#8h}V_db#di4X~Qv1Mqm=uJ4`7>wMHX2j)T%! z{f{HFf<~QRxpTF-Yr_%6_1qk$E5-B|zgKy^1G&p51BzE<9<(IgX)DKFx)GE>uBp=3 zpXDmZML!G^!Pc0weq@*gD7z^GrK6=1u!+%40{SFlRA+y#mkyNAFtt@mhK`0Ri!^fs z#+F~G&_{o!5(IG~ZD9}u|H(?t1KXNQPp5IX1qjjg`a${4QKi%;K+0mznz=X378VJ` z=A^M*l27xu6|qhR)zcvjO&|%{67MJpOSG8FJ{={&>7$9Y=XToa_@_dR``81Hc{uk1 z3-vdaac3-)^N>qQd#&)K8SRFIWeF+}9~nE*Gt*TEb;RUnJ*5ik$Yi#5c<{`483gdk z9HZ=-F48wE&lOHppXT%dbIDnBSMJ?3Mc%KadRdFauWZ!FCs(F$Zq3(13QaR?Z9mn5 z18yxhY{7*Yjp{zO7$LX6fG-KF-O3hiyfAHE#<=+am}UQ<(gqU#H}>8#D6Vf^_w57% zA-KD1Xxv>BG`MSUcb8zn9h%Stcc&X`+=Dy8-6eQ%4gb^Y%C2?q-RIW6r}kZEon5Q= zKrt9?vu4jZ#{2%B=Lxi5WZA=#bpZM5zF8^f(H`Fpvi}vwy5gJ_k|u7r`JxL!4Sx#E z`XEbT_LUp%rM*)E>zX#A_kc6DKqQJRxG@((kEhuvEl@^Tf@+#V2FN_~3B2>UL6O&a zqxNcO_+86h;={4J{)fBjJw6qYreb#;p`csuXcnvKsZbYPLq ztPR@`+>$|~JDf?n4A2*yyx4EmAR1j{lkQH9PAGZwoD$%Ire)FVCFQe(>z7l{B~9UymY z8F|%j8n9|;*#C58L6S~alb}v4yZ&Rh`}oCO;j^pZ&FfYrl^@!z;ZH^=yJ0P3F?nRU zYKKOw#V~EJWCMvdvuG)%TYOtA8o1IM>5Y?tBsrwrMXJp_UaIUCuhu{km0KFkwf?dn z@AZcM>k5#T1S9kfi_1q`Zdn{^=XNcL$@$_^&2RtULfe0dm;09<{D0w!uUjD1MIQ-Y zkwYQOJ}r^L-ZxWf*BT)-hdF!vv4q_q1@18LkOEbeBUtvJJ#JNxYBrz#KMz-*= z(Ya*>mdr9+={uq&@+kg?cQsA?+~cfV2fiQS%-n2h9-M~ z=@&f8pfb@-kw5Wyo3XkowMP4_KU@-3G|%5|~;e;ocmF8b$qgmrgOSgqPr|nH<4%nGJGl5VbFWab=XQ?oOMQ_n}Q}ege*9?l@*9T zwIX8{f}J2owqR7KFK=FKVDM?BD4wG)h`qPiX12lWKe-&uR8U{d!^-?8)=1Z2=%DiC zRpPc_)$us0LYLWsHo>Lf(G$S@KEYn-N@}`#v0*N}J;ATDo9Kl$qY^oc=Pjk5ZEecS z3U}fVh`EXaYl6D>wj;8cwWRt>O925rPFyX+QU0I!fgi=3P|nfHI%e=#+%HZrcJ+E)i=dhPaNXVYYXn~hGal6axI-*7O?bI`neB0qH!w8F5M)?^(uL- zDvfOCb*doV*3K>?93{G`QIt^?3;Ld^$3UZ{l~t~|_h5Q6CM(GgUW4Vcv~D;m##D6Jn7K|B(^vm3u|W#*{C94j;P?^V!51=(Gd6 z!eFGf{DUQZ1}_;nJC|l3|D8$k6SF`QVl>V&+RD{kKwIl&o#G#w@nN@`S8FloKgMEjd7jZj5FxGKIxH^Ai8 z-wz``dZvyYi}q)?PeWf1(;WLr1aH)Uvb>(NE)MFu^`y z5qMoGG^5fb133n15o7AL?Z)xcK7pC`r9cI(6cxQxGxRZnNkwJQ$cmbw|IJPAJu@)> zsEP|+!-!Bp4>7hx4;h&{3?(VRJ*EWSu&+ZQ{8Wd5A^jGY>hSfJE0oU8Fhlqq*k&t=FnVatPfiO=c+W>&zwq{2+HJ29fIFRAI#w=V4KI-JzV(toPH;kNhE&4P4ygtpUfY|gOclhSR4A%%Y`|F{r{NY*DE^%o5=(Um^zrd zRgcLoK#XgCUZ3*pk4z955Uo~TVc#6$95z0Xqe3sm&#Pumzi=1mW0}r4UJ4N!?FMb=LH?$L2ap5)@4riw z&~&hQG-N5Cq?5D7@}R(7JsH8 z0c4K9j3;`R?88O8Uk)gX;XRrk)OjfF8z+l*eol^EcN4CU|L#PKXPF2l5q4)h&LUUH zNcaaLfTPpbx9_Lj)qK}aE7KQ1Nd*HU3;NK?~&RT9_36gPyo1c`qEBq}MNkyU<|b=AOv9^dHx z1=th{3jg>gyb#{gVjlbp@M`9n>E#C;=KX~a|NOy+FL*(wa3c5mT`e4%_ke@E;(z}@ zy@vXRIw>%CYNe1YHTw0}0G~>}Mz2OW_333&4|7m0m8wcLzSaE1{O~({18Ju?H*LCi zC^y@sQw!Yb@kS*$tK~6>)(Ql{wVpL zC)~W5w`xjJYJO5Gl9R47c}~nJ3g8UAsalh<{8lNmRj4*u3kCMy4VTNKfi)iHf0m;W ziUY07DbskHes9|%C9}5AWUT~GfEZ5^Rd+$>UyreBy%aDeI~`l1&PZYD4OO) zRaA0yIsmKsqOEu%B!}j>8zK6!cBVUv-a^fC*6~>PD$dXk7rIJ2_!U|jo~#(qe1s*9 zQJ^`_*oP@Hha&1kU(X(9vE+8Wj|cHb^%ZY3O{v9wag`Li)%%o6*$!#k@rDfUgOO>)`T}_!J?>f>ykf6nH-6!O|F z9|=dK(+YCP?=>y_O$WVJ`@0{OVX$(PL{|;8*u2d7P6IpPnTuHq|MA{SP~b$ATdA|Q zX_hsocayT2@-@L5-PC=J4?=14R~s3T#Pc-OIjN zKc`gd;(=jXg0<&*TzJW-cB@0`;K`VY!rLR$h51B<3O9E;o@U?OugtMMH6lKnz8P!C~rzA3SMcWN)^ zv=Jh433SsVeg7p%QrgNkzPBQ-G38$tD);_`o0-XD?t382H^SbtblwIW zIb-y5XXbN`eR`dQ8T1!GOJfFdgW&Du`TCt)9-wqjTtyj}b3tFcVh4RO1j%grT1wIq z9X{+=s+1^NGJ-3nsh_F{tKqOO^l)C@)TV}d0)@VI_@Kk%QADG+*Z^*X!mE?y7m|a# z7xRnf8YIjF`rL`EqM}egQ|#87(-FYiS33xl#xi?diVP27jP)&F9R^ki#=q)D?RoxH zu^iY^rrW+baWzP1ZIT*Gv{6fZ8h4@SSgC~OK$ea|$+k0H&I%dR0r6<$)(;au7=qt3 z_|S^ht}M44w;liD#RENFDt<4T4S?7>0WJ!_^TO$1bGfQi_?t zuik{G$Abp5zFvkjoS6T0Fu|p0&Ne?^)-gN8~#JvY3juh8@1u9xJgfD@yVNMQ7!d8_;P-hl4o)sRY zJ6*arfEJA`+&7(mAx17Y`O7stY{%p2fzpHZe`OrqN`CRH0vk-lhG>U?^cn zu$$i}MfId_--^5Frhp^x?^>KoLpj0wqP5kmZNc{ZKYESi2~hiNPPPyOZvxGKA%6)9 zx3sS}l!O?r^}o*;-)C#yG`a0L)Pm|0pV4vF2q#DmFr%v*S$~{ez9|AZ3Qw%T!~qn2 zUdPv!8TRDrg8Dzn#{}5EKLm?j+-ts`wvw?hdh!e3oxe?i>K*M%I=1te5a<|E*xB3e zHZ;o@&T80JS1DgJC8`(eb~OUGMMk+b-=g zq?dp6lWXaq!^6e2{8tE?r}f84?HyE6Lr!UG;^q;2m9x)SrLB1FZG+Uhr1w#b!ws)b2Azbp8&iUK z>K|~DMibrw%qbN>0@FAHLb}4W`QSz|pWYqZtg&k)?BlvxGvQEihN~}5nkT%> zu`lj60r1Rptp%X8{Ro=voiU1;vLBd4inJI3SD`Np5RX=`mI_^61APUZFQ^S+u_+7r zj~>tONi|!-!=!DnpNlwl|JXN7ZRMbo%nO$=SjIW6&H5o+6H$gsm#@c5ovc16lGX*^ zpUDP|R2r|5wuFUCWtjxqZ4X499p!On)!yvj8#Arj5(v!J*LEXxNJUKxd_b%f0B9UH zz!^wP1DSE6DE3LVQId}q!ingJv%jpcL8 z`R+g@E1|UXf!ACEB*_)r$H2wm2N&u2xAo1^#M65;az-eH1rN5|RcLdKi+)pIt>=Mc z)sunAqpSQ2(9$P#|C~cS#0WCZ0&oaBRzEYE^J^>jP21tFe6IdjWj4y~5s8^(UGN;bW+5Z1LoGcWbKZZ3H8$^^_(tDlp=qI%C7 z;c@py#PQZ4+8*dhD2N`s4R+I_MXW-nsmY3oGK0Ll9WcJy2d@hj+l6=U%_0OeEab2Q z-Y9BleuMp&C>Q?E!(;!2rv5KnsI<7x)CwT{ka5Y>JJhU+K0~*|kl*^f2SI>X^o_Os za;&YNW|LZ;H<1={iTsiMn8%av7lUolG>xc|ECO2sk;HF)7GC(NB$=aG!ls$!ubY8P z78evvjoYSVDX;bBxCAJ&yzq~OEW?D1v(ZyKzclLra!OkmkK`dyg}Q~h3k3^>x_bSl zmiSeIOGMp_CVMNp26{UeQX27EdH;~KegC^aSt~0PmfjYxpk#WTV{X2 z&|G10XQH*|h76~CpM{J__!I@YSxN;H6SpSniF|3X>0sc6MNOT0x9 zuaD_Pll6!>c;mhb3exYxfk%xDptH2Yw+Nod(jg!hpA(y}t3&vFPVUPsm1|ZnJAF7M zks@&kE>Rp>QDjM&^Y$_Z4^2UUO)=bf!?z>{r!yn-!$KY^(oZr)h*!{CvQVd@3Mg9dqLCON>+B}IZeaO6Z z{`Gc%OVpmSZ<+F$j>Jtrpz!3$632O|E030F=%y+N*Cyz9lI^hVZDRfDH~B8Bm*6;a z+IfggtM{#PinICtL#G^V;9ARWPP$lw{l}TEI*&|$O8)UnNz&WXNxJLdn@WAQ>^~_* zD~k`E&Y-s&O-{ayB`OeOxRvAoQ9>@R`V2Q7x8>(=V(AO&cOS73Uz>r-9?AFZu>m+<)=TD6~em;jtWsgm3)cshI?+_32@9)RQW)&+ z7)pMJVcB_eL$(X6Y~Bs{QTRRWz!9vKPa#HU)z)uMHn(H{Al87WHNC2+Qc{5H@v%4rkkqz6rDsu zQ_qhxrhXqSvVpsnrg>_?^lL8{P7im7x|A!AX)7WdJb%t_bRPo0SBakrcfp+3{r-%( zUPGj(TNI7V9`Cf`tzUcRm=L|`5fnvw&k(gLH$M6OC41cWCO76LBFUgU!8-MKamqeo z4jjU(HclMb9>% zfFbT1 zu41ylM>5LX_h#U~0Frg>zI2P`sSj+WT~N3_0kV2JNB<#S^Zmh}!i0wG^5Z+cj6!J= z##CZJIaw1$L#FfAnwSTO=msCWr6T-RSTtrI=D@S)bfPUy%Thfd-gd7&kXhDSW>6J1 zL>31#?Ey~uRR(Pmoil_dW{?t~}Qcrs_l^Je&7l}w&Y;xZu-!bcy z?-MHjn(cMjF7CpkCIxkay<(Z;+`E;JI7P&VFpMCwIl;WvrVomFl7c4ONnGv*LgXoW zwl2NQuc(k|1qW0Vnzlx>+2z6pHV0j~!O4xg?0L6vXk$Jn+t<+%r>HMl{vnfGl)1k+ zwIfr(amFOIsRlVXAQFzddGhnC>92_PL84|Yk|*r-Z9zo(Isy{h8TH1dXrCY~bxr9! zP@~lwGbOhsob+Ng`Q5v!Q&Odu$&j(CI>WKOy>CI-la$4V87PoGM>-{no7GbIHLO6c zDAv>i*lZbaECWwr@Q*!5uOej8w#5n_U#KR2=7a+-tP@Ez-;!zBxcqK*cy!)<{qcAK zzj6_6S~GA`sUNpBiC}bG>G93`wl1;61d~~~r|6O-)E3)ujuyvGfs~#)o={ZXk7sV6 zGrbfbnpgL{;xK|S?A_*P4S=RioafV3HY8~J%<|EnribGvWRQ?efS?YL zw7bZ{XO`mVq~thk!`V~1l-e{gQ<2bPeW#xipifU~600XZn||{pp?H&pYe#(QIL6#$ zrtZ1z{FG2+ykq+mN0wUjAJ$5hiDgwQ&RB~^YqIEV^^u!m(?|h=`?VA9lloyd`wjLQ zAL7lgZeF`N61pTl_3dK*fF6$Ej<+&0yh`dUx-Kp|Z83yJd&c+W8F zBfr) zi1T=YTUTSl@Cc2u)y%?l0#MM=?5m*J1bK7xP{bNYNH|tj&^%AIceVCN2~o zqN!sVW~RycOYZb>Rsx4haPYI8c5^fXwnz+AMuIU#j?@K_EJix#`r6WSHIB$HGy^C-h$Rt19VV zNK%+MSHw(1n!l(y@LYpkTX*R7nV$Tg-c2i+aoT_sxT)#zKOZRRVP_}n5`1zZ?|1XY zfK%!pCx~Ow*L-;+fyk+g)@bqv5TW|r*@A?vEt_4I4h85(rtp~1B<^F~l>3b7^%r}Y zh^=+p2%8y~!<%ufuAHYQm7zwQULNFZ$XCoxA+0bO7^AzRi9bbZ2-J@7ai9v^>cBlP zzqH9mDq@jEhB3D>Zq3(Kuw=;cGqZW;wbtaD8ZLof?)rq+7z@ zzfsx|<&J*l2x>^MPooMAov%6a1JDH{!BZaqI4*)bI*N|l!Zvd(6;2rOm1td{sKT6KAECU7J7A%>3OO}cwn_aO3yz%67H(cBKPGCB7f z$EiUMUK3d94X$deUnB#4s(aWDdBByP-%ZA{9hXBYnOy(Z@;x^{lD;|CK0?PxZ45<# zP$IHi&l^50pJIO8tx}=-*(P|=KU?>Err!mqkB#> zPLKY0di<*-@IQ70bp9_{75}};z`v7R@oz)wvkm_e2>*i$3NDZQQM|&{WQK<`t7kBI zE3L#)wcw(iB<|(r0Mb5^?1cn2kHa%>eY$$)!M(C>b=M){Q$w-%ZFJ@1K|&#qJ7EZp zUaI-!tjT+UU9?;1c3aFDe9swr3NAS~*|3eAg!|AK44jWb`2Cz#TD<+NGa^s5UxJbk z`VWF8MR&aSG-fL7BF^SfSM*q+@DK>9-uLH99(q`napgzMz)T*tIS6ksVxD?)|EO?7$h1Ya;DmGb)2gZF~4yVP1l5$?;3g-Q@-VlsZ{cCxqiaRd~(^d%DKS{ z$(PKdAFD2w#B)!+A}`_-tzS7*uFDwFajrM_IUc|OnoTOy)TS0UG$b%Op}vA2OZ^tO z7^}7Fh06ZBV`dP#Bn!6AXgG+NR|%u_8)fDOg-D3uXg> zVt)pg7!t z-=xd#wFtn~s|n!{{o_WO{n;eR6*|W`%u;yUwT&EAzhS}Q<9In$XrtUp1*OVMhnEmR zgFSRHHd9>^*&(?}Q!Mw7-!z1Iy5K(@Jo`_1!omzV#Pe@qC75SZ1-THmf|n&Ank@964rekBWkqGZLD&#H!p;ffuoyy6B3*AFqYusZ+1hBT!Y%M7S4 z*(^OKI?QzeYzQCaTbnzjF|+WsX(SoRC&VZ91GAm!gqJ0oya8H?$ToKw-pdSapMhhq zw7J}T&g>$iL{NkLA>*FbEDWTP>t6<_@&NcFu7t(gcC>efN>gMNTdcAD9ql zY(|JZl{V*eL42a>3tLW*&?9Hlw2VUzhUHVW;bU51=_BB>N7 z@lJJzg`B=W_v{whUsKi z2R{8xmVWrR`82@y5_L#ISGXb#3ay1x9H;8qimb0aE3Etbu6ilCefrchigU4DtUvvZ zp|30_!+5h;X-e*N#meC^T7lGF?UXrwXwkh4xzHb{oV}9E33H=NmjD%!+1Ao{SSIVI zB}KG)qFno-inlW7vsW>C)msUlS;`$KnuJ#_h)DQdp_A^&~zA9m};LV{nT{i4l`B$%PH#t`T5cQMdIJZX3*6DuYNKmF* z>SWnuho5nAm7cBFIq&LGCXyEdSX>gWKM(tA5Nk#iYOxoLm{ms6Ftd6OV~wpFpvl4l zFuDvvtZdi0e%`#bP8ykdOX!a>ix9U%OFT*Ef2DDlw(xO{eTi7=4Chsx`^-(zRJ$sh z$VCuyg-qsM-DZNsAi3WHZrL#((xrDJNZ#Cvr6?cqAm>JEA8oZIzKNFXtKC=**K{gb ze)?j-6fS9((8)Cs)H4_PBl!i>rL{Ts#8jZhi@)}m3-3IbIw?u+M{cP?$JSG3<~W0J zY(sAwC*|4hcG1sgfm@win-&oNGDg2zGJF%^zyof|9Z#QY{4)hqZn6Ri?AX#%(Y)?U zp2E0(;BCWieG=v-K#%Wn%bPb`{u9Q}iaNCMzE{AojJJ=!PXJm1S_U%4TfCw9)~T!_ zr9&3zkz(hYAW3FHMqVoVlA30_L+U^N(NTY?FhYB=F6|&EooMH(>b-;GoLmLm2v8=9r&{$gSmm+DJh!8GbGd3Ds-LZ)^1pwahm#TS?lB(YmWQFf*t|wag+xdK32XW zdPO4{(SQEHLLGkPISri&dA(>AB*iPQR;>zAkjp|mI&tQpo{EBj3lSg&gfRrtke7z( z!)4LNTt_--u7|E;RJeJ-^`lm%;#-KR2c_6wfOdlp=LOa>8$X#!zsC92BJhy`l4xdX zVji5PTd2ATRrZG@jcG*>a3%#t)HOh8jPwZDn?yLQ*-7$*yGR!1cv$%cWz2DW9nnnH zM#-|wtIK)C3e6qBiA<`{3V=h7-5*H17r54-KRcO8Yk0UDYB%=rfS6JBVxTw%H-0Vn zd8o?XCCAhjV4((T83?Gb%Q$-s_#pi@8lMJ{&h`D^o^pj(F91*y4>SpUu1HT_@?RFWqFCpk+fn z{D?DLw0s#Is+SM0^6rS>E23Q@rv|Ihu_rPb-S+a=fZ-&s+#l)F1?9&jQw<;5l6j6} z^28%?CfJ7tB&mtZd9SJRXLqfeuPcx8@tDg19sslUqch&-S&9J*6UXllx#t+1 z89#h>8(BzST{Xcg2p+ecRC7V~Ms$}|!yt_r7J+w|@Xa;^pV{#i7sp|jyGDJjHf8q7 z^H`0}N@hHnrIs(b0uVcsJ;#g3B$>8{bUWD^WGa61X=K+iafQODhf*XHG-5 zad1)Qme=M-Uvc9%7bq2pkScBsM+FIXAF3peo{l>G2o^1;MPOMs9x$IoU`=?n4eC_{ zZ=ZR=qS~6*cc?r>eII9&Kk_aNlbkVeFEnU1&g8?onp?s-oN6qt5v_9vdD}2ozKEB~ z1F7Vl$e7CyYyej``%u~2c($KeWjd+BfU7v`j5ci%K^Fb+-D-D7TtljLdenqJgP)#e zTNR7vdSO%fsevY2u8$rCt-P>C+4aT?;9meADx`)mY@m~F5`URm1`y9jebMM4$K6+O zGPUV?ve-Hvhh04A`?|4vJ!OeaMd$mUUqjH zEaix~-)xCo{vg$XM5RGUV8Va%<~c~e*vtuiotm}%!253&o}GFb16z(044gIaHe*AA zxc*j(i6Ggs(#r5h9-|n@0v<~xh>gxyF64l zvEdRMhlYkc&fd1VuPjYc{I*PWO`cMp=SwX5TvCbJ&W_W!m7W2tN$H_ zUgE=W89O`c32V;&nPm(fSv}=diebP(Uo6uqq&RLSIPj__jmOt zw|E`C0`gDXw;vY~9qenH4m#)~(8;pV^4Eswgecvj>oNac3hJa_yy83Lk@AIw^^HMTL*t%3da zWN8lV&g%HwC;l(GRYn@yNSU^@y|!d&PVLTy%+`xJU0~qYr|I;u0hudBr;Qx<<6QlN znL(z{9$>ci10b6UJjD;*MX0jmstMv~Jl$%Ug!l#Zc1S%wDK-;=M9T3(%#$bNcan4N zcdnotz52PgjhVNp7+AHaUDf3F(;_HkYj#2w5SK|B+SB;e-KV&_LCS1TjdAjJB^GEo z30gg8H^O>mm3?`M;$BoQR$F#@4W8DT{bwbv&rC98;j1U(!3FI*138S-o|$U)&SaM& zDlH7%T6de5vU`U6r5CS%=%!M^GUSm3S@7LY{Bnoj#Uy7U+g1oj8XvZAAsh;N9lGk5 zeE)8rC#*I=;Z@nh@0Ex*OB&u=r#BUk&T+5@C^owpcu9V5Z~}Ru>rgjC=`<|ucf6GB zSM=`sH*hF=;SX@y^D}Nyvag?cqx`-h8urV$Cx6DYA{K#4(&|Gf5$$qE#Csf@dAV#O zG9+z{B-t`vZQ%z`bc_dfs1^>O4kb5aA%D>Dwha9+O`(;)zC!jb=Ds+qZSoPiZ0zI(qx0d(>MF5xU;@k=* z{vkn!UO2J1(!Rt7~yEuBw7;NjN*VEL4Y zwCy%x`YCNXA|29|CPvHOMl~cZ&`YCe|28TDLh#M|-$!Nk=TRFzwuIAbnjDc?Ai>uv zBtKgkzMiT%naXPs2}KgWij++c>k7G&QUKlnd9TRJf{+z?EyAo27a=J-y?& zL*sswo7!&P93;HE{RV{RH$`cDQlEOS&-*Crt>7{(cemgXu3K6w^dKa!!((ri3kXE$@49gcs`)pUbQFs zoW&nZEd6e}xz%=uBAk`-5SyfRB_erw<}E)QOf_u?19vyH7dam?j-VExp^&iu0hO9w z7b7fTyM>+dxV1Q^1`KY(S&^!ZMB;|E2p))K*J?J9Mys1d^=qPCthY=O9o^k7gH{a2 z>9CoR-e92OZtzxbITI|e9;!hhL5SXVatt!4%@|>~<0`g3@o#bE@&~JlA@Ka?g!KSH zLbdj2Hq9i(?6yerLk*v8U#s6)AltM04=0`G1Jao6Er|Zx@v?VdUkUi0`{vp9h0QCs z7kV3R8P^K7=tb`=yy$nOz_~!nK&u~S0x8NE`YKL!IhPXu1rVqpa7Xnek!l&)2^n|G zA)A{>6q%8W`(5X_EZpSe?_DkRdm|dsE=^Xzv`$b2;oc`OueEO6jAZ@ zFE7qsl+}}59%*KlnB434BO)Rq2Wk|1@xb6DFMf|bCFWlHJbg=JdyD{M(ojziOIQ|s zpL?k54Y4J^h==|pPLQL@Zn--&q^Qp~;A>)7lVJkutR5Zp<%{HG1zKp4vv=!f35~J4 zU9iHqkhy@nHOL|uUD@(~(pkbPxx#2kL&oW=L)piVqStvtl)v;loKgIMIloZL+E;Y6 zd&wC1WEIMHp}Dxo!7YQu<`OY7Ow;w{11($v)qdIYY@jNA0J7~x7;?iC5trL+EES_@ zGq}GcOPAjuTIk3JSrQc6&{_e1rbe0@hx*%MRGWmlzU!7J4Vg(0&-RI^N8js&xMc)8 z!_MhPg9w{%tH5C^xlGWtcC+?@mt}w9VUrg1I-q;b%U=M6l9-7tY(;J|0FpdUFuWbj z`x*{}tLjB}1KgDS2t!4vy)C~iWK?lhATB5Kk%3%?kC1>01H;o&S4;?G2;*LXgumy; zAuC39*feFISR5nha`xLMQuB=h5V!N&TVqDnO221UE_yOf=XJEjj4b`1e-(h}GWeqh z)Oxg$n|TM9^!@_ae6H)ElFaT6QssWk*kNUnDgSaIwS@)R5I5?YU7)u7eYU)!shHN` z>$u}`jPeOLGzr!EQAdp;qnyc{ni)f)-)8xq`7Q68Y-om@S`&*Uhe{r!_;kaK3i_bG z%S;bb_~Mt*CaT=eb_sWK2GO5to8uGJ5>TKO=_Nj9Odqe52+TLeN(n-Y#8qH3tl=HsP|+j~?C!*hZWy40Bp))(RSUAMt)0yQU+3v#INU>y z&yVF6yu7&-Ez(@lZ&PjNC5(bFU~$7#Hlgu-SP~*Jnr)4~V<@h;pE}GjP%(eB#5$uJ z7hn3e5#X5&NUTX*liTBv(`-5E1pXvx^bVab5kbqC$<_YZu)(busv2MF+!pMYDCHv_ z0uc^%4!|{tmf4wlnnQJTR%&g1;vuxT@>0tQjLd%q(g^#LGdyh1l0!*t)oa;NbJgVz zC{jVS2D+$KBq@6R;e5|y;C^(LC~d%x4ted_Lvx<&0*)CauCEGkMFgqzaNl4N)Mb~7_E z>({NU6i4T3Khqu-!@*KXuJ=>iAxW>! zV@|F+l*yG5=p@QDBD9Zyfg_i~L$P^q6ULr8Azya z;=ClaV?!*~d4uP5@sXu&kfd@%(u=Y`0m(jho+m)rhP9g2u{Z-aNu-!XGjtX6K+Rx) z4>Mgaw*cWPP;1y{iLJO3_QYp6z(v3fM$bC1Ihe^3gm@ zZek7AD${=f+_uj+udu@>X(<)D^o*&V$oOd{rkBhw#jXy(j9FT&8bb+c&WMs4!a`L84r4R1En;*vnMw+xn6x7PU zKs4*05-)8Z9cx-!-)^|PHaJwSNaHeS=*u;qT3GwY}4{&kE0KZ*W z_D_GNGhL%ze0M5`fy;DYusHVf*|36R1@ehIy{sA8l{~#Cd zFTng^*V*u2fVzeME|cOtzj1Ml`4sw~698AGQNRsIo5fTAZBVm;Rq?;hG5H4^_W$rp z7~5_?#l{Eq*Zisx#Qw0&A?!rUGWbh8`KFrZBA|buYkSc{mn^Elm596v8*F|}IrO-Y z;nTUj^ZCL-hy~+=nm`=JO4KOlBj#>^efu=bpPXjNv_3=w0d3cE^)Y3=Z5$|DJIx9v z6~O1HSn3D2m^UXrqri3Fw@;Q$h2I7}c1;`%s7TqW}Aqp%X=S=*;o|%YzO%TSrNTN}0Pk z9!ajhuUW=hXr!7D1)%$Va;h7sdp}ZxCKk~?DI09~EIFIUqN#8v-|R`e%5ChJ$95jg zKbmD_pZwV1TM~DuLjM6|2$gjJ<3~e%{NO0M=aYWPpc!p4DRu2*;y_co26}%< zq}PZblLXc6`J+|E@gwh9A}aBmSEH0rqqN^s%wZ={L*=kH?uv|)4n{pTcx5W&@ZRr9 zi!#D>kr&eI5MM35TMy7>L*o1e{JfR6M0qMVCFArh0w)=~GJ`M!*8&*b^fz?O1{#fF zD^cEANc3Lvt|}pF447xoC1YxNB2soQ__{j2fluh;9E-fSP=!zUU}&2mKv411;5~hq zodEGIIWQ(cu~Kl&QL^bna#XaKrm>&npsDkqi^~7TSd%1R*Zl^@*qmP9~XgeYN0~f4%^&%`7QaS3%9N@M@#!`L#01qxGC5u z)ETNw$Ybj!Uu=6$USyZ~9YH{{J=VQ@INwS$aELszUpaOGdJT?v~wDA(+t@vpF z?i>O0J*9B2dQh>2-8|0cPYX0Mwkzk(f@}A)LXdCzkhbC<5#JL*8h77r52kCs=xpK5 zeHm?&A~>+6H^@F{#B|=fF7tSwUI-!(L>YPEjov~n@nL9iPbNmo>As;nm@ogrHERK$ zK2}MttHHY1y++T1=M)Cv>Qk_EBKPPa`SAj{$m7P z=oK1vS$D7C=k&yK-Wi)5q8Zq+ZqDlHevP)Rwzkoq2f$`bb;U9Zc-$RlYs#YTj$H8h zn7hwUU&)$&75g*?yCo*h$@t+J$Uo@ETJ`uTqqwfhnQVyjjJyPS&9(u0M|l4sGW4V9Vo$32B{52QI!Eo7C+>OG?X&zpsx z|Gf7u{FKUj_fPXh^3Eo#G-$}$%&xy&=kqg`-w$!I(>C*Fm)n3z3air8aE>^Lbwd*s zy6o-W#1|Oo570(%hN~^-&oc0lIDn)=nE+~w7CVpA>s+=Z!hb;YTG0|8Pqv9F9KaJ^&%MXVuuGyE(GecM!mjKz-kRFn8~eb9cEFzYdEfln8SgAkA9&dlQb zCS|O{g(T#rfRUdj!78c}CjkK!vp+xj5-euBm5OMe6`PmdCk%bG^u+QpaN*nxRw=tH z7h&A8{Czo2mlA!(0Kh?d9kQstv>cG>%G#27T`nTLbx{y)w0C8d0!ZQjTtn@*9SD9J zFAtN~=SmR` z%?NrpLXXll#lqluoZ9}sXwycm1j%raU)Mt0)k^f&GyfZVZyglZ*LDjw79>a@!QDcF zyK4v@+@0XiSmUn2-CYt~8gJa)-QC^YGyQvO=9~N7np=0inwqM2YHsa+dO!Qn)zut0 zd#|;gwKS>ihn#C@cU7J~e)ndcG-C@Xzi0n4W%QG{gbXjRqJm}G$H>(v$D)HcVcdADx6HXoWb%lbrO*+0~9BjL=cA{au z=&FiD$p55P5W2O_y0_E^w5zIHi0wAs&l%Ndd+;%GDG^v`O8Sy#e~wbbpVc7HPN^(a zD1Mc^CQL(Zxw;-;&FJ*5;o~5Y&2kzuqVhZ<#HmU=rYBY$_74Es9C$#x0V53O%Z)uL z9{veP04T|!jTWXG_tf4*Q@htoy6R&AVfSIIqbT^kLx)<#a9#~rFuw2OdItrj%tBtc zigYcnbe71{cq+Xg5aa;+Leg>5ooolOgC)auHRC|cq%Vr-{2i>U9=<-j2-%A&V^?3l zup(Mb=Ug-&VnXlFE-8iNUP#)vlB4YOfwJVpq~=47_N@@@=&iz8~Y&Pm$9m>^q}o2 z0qnAzJ@^0`QfAl`m@cesF+jT(RAR|hT(Ah~@vY-lB_1_dJZxWBK4&D7+^`mtcmJJh zZe6&-el1=8umsJOcF)ZPZYwpIQw0yj6DFg@Sl(q9hd2I^CF$!EM|aS6s!?aOp@jJD zC2W3jP}EtG0NPxSEPTgjhf6CgJm#Fg#*5($Vx2KNE18JaynX(B9!tK|Fn&Ney%x(i)>*>ZNlUi z0T-dD+uhhQoq0;NG}Yr-e?w&l%txoLUnewR-606u)dlBkEE^?|Yfo$k=*j1q^A0`u zuOj64u!=F8CMEaTz5=uT_m~7(KZ_&-h@R(@4`aqSJghzvA}8+)APP9>R)jjG z{;09IVAiNhNZ3vq*zvHloGUGsjA#E4Q}PaP8|NAFnuM!dqWeMUSD>LgG$JqGq_;qm z1IkJrHCgLje=ymYFzYB#Bcs_cZ__>=1qCz=tJH z-;NDX^Kl)%Z1~+2 zQ>BAp&!-0I&1~(&U^hrS&j5)stJ3#C0aSrfg7H3rkK=dT)W$0CxB36L z3BB!+1ZZbGTJqNH7x15EkZEPfOq1=5lB{-~)1u>P&XISnWI6dlOrg9^IfiT@$BIR) zg$sd$ww9rW$m47<&#h4Mp|YiOpK6dSJAYLll0i-q+L1*2O_~0OPOrOCk5U#&H%p1L zKI!bj_M4LEqC+iuIeH^0Y9s%mo;NE8~S~W-Tj}eQi zWUz>vIzR5huB0!dsM}L^?{e)}-BC)eALDS1_2fW@3Y`9-M4={bP8NGChL~48$Njcl z%~1C!9frG*7&*_rP|s9WmIkBfsU|euOetCCzjH09pcSV|lqw5Iak?I112sr* zm}dA9Yq6P^;LqQ+s1aRx=G|D9USncQjx)ShU&{IIRls5%v$m4og6{hWGqD8S86A^o zW80Li*2dbfU76g$YN$F%p}^}Q`OI#R@bESICR98+SxXnmc_fuTQ;@XP_@ZBdBgV4n&hIjUOIj{*5I8CTcojmaukTK^HX?sQi{cqK1-c{rF zho<}MHhw?%vAQ>bz)2Erq@6B!xFTVE<+_<<-ML^&lwOYI*Eo^46PLdYQ@M4y-kFq1 zNafYiHz}HojjYwLZ}hmNOQIvIJEtW2h3(sEQ?J4vUD8SHn7vXQfto#MUcRd93Y6Z?!yhd*Bj{FW|&wbMzrt_&#q@TzirW3N6yL#>-mYd zqE8R>-Y2TG{xb2uaeBKTZv6`w=fv{sn-i zeOBZD0!%9S9}3^ix|1&;De5%`IdJO;qjPYU!3h5d1Q@XXiZ&ig*t0 zb@%#bd{WdP;X(DJ^r_+La=clC2KT}BlSluuy))yM%SxXYg2wpnuv|pGjqD>&PrL8m zloNK7U_vGH0rGJ>mZWB1zeW@$HdpeDZ5}_VqisebLA698g@g7S7&QqAeab_t3q<>N zx36?T7yZ9{Hp!-U8VQ0Ihop_Z?%JH7J2bezB|`l|QAVJqq6SclQK-hqOq=wgZ2a{d z#v7c^ozs86DDgta^aQml+^L6N`9GfM&gKc3)&`+1w-n8C8R2*I0h?-rPv&Aen_UtK z3Qml;@kU+LVqVjx@VIQQa#dC%%;4}V*rsi~`?p7xcD|T~o@J_Jw}_u$^tEh%ixImd zcpVeeESDEjtKgj6G501rQu>4I7{+vn$Q%yOBEnj_uJ*Oi% zZ$8Yk7J)?Tj(JnEl$7gHG|=^A=1F>M7O3Bq=|!AhR{0v|Hnl;KD#{3NZxn;>(0RAL z)@Uu-8BWxt%Xs+*6j1%+C3rDdC||jKn&EUeWMM^?s8gk>vLo_x0F`I^@Nuphm_<2B zL|T_##Ege4Xvs`T89zYutZFvXqPn?=k#0Z#p%|ahb4Dn12V)2#yB^*m1gpzG?qDqZ zlaedr7fCA39^6$ajS4|ew5z&tl(*xUbA}J?Le(6uc4Yb8qy5Lwk337D<)vmH)Z@wH zC)*`#{$j;C84`zpBY{W*eevpJLmr*JsDu2jeRDF@?No4E}jPb8q;4k3L@#bPOZgx+mbtE!K1ee^Wb-uv0X#7`Y2jN-kOU2vJZ`)uG#w2_W z>yqsq2gEhOY!9#)yqyKx+ub(EsSnoi6>3^=@oX_IMZ4nGoNU8seSTZ|B$sKl0TxR; zR%V0rcsQ4FFZ-j*A(u?IH~=Mwx+E9l;T;dS_o^V@SzO1MU-=h8`X_zB21mGR_kRJH z2ALS?w$wdPOp6kQm(=~x>U4B;Mu8R=y#pJc$mMXVnb47SMcmH?t2^xWbEFm<{(ut8 zRR-uZDse{lAk8Lxb5#Ar4(VlBlZ%bT&|BFrUYlU$!}5cR@=}iJ+>bc-mx3w|G}?Zd zi@hR-KZ{LsvOQH)FNKX|LV-nXYJZ-^UkA9L+s|GH()Ys8f`33`J8$BZf;c6Qyy2&W z^<-l_{T%D+9=fLnt`$9%2RJFR)p|ohPNap;vZpV90sQ~9vV%9~#mo|T{%)_nNz8ej zKx$6;b^C2R=lte)^g*Iq*|J`LIc@C_d)d>1MpOPi`dKibeY%2clN3$1WYxz8=#i&x zeVLbOBD5xS@`U~uAlDYC2OTmp|K@~D!n{~43U81a9@z~vvON}`Sd@_>@-O6WP@daBx{b^t8?tl?bzyZ{Y0;@x`D|DB>n7mkM!m-FZ+#Vyz_ z4Rsfu+FKS&(~P1_lbtMZh+>wn^T{VQkC&gyZ$QD@u|h0xH=b!D^2ns?PUfPKplk9wfA+(Jtn z5<0()mKvC?Zfk}t3_Ec&P#f1I_{Q4AsP@qyjro{Vh{nqJ_AtAzXuYFS{@134F^2T+ zi+X`*{?Si{H;p{uql}l9ehd`pnr}N&rhI3A9Ov9fQv#$v<2jbXuF-wF!&NGib35<|qr6yM zE9<&jlJqPMXsu^KK;DTknk$%u>N?9yf|a7!Ya=pyBr#H0CXBMYE|o+Ihxakq0Ra^P zmVJqvgp)*<`f^QE8}>sXeoN0YrST$!kOX9&W}efBIGdY|X?)#vsarHq;B z3<0i7xf9N}CHKmyg{JVQq%KR<9ILVr1~tto2^P2zzio4ptSrd7t7tE0Qy>@|*xOAk z{23W^e|g}Mp%$6ABpFJ`a?P^d>MTfbiO{;++L|%V(t;e&;Pv88vX4CjF>x&*3`J!O z8$}6-$>kQjS;y{nAorWp{HeymHj~SZ40y zU6WP(k6@6MM2kc8MG0qMEd%V+wUKA%W9MD3lWnt+Ro@9HD;2!VAT9J^84`G7_%+4khnc@bfxB9sy4_L8 z+*GkRzduGkD+b>%srl9Y{bGhU#7KJDioLX+!GkCnV1(M~99>{BrW$4{?N4l<_rV=g%@*2DKe)Lbxj-Jw&7rPXgL|i>owVE>v-V5j9h&JO!KWm3AM>p5 zhB<-ASuYNn?t%f-k^4(DiFifi-^|+aId+?(+LkIp_E4%qSd~~2sTl2i2m+LU@{fPP z>xU!mRh@c~;kXaU8bN9s>HTHa4!C~Us%3AcEYhcL+w!ZHG2#ZO0wo2f?PK-{n=%~B zQ!)rmOkBeF@ z*txAp=w+57>P#}UEJFVX)#gZBzk*@}v-PDX+os~A(tNnZ3zG&$9qVy1wYsL|Df$T_ zl8idaDOE19<5x-+G@1Q57F)B?Xr8|th2=VYLLVSkDha@UyKl93c-U3i4Qx$%@L&_1 zv-QqU^DZnnLNaKLTD(_s8CPd1;zVG z;|I?eOB}k_IBePC8l?Q>i&X?_RwD8=Il-^wNmHJkUp>;%o@Y;&n(ArEi^!q542-Nu z&xLD$qKIDt{4~@0L&anImb9JYjtywLVF(3YC8lpn?>_dk1X)#!YhXb=8}fv)oX{6w zf+SD1ad4~F7bTof8l>I{{|r!ju`o=Vy@lMRyTRw@Vx}w!=iABi-yBbDLC)9kcO;@* z2Tl5rdmXYqPclL?n=5MfG*K#&2DB}dkZ<2qPDz&5JW?oHvw>J}@cPgeM4?y9aY)dT zOszyhAkA|NsnxuUsSR40>8YaG^7St$*0!$1IkhBf=3p1|`iC<*39u1itS-tQi@-8S z``b=<7)GwiS7&Jy4hz0$K=L#;>{=!oS?|D5_S=(W+T4mis_eB?ye}e=8UK^VoX#8? zk=0b}ZuD7}tX;sjmI<@BsRq{(&N4hJ8{>U|%z67h;wT~3{t)>mWkt3e)wjxmfn zm=fnZJ*z;j%@98Cw%gXchiWwGCpuB9OMc&tBzR*~sA!6rSx2&*RP>>AbbJ zhYTm}*+QFS!m}Nfr~doF%<7CSGu-&qJF-7^InhjZIOYcaSg0%tYyw~M6 z*>>~z(dHNHcS8kl3k)(0TdjPYM8|6@<1eiXH_%?yNiDifxcIgUqdH3HY_{q*QRY06 zbJt(M?V;svm&oQS_ME^Jf1%BGC%wd>FKtgLNn&B8P7P0=yK$@-Xy8-BnxxK^@L)g0 zfN`}OMCQ`3MN90UQeB(GT?RIzG-H+Cun`i*xGrVFZV8la=2j52es$1U5Al=$<*p%7 zLeR_86IU)D46E4V9T+RHhipdd=>|6Ja{2ckjap?1k)+$n6(Z7h+4I_FId;+n##s`i zD7ORg_XXG11Km09!zALu3;PuAS-)&Jcf2dpIo1?rP$@psaePuQ1OPhlU~6B5iU*6A z8dHViI6KMgEccRK=VPj3&<^Y*4=jL+3ngp-4ZjTZTgMAFStE!PAOm&7rGgrIq}`wX21*aw-Peb#G6K`TU1#kI=>M}hUD$)ZDgJrIib>x>VstyhKcX+}i?$KA zb^V6n$SOKgZ*3^pR8F_)m15S`4oW68R1IpYf0Q z>3_K&{rhST+{UBWkHK`h;rca7im95>#-M(|9+;>3jE8NxrYXn*@S zm=yd!x?DcrwOwMyI&tlcP7`$9%NMxh6}mD1U#QN^d(%!bsNl?yCx&)tXpd06kq zQFZ<1W|)w%dBln9H!-(@&|;kjI<3a)8FC}k6trmDrsE^tzX0f?SAa&jY+e^vy})rp^`%^*eWS;-#=`)^7yXHA zUDM!gZN7WgbG`RU?{=MI5Cm85OPhitYqcpb83m`w$qX@L1cn{^r#aB>BiVtPIA6*I zXX%&~Dxr+!Zj-lWDefD`q}2kI3#mm+3=7f#$!?R;VcK~57Q-+1P(KFaxCPE>JHp%d zQQ46eb%l8G=HJwx7vB<5vp8#H&c7PC_;R9lwkrtGg@qY`wzN|R0K@cA!iR&OTe><& z+Sr?8B5=}7DUW0wvWUoDq+7^#@XMJcX6B84LKQY?{7a{=VLI$KzvbYV34I3;BX;H; z>+KE+sFo6EKC48|2Xrv3g$Y2 zEGVopU%K}qh_WR2SJDR4dk9Q+y9W`rXi0=)PRQ<$&s7}3>RnfE*J4gq6@1C@r`+O$ z`=ng9`D`jPW$0edi;sk7Ob10hQe+NbDlm6A@uQHj>5`d7|Y@F=o0r z>rf=&q4N5o%Rkt=5tnykz4}_aa2{c2ub4Fc#1iGbkiJIwSf} zojuZh>}-W$=Frye9r_z~Yv-X@c8_Elok>Hzpn;AKkzf2#`#2($NmZx!)cl<8UZd4f zflp|R<@7Ua;ZU8CskRE4^@tfXaJ9nn5~zNO5y5Z^^T~O*?h{{1BVp(hEYNaYWQt$6 zXjOl4)WP;cj9ZC{=ZPD&mk`0!MbF$*Z%f;cxKH&4fnnP&|ET>Qm1`MD`g<7ICv`?j zKl(zWfe333$4R4iHP+ZBz&DYioTz;iFy>IZDK!s=H7MN@qUEx9i)0Iy@eG4cA+_75 z^%;jKU$zIKr>HAdtyouM&$mcajYEB;v>;@c#y*CSu->@wp-{zcn8p-TCjl{wCD*p4 zy*VaYjj|mTSTRpn?Kb2iy5fubu-bGmD?-v4okgy2dHeuw3`>l>*4QOe2-U1n6n6ZG zLAKy{6HA=@OxVz+dPsXMxHq#b%OUY~Gy2SzM<}PP0^c?1{V+Z|nEGv&J||U=QB_fV zd)ymwH*M)bLwk58NKmJLynR%?4d>&|5oO&06cV*G>so)foAFtXe&lc>p`M!-nMym2 z88}(fky)sMk9f8N7F{ydrJ*OxTLnp&UTx|1hESO;2k3HzI@L}%7Bm~P@^K|&-O-Cs z3a$`bTN7-{&2WTrkYcSnTCZtFrjH2J_zEjR6{op67E3`@pL4gg&mx+FzF z1n5R zeLT%yz(?JS%ZgxOfF%HCc@WAPg_fq-hTLST?JO^>K2`@Z)DJstcnRPBSE^vGS+LfB z&?La>KWvRCC9F~>U-A8mO}PQa@nVKL4}S*9V>;WUgdbsa-U;bTkH>4)samI}28%@1 zE^<9u?*=kOC$Mykw;E}>7Ai&F>$$PPx~b?kOXLV`t0ZfuNF{wYzB#ab|K)6oer+sH z+KqyvtD^GEhhT1+|7XFGR`de4nZG^MKo-Dq1OHJg>HcDcS#<^ZFm|q3+tq){OSbl2 zDyKn7-Kqp&bRxnr1Z?A)1>Y`*SQBvP)F<@s)*75I*-6o4c~z-0GVN&xsdT7v_wmki zl|ED`2CJvh*~j*x26BEvbVlb9@S(k7;5mmt6QJvzb$u+_{* z7!6tI1l8N76SP5BX4Aan|4p9bI>|lSbjPC<7*)B$MD{jG?D$kh0Z|Wc5E*4D66^(J zz!idnHz%aIaoX^iwIw7v74x0cc0jac{>4()+oCOXLzE;q4+Ya0-sa~D-0AuYEg_*1 zl~UeeN`^tHONrk5>2m(?Dwp>RWjTO2ST0fpA+XE)+K6JrLuE~iq$IgVUv!Y4o0d&W*9FAKJl<+Cln?Ny#7 z+=oETON5|QC(F+>%fpgQl7;YQ_3cv)i;fs*>)jlCsijgxIFn~-pCf8ET=4t#dyMC- zN0`%Hk|yt2VXjEa*>p1Z;&WHDWL@(0ITt?Q1|}p4)Qo+tNu03<*`9NmNk<%}mHgcS<>xA(A!r(7sbVA04Bm)MD3dnQ6%9>R8j* zTx+PM8A|cZ@fht${iWE~h#>Bb*rhUk*Bbe%HlVtu&hMo&-w%b?drb)DHm8Ywi$7DL z8H*;AkC>{rA!StcQs?*yOe>B;EO*#O8qq(Rs-yN!qmzWMR(UJ=p;p$iLGhc%9>v>L zVKX|_iLp4cV=zZ7Swbk@X?MzE>2`~K7^(Nsk9Q?c)NTH629@8hr4#M;_dAd0pVaZe z59qS2LuYq%?Ywy6yU}K2;u3l*=nkg}<3D#P8{+L0V^~HVo+^InbuI1f@x{`E(1y~z zt<0d*#-^qq>aX#wQUG>{I-~Pl$LHTwy7QphjlG%{Cd}j>9ADUPRYp+@@blGkyY5~h zZWaiyWl8gjp+94OCfpY^(T6DZsoY|t&DL@`pCfc3Q-Fe$RoH#d4%6tP84{=@w-Hvu zKL{Y^OCXdfU@5u@43@Z?2LH@&9t2{dcO#@$gAR4-!}x{DghC=;e|_FVwsS^=v*>{T zAP<$9%vez+nYvDsKS%wkc9PLGUG72lF|Kg3vGdGkgYnkE^!Fi}+Y8Q-F{52s;SKfA zENTg)GKycTcD%HIN&uqTq!aF&>KM3`##+v+&)an(0v1xj3^fO=>v-1ULU)MtJ#*|H+I{aOw;V;1WQj6fz`Cctz0<=59yu2(! zHPcM>ZS*^m-%dRDxG;jru~z+^p*R-vl~jiD1)$>d<>%&ZKO}X^%(4YDU`8NsE~+$TLQuw3o0_HKt9HW(I0zfmYn3Dcc(i4nb3m>u4necbj1QdM$yxlFjjik{-}COu*ATeu<11xWn2wNlJ6s z(H#BV*@`!ZKjWDVwThY9%DM}LOK&<^c`D!6UkB}SV|x#<^=HBp!Ncuqyb78HY<6>} z%agm8YDEti5LNwid`R`(~6h#Gj1PQebt3X>+nPYdT)O2X8x%L>L@(>LQqK$VuSn9Vd!(&w6+UjnDD)&F~vqnm64J|1s1?SdlnF2gE( z%&=b*>S{3JL;|Avt6q+>(hbwBPt@(2*{g`kPmF}5-E{{EgJpA~GSuYKH~EMKkN6Uk zEU2Sn8zRi+0qSa5Y$Nh;!U$VMv_5`*=chOKVDhs1g?qgI%nT1NpdwQ&goj=_mZFCm zksq6e)NfQR?e18+-nDHk5=27k2)t&ELV6h}sn*r8WMg@W3GF56@TBbf5h8el&V8^;g>!`ZYG*rc$BH#y;2l6&%TJF7hxvscPwRTsU*L_Uf z_iZSB4`S$k%*))lj6T$J*a|5>eG%_9OOhhRz*KViA^_;mcS#oHs+n4Q&$pANT^oro z{QVu~1uO6MD3IgnyDAl`4oov~q+|R;`Let@xO_^J5gT`$#V^pxo_FTytmdaco?Xb& z?q{(?O>LVAcaq?=(vr31lpfR|*$z&TJ^?y+*>1dHX zj<`9&xPB4iyyKzN7=#A2i#h6{PQCRPdX)?FFK0h{}X5P3pZUPm31};_X{DfhJ zB^Opd?H?A!h#ePj1wIh>E_qU0TY5rbH<-Q!herS^qzUhSBdWcWORB%`(qM!yi@MnS z-eGV?i8oGV?5?D(mA|5AoT@g4tiM%YzC>Lg!hY%IvM0tJQ_8OatJqIvgujnZlDMX^ z*jc#PYhjg69%}ce)<`T8maHo(aehf#%*>j&<$WD@-zU{DC)t0Kt>yoxK8Bi5xC69> zyDhiAA^}L+3q61y97NCKci3hD)ei!UlD*GH-m&K&sx2w zd~UGI9WMp37~60eyEk&p{i@{yQt^ z@dGfu)9;^x~YIM1S z+AVebDOHTLf%Q!p20^#WYxR_5`2;pBWfY*F@;GDev@Yb0@ z!<_aPB0rRJ$y)~U>VWjbaS0%pn!I+0qS(2Qube$qe>;xYECF=O5uxtQxTG_v)A=|izaeHx6Hq&>76p`1W I;*h=a zs+|>X+^E(!O@h}v+>FqItSV_TwxKTbMXpRLRa1Xj&0qR?u78R&{vnPg_PU&^-Kk3B z7Tu<4iu#l4wrTxsS}y9K;7;h6LL?sVcDv%H>=biDI|vK(wFBx<$?{T)T4+NQ{o3?)+Fs1bA|bJCSnIB?d!xavIr-x^$2iXKGP5KUZ-w2HZbKN0Yv3+M}nHznRQbLsLeQR^~s+#w3o zEJfR-PF_{xB)48t1GH{ONF}0VmvhY@L{TF?SmvK)+q%%lJpw;PNxd#C9bf4(XI?($>OV%s5L%?zO=Li+#^$L~?Sm>ZK@WU%*UpY!;Vj%~ zg@EUK5@;{V276a3Wt;#Pe^ps>ae6q znsp&3P(Q#g#nRSRe$!WR_nq90*|DS@u=8nqcoAwv@j23**sNLL+cvq5A3vxS zWEU(hn5Q*$+Wd~p`!%#uzshg+a#LDkP5>1$zRI6)Zt8-K(I|;Wyp^F3Fmk0AC}$UK zE%6F{c*LuazW3Q)R{RU_7TTE&zo>r`fRNsAzsc%)7v^hP-W?;n2s%nXcb%DZc0z;H zl>cjG&FqU`4=!(he*upK@^7>?LYv4J=dW;o0axtVFRG?KT^~2spP+d3j)l-e$$wrX z{ZBs`{qW6`?=L|3FW@B-3Q}UL-K?V>@~RFtRMlSP)2h3qoOa$(J*l>Gb-P2U5gU4I z{$lCG)jd8tuTa56TiJ`X_biR#k4zwNEqbc+E*4H1(y=Z$b-6r@o`)(;YoEynEt%Z{je#5vEuY25-k^fmW zsW!qp9M__wwq_)8fbAqta1t)0C`O{TKsDrKb%U96iB^u?|dX1E4D z^@L7eZaP0AcQmkbY~@-*U5j${A&popjEcGBLh!m08d;%z1%5t%Z~hc`zszV2_rp#j zh>U9nZq!`L0zS^xaoA-atkrz+mg?bhmwFl?v+~a>%Dk$ILJ2H1ks2LP?Ay@+riM|W z^#$MooC}qv{%v}ivBw^cPlLPT8A835gHt}9yLtTu6nNFBI-x;{W?Lqn^JwXwOWP%% z${n2adShGUc{88G2X)GeEA3%m>2RPXT7&}IkkGHn9iw%y^0Z88v%_v6)hjULLMNG7 zNB0L@y5y#5j5C!z%Q@8Rv&E-)xa>P4Whgj5hCEa~5q>*hWvaJm$c)f;L5>+w+ABH2 zc4ub$Bb6yK(bq`m`kf%;(5X~+6{JXCiXrd?%M+u)DjrF4bxRfSHc`o zkZAbY$y2VbaGBwp7QWnXX%hAM%XvF|>^vN{Pp`XU8}-EwMj_|k&^SSS$4c+so?MQU->t)kN(NWWsEI z(9Lo7Pff022Q%|&c)9`QZ29Y#S3z zfkZP>aQ1xHIXjJwlb=xi!ZcEi@PXyF+wr~xXaTPs*%MhA0h=4+<@J^b?8qX8$ZQrD zqw&sh(m`Z!<=BI8SlkB@NLKh$h$9nxHDg2s{CkOXt2Zpl)0gGY>f;*|G1 zp*>f(qbei^vOLDPuiu6?cBOB}A=bdBzJ~f^ z?A_5GtX=}NuwHTJh&V(gIg3z|K|OJ_eH^M9uQQ0v)z%Z;92b@>ao6if93gjw#08{G zDpzZj@Qd}CSqT0Q5In;f=y?7T{QMmj-YcxUI|J3U6{Gv^vOL+=&e|}@&FI~?*g%Fy zZzqWIT>d`_1|^n`vi1+UqZWn)aFEQqq#L}l3QRqDGXsK^L^H4a{-0rD9P-^R_gt*Q zsAp5O>MY=kp^lZ(NVa)+tRR1cXr`blQk-GU z%9$6SG=BI|S{1-3ioPy7sva#SNv;wkK4vfDQpO>5Be!wuSUUud`n9`qRN>HoIR)b zk!|O>4g)^YH;7{vYDOoe#bwTi55@o=Rk3r^>bls4vn7?l@p72`OR%d`{v6KH-t|CzW0=6c8LR>K2I&^mJH@bs zAuhp(q04%Qjs$9&l5^44j?fC(T1AMqJ@=L0IuVAy-B58CyCFy_L`b&{9`pJ=F#16= z)$}{#Czd{tmX=HSDN}{g@<1AA>|HJq=4rP4Tq}mg1U)2@mygf)sLE*AQHXWJunFJV zC=UdsnbZ zjGKUF2{F{(XX1D4=>YOJfJ$P|(9chT(uyBBVr%HCS@uMAdckI+>K{c^^pK7$CX)LpDBq|R|X7D{n8;vWw53Y_IOhOsyz zJ8rvYuzb?fJ5sa+B9v#*)Gy)ZG1Xt-Zwt;e*@l9RW|HL^rzpeL)UH~(TqNN@aUO5j zmQ&h#6gbDb^}`KUD^F>|`q(<6W#M$kq~!Qr(dqP!2VO`|s$iFHtrlFF*Jvyq?aH^$ zFbU5)QGyBa2fh0DJWhgn#;UKTy3RZk7IvLf-|gks5Z9?dTD+tISPL?s64|@(;&=<> zuKtM&k1RwEOEg+?WT`f*r~n9oV5|USB`xr^KiiGfnj24r$WDk{+gZyxDViwm_YzBT zk{S`r`2zxmwS0eXn~bW z4XPV3Gc#J{zu%==F@9DW(V+Npq;YpwAvAOIz#$;+k)!o4OLoW1PUrS!)IiE*UniLs zTeHcj*Thoq1Eehu7?`F}PHnv(Gk|>D2U9+0i`-=m{^M^Sgx6zSz3Mv#&(1{X{m+!T>U(WNlQL_}bfhSP;j%EkH%-*(D53V;eT zE2Y?KH%@Af2O}l8$VM0}gkt&hD{VyWN}m2=0LjFep%x}1T6%rPQ;KaBvp}xY8h&VOksT+`j9Imo>Nw4TW>dn&M4CFV~1+8iC#(dW(TKN@m<$i;W z7z9aq(FRdR)f?IJ^#p0`AV;A3N*sNis|oL887HWMBikxE&`NWSGOL}nCQr;&rr_2w zFoqRU)NT3DYmFib4#48^8Y3{=H~&C4(Rx7pVH z-*zzjw?Ge-XHR9%;@vFU-OY7zk3@R^XLFGL`*YaV+wcsYW!mDq&GtL3tv1{>1dn4} zZihNm<?c z(PPT5kV_uVKU&;odJ@8v_(DMw&%+DM*pwk|IHEJ2iS581qvZKg`F}J?&CC(j2bE&V{BKHKBa7|)|0yV?^ zTE`{gMT{}m#Dp`IeR5H%K~PY657iSKEfVlBOLrVk532pq#g=imKQYg?6=@^0tgUH9 z9m}9^=jmDK`i9eAFBEC($=BT0G?~v8btN8W9k{X+8zt`Hr^Y@XyTLa|Mw*Z|f1x_M z+LuhTq`8WHN!U0@NfhyOxpBKq%A1QgM|rYb2a3qH;ZIJA0~8mUmdmf6A?;_)nb` zoj=m*d9=tY5Kh3DDj`dPeQw_*kpDVO@Le zv^cAZWJ@K8?v+MAomQ(j51k6g*CFz|D=i;zDgdS0Zu6_9L{O`)x`p)TYR>jlS*Z|x z4En>`-jGvWq|l}$b9O(KzsINmJFQXI)p+c<%zzp4W5t!+ICq@chZ#7xhbfGRh+h7P zcXCwxSuIbuv3B}OUdLC17u#WoQFJ13^cN6Zb~AvvSDp1fbzqr(vca}Hprb@d)(K%i z#Y|kS>-7AMBiya1b*%DU4O}Pu?Q3LB46oyHnpr5nSl)tW3gciSF3Nz>uORLNdxw6l zW8;=z+C?$JraG*@#{__X=5B#9?>~hrM6Q;9D3!pw+s|4B!gcFmO+eV6JzdyVwT&xc z$rcozSP-*Xs}z&2xv&G5&0%#8y=S3!W zz8OTwg`s>8X7KCa9zU|MQR^D~HIDTdkVEW4d$eUvZd@Chz)D8JZ2v6^!J!FqgIja% zKTD>U=XcF=U*>U~az?fKi4N~b-rmx($|O%)Py1ceqwLt!ny{W6pHs0xb&xsN`+TfZ zn&JLK5pr=UfP31YJO%RiWB}FAc2~)M{Qb4!-@`>@mE)`9S`~J8b9OS$Mf^7q!cApdd7uffEkgVs1(@7YzkSXzwu4HBWWvjfy$EaWur;`%iKpWssO17T!_ z!Tw?F)EQs#PX@NHx)B~`GZp&J zuhqzVk;aKJf0Fu6%<|o-oo3hm^A6)POv$_&PQ4Wvgwo6;I`NjMb}V1pVgd}oUUVlB zdHqs6iNlT*hOJ@%;Ku%mX(Sx-TU8tY2T+3H|rvCJg(fq&DRzXwn^K81($swV+CY zi&-V!p+@gcob}`9Z?3&=&ANJPM-+Bf0@HOW6*|xAHG>YsNvU^{VpANFovG13?q7j+ zl~D{+G0cLNw}_bx^!&&3DoDNq9%k*wI_<&M{4_qL9DON~cPY^+RZ=my*)2IxNx3RR z+?Z7Ocl6CM49kv5>lPljqS`y!HBU3w$Sc4-0D6UPEH^%fbCY)_u#aDhp?#C8#e;`3 z4xiA~eA#Z0lP%m^Gsc?Yj2DYyH+hio5h!4cZ`PXkN&EdmIfb)s1Q)Gf%ckZIL2aOfafEx; zuk0RgRU*v3oM9qw&4hQf7;p<7>ZHXh;L+t;P)IJ~FGOFD?PZQ&6DN+y9d%|FDMN!H zkzNf6mIfzovb`KgL)_FJ&!%Ao;X8Qsyh9Q$=mvVXzS(j5YymqmHL6Q;W4}(|=vd0a zyV>TC@b+2KUjV_$Ddd$;+wb=b^c=YZOKB#=D0!T> zQ7F@Fg>@;jCMLL2;5epX`s}ojMHVhMTCwvUu~pgMWA*cm)JG|62hyTWQU!}4>FUZO z9>h-rCRszfCK0ttzXz!7h2V5s>mzGZPfiInt8lqWyLr3HaI-K31YAomk1)CQ1u4da z!p%8mmmezR(~!`M&GvN)--%$w@EozamTxT`(KWlCk=b1g{7wwE1MjakM^yUN zV9=LSmIg(OiRJ}})K8A(_yBocPQA)c@+d!5&kkOEav^3(|jWOb-3ot zZI9x9)KxonSoR8!r`}wHH8A>Z+y!J*hKv$ZkuKlHflDg?n3yUP>HgAYtOePjedB%9 z@ry<3m2V!D_rGVtL5$g!2Il%pn8_-2IB7}Nqu-8#37gBaSV1y* zI4o&29^y>91;u3SrI92{*> zL-x5t`}{3OTuIVW($5aErTtEO$Ly694Ly%_pB0=}LGpQ*&uwRG(F^E-{Dt*NwM6ob zj!)V=A_o4V?6K!&p`!gd`*QK{TC8=Cm9Tp=sVaA;5_zg6eVsT|z()fD=}Ck=RqP^? zQ0^-EF9@T1Hs?4wuxH*1#|^d2_(+tNpV1h~>~vzrUD1mjUzhm2D_T5!Xx1_OIF{Jg zBgpZE1i0;fyX{d>9rON2-%-58wtI;%kJ+n&5+2BiWXflHa3(%YX6))(%qPCk8)iER z$SLc4++3joG&wu+&I46tlqIt1Q#6^j~5HZHXqTKQ&EX^w=EFh=f!T9*w=rVVz z$b-}Cf+_RY4)zz_40u$+*-}n9_Q`sxfxntMq|iF>&kU3D=a;k>sC9clT7L%U>FKkT z>*Y#HWVoAuthZ`LY)8v+as|i&A1_n~Ex08-pct52ZA=)?e*KDV@E7bw60zf>tYJPV z)?4%auPJ5Dm2O~ULSUVO(YZ^y53=;-DOpW69M>SMV1L70Fb%Pz*VE2JFlP>OlwCPQ z3pLd8VFX<4hWjI5Np4So;^$O34{|FeFTUF$S2oklr_B@5J@xbFURc^lPZ`P> z-})W_>$U@H4Yk|#O?f+>j8Asb+45=%-%?LL{OF++hA{@8%YY%brW0^d0Gob2qb7ZaK zcl{o{(MT?I!(;tSZR4^#Km*w3xPfNqPkzy^x+EE~g|q@O*Wm)Fz9GcsecHxNbI>3A z9WJzygxs$zMwyvcNwF=O619QGZ@WMI_ymeYoYib}E3*2@$GrC5i&C zoOQ@XNQ2koawk3?)(01vyP>@>dX+MYb3K8v(7XbyA3u7Lp9{(63kqX-w75jLFl`fE|L@GF~`t>Fb`XOOc`|WsG#$7>Ft$2V&oEldrD%>DW z{n{;ubggGL;lR#*gf~r-@Y6jP5CDW;qCv4Mlg zr3+xq?2BgFIymbij-s%}^)q;&vhn9TB83WX_HMgv>c0S23V2k~o=`USdHXHCvXkr3 zT@5Dh>cIj;O>|87mYPN z|Ne5ZQ=FZD*~BWZ_J1u={+Y^)|6ISl|8ceidxmW1v1Q&B@?K_d8X~t)&q5f)kx=Ou zYd)~-Ca$-D93A|Ud+w9E!7;NHbfftumb;yg9R&PHsA6(3%7~LEdKC< zVo*a{c%19|gre=U^`Tl+u7GF83vpIEN{yKA`8jm~_O+@N)(C-B$^cZJQZVe&rSQGbkTrU^lMV*b^fJ>1L7 zl5Vi=Jc#jX(OIMs+2R5cH9w_Rw}c!}4P2ilB+W|sMZB=g((HFmg6#JyXJduXS5Sw( z(F}9Ak}2~dbNfYolvz1(W>r>ZS!x1dRKIL-wZh@o!I_6kL%_$<3Qf?2dd*?V`ErIs z+71#e!-gL$sq{W{Sl(QgZxH^ML&tvkHiuAHee^bb-Oa*Cr#sV*e6XD1#4O#eRNzoM z8*o!ndt%tH9`pGvjS!nI&0QE&QuQKh8Up3kyrit^Dy~F}eTyLF6w8%4^VpbNM9ql* zYaOd7Ho5P9(vv3&yWt)Og|p*?7D0&Yh!JYKv9(K9D0DvdSp-QV^^Jw!zHZ)#H#1I`De;BG_4YnnnVD{H5roE84L`F3h9N=x9U!%4{&)JDRxI$tGO2ID$160Pme?4DSdmSX+%lWhK~ZW?m6p5RCY<4#jh zLEKkEUy-Aai-V>fi7bp7kF5w5B2%>|pR{Wm0%_eJ1yD4WcIe}!e68jA)5&DET&@^) ziw8c*$Q&4~M%mD%B-uG|jD-KNcL_eYcZJM92^1@$%aTfm8FagVDEgrN`O5>v9SoWz z+t+2W+BEnEqp=A5gXL?u`^>rbQrry~Jk#~w(GgiyJ4)g=lniBagk6ih8f_M9b5n6I zFelQN@7&`!Nc=i%W=(7cF0j|ARNhYd(A1(;YmX8K5Sj3bGygo-UBod{zG_P~N;KZu zP~(c|(C*8Dajle<;I=>74L!^|9kHs|e7OUlTmre6ut>l8j!YdYK;`UfHC=o7|J|_r zzvCNtR49JHL6oY~j^0O0WtW&g@;f@hGftT%{X@OLwopOAG6nny8@TD~EFR@A|2u-GuwIlP2=4f1^#gS0D5KVlTjo`^tLynIh(94Cx z^y*d8)=g1Si;HX)T@KUL>wY)LzQ)YI9yqr7aUK@(!PQq0My{9_#IIV36r33=D+1`V zE=bOdA2RzMf{p|3Jf91*uN}T^Z8MFamg>O7_B`}`#q?6L*-Eco*E9)XTd>H&bQ)6s z#^TxMZBYnTPh9=P)WWdMh53(v(Am?cqOYBzId;y&ECZ^vOdknFY97oR9dO_?QohbO z_1fI-6Kw|yL%fybVNl;P;i3iQEZ1i0PjGvz@sa*mPM-eK*M8nSTN7AFXZ%1T)-fCg zkG0o5ET5$aeb-`Xw^Gkd&5>}3FphM-nO?{$VP+=~nBaApPiFm;O<6l*k#5Jfo;$W> ze-I=ZHrb43tZgEbXHXnTApMh^nR8saxuNy7FekPBNVF9HS+JV6Wvbdr{ujMLpq!sO z4H89vtMkKL5O;G_+6`($;({Kh3@4&RNsvh=ZCMqR{Gu<@Nw zo@Y+2S>+WPr+7$MiazZ!8#{{LA4|_BG7_5NP*T0P0TVdoK%MmbK(~rdQ!UEIxmL5f z=0)70rwkpb*7mp-@kPsss3Z{AhEU?INdL5Hmp=D77YnHNfu?2(X%v_K=K#^S9;y2{ zaAu*p-+SAXVWp-}hV9so{*!>gJ_OaAPl^8M_GQ69rJry7qyA_(ke?*Z4teHDOVi(V zxI`nq^2L^YVvenzENrQ2Y_n2hwz11yRk&0ZZvf^vIH`lawb;G?`0M6hqFzv^^3$ZK@sp;Dub)$LAvg z39&3kP8-rFxl{WBwH0+S9dtMydyMh$U|oY)i57G{*1C{96ssWcfymUU4pNQxL!qb&jB{^nl$uJVGr!PfqK+NgMojNb11ED53kXHsf+x;v(|~Q#BBW@IGm}7)K1boYl(a3( zrKrF8WEQs!lX+QAWp4r#_~I60$23Z1P?6}=vF@vAZg5|10wHg~{XFKLpE!KyJJ5fV zUw=N@m$N?lx!VK3X62j7Z_zI%*@4a5Z=cHC<%uVIcN}Anu9)3AZh27JLv~29`cqlB zR+b5v4xMu)j@Kf}*KiJ+9gJX&O}#BKp$Hwgn!kr^Am7<7P+i@lr+%pm9orzJ@=|eBvVI;R z;={gp-NSoA!AJGHd+HM8i9~#&pHhVB?q0P-ji?a)Vuj>HjAZrT>`!X@<@LLz+E=Kw zbEumJjNHapE&hTsBS*{#%|NW8gSiW6|3+bk8{*1WMci1DWKGWQlSC!BlAzz9WgYQSe0Mgb|5cB{BsX&)l2{sTpl`q0}+hDLbo1=Z;rZcS)YL*4uPxjy`aWKv|uIWe~a4;VXM^&|Alq z2a9v4CP`tv^drmYYggt*Y<}v>+-w=z)mRB(3h;Ebc(qOod;NE4h(_L)y|k)Mr4{eM zH@P;Ox5Idn%t|}e{1nMCpbc_f4ChqF+ce_L0a-&KBcxup^tzlPe=|MnlA><%N$O9QBf(9!=8hE$;VSJzQrhW;UOX%r4PJ}HpcYgwgH~e3h2_ zwCEN#|D)}u_9vlByaDG!Ie zY&&z0L^P9hanQRRd>`Klo-`B>}Tx}y$o0d$I{Xu9Poy?IQfcea1b5ySwL_!dJ5|Ch<}2Sa^S!T?{7J(RVaQIF%PaQ zy_WY{>J2m8yWT%BtJ=EOK^}k?pV|7bS}POzV1M)uE0=~J zLLJ#ds3Nm3b0FzFmkpn%d7fq1x&%2lYdg=2!lInYM^Y7vqjt(b$j+9~pvlRB>a|Z6 zm@W#r(LwCMZHuH?*d}gM`A{TAfJ>}bG2Ymsz@q2LrmS9BDWLzNESdG61-tQ|vf%%} zbxaJsE|j0`GhU8!9$EkO9?kz#^5_2?2P&^N{ZFR${-?Y<1i&dtM2GaxF<`TK1VwXo z5_14JNzn_B9iyq39#GZKGirE0$O^6NHm2)s;J4D~a46Py`4c&AXO6%&qo(z^!W46J zvy&GZr{o>Z6>7QcK(XO8zNe`ib$vmUw!H$H8Ybz7aw)yc!&d4)-1N#h$Sq_f**M~3 zeqZDR&5=9{&PZR_=2(w7zN;Jef$NQ>q79oa)pQ(4Yb9k=rz+A;30mBg1Ug4agUGV9 zU;4PJ6`Z-EgP4N99;&SLGt2MNRFVO!>m{tcv0fhCxfD_p!HL)+o4E@MRk+^onbi`dpmtXly3sY_nC~#Z(||L zrskDu=afsVOh2)4fTr0(bzR)3PB-2>;DcBU$Qo=SQSjET)Ibr}h`K&OM?Eh0#j|V` z<@~gu(E9Dyub+)1!+$CSjme_c+cWu6$iTg=G>)A_$k;7^0S~4ANZNAsR|Z@t>WUdI zgM%;n8U+`U`^{ItqJ(VFB0r@e%)l)pN0?)2XYUkLKig~~j+_+0{PF2PlyeF$GPG&B zzUu_+pl$K<5A0FKa31zawWnW>PHIrqyrbo7V~&`%HJaUP%usBKH4^h295C_?1V(m8 zbw&Kt(O2fVY#k}9!pF)ZL4H;y_;fPP^({uO+=M0VH z0bH71T!&=dpr3m`$-Ro{8?FFWyd^-a(pO%ksdw=Q6B%iDcKlzzn`XB^gZmYwW;U*b zXH-{Y_b&4}_-*aPbjxtUYfH*se&04sfWA8;#o4vO+iYRoC%28+ldIHb=4_=x@UDfz z?d8ZJjoKbLis?Py7nqBadDM=~6?YX>u;k?4q_!YP-fLALAumA*ozKDl>O^IjtZP?* z$mWBpfiQ^e(x+RDZL0sHlt1N8BmsFhfu>O&i})C7D>pMcEuuIHKRJ^eXrd zF*tnK4*7Oe)>=iW&BTEstygu{p}oen5!!{y@CrZEf)?lKGh4I9Mpp@mK`_#*A3DKF znY`W_QOZ<{pY1-DqezJ7A-&3sfhC7xV@nrM329TTPyY;6Q4hkQirl}hz}K%mApBXy z?%sF*9lHF8=c4=jDE9T02{I_A)w;#sDs*9SW70xJ4=uwVyReCQSl;*J!U=Rk$JnoQ zCdUD)KklBe%p}aot`-P3y%n)ZH88Px45@F66@U~PK?qmVMB?U467)rz0v0sE7%amPVdADfLTWbf~R$zpO1*>?{>zgO%!*J2Q z*XeYwiVq;6mXr(S+wk`!L1$R-U8d)`b>?{@ZYc1Bc9`w9> zJJ5N>)SQ!P5$~e9T(S7eEFsDwUUpm*rn0(R+b3(D_)$sEV*IDmAX^l-jq%`mmp4U~fcMc8D)PPuM+lhXT zWSH4DZb;cju{#1!SxwS%NLA3VZR-7`+}6a2X(nKzwK~F6Y>YDb^0}> zd_SF!=(eK~emfVLS89!Wm`#*9$fPHu%|_B-g2qp&yZu6GnHp1CSm+KZmq>MV^ho50 z#j)M}71XiPcl55uHs6%uCTb%F-FtAf=Nnz!tie#nDXr@kocd&#v{Pa<@-rE}Vw5f1 ze>HZj%(BttJEIz%^TmK7ZDXx!71A^HTDVOGrC+qLrMYL7U?wh{&9tsIag#YsA^c!~ zEY{JN<4kQuSMfH5DI^#e#GEnbBT}4Y?h&-I8|#Hp_CX>klTt3VN3iuJ%EIEO)P)I| z+to1jU}Kz+7qXF)ybE_cwO){@23fDuYBchKpn*XjK_O0Wp3Kb#w0k@Dv%XG>jAKD}h6po!Ror(+laJ`fx zq+Z9NE51Jbjw+O7zb*f|zn#B$?-j3S^)D6C(~Sg$hAB4NLboPKWtOKk!JOf2o96SP z@aw2K8w`w3rX57uw*i#X7pp0=e`?E6IbUT<`P_5>!7l{uaPZh0-FfXnMDbls-%d}A zX!yplBT>{Dd$-{TJD*0XhVO&BT!A%C<1`gJ>3Z+&R0e6~YeWA8ih1zlJV$wzuPrNE zmQ`)D^yZw}3+&Ccf(c^U=UpFd#CxB@Q$+gPUU6x{6!jiiBprYzvb>10`~cUNI@O{q>d!xGfS3qz2dd*%MS-+;Z}f`Ad{@Mu21{HIAUw?G4K|)LSFlUeuMop2 zH<=1s#o^62VK%4s2oF;ycr#S51|+xZsd}R+d)&&&9b0q*KmU0LAntO+0p>t$GA^)2 zCar193x0NR;5&Sec9s&k(v+6lca=|aDeN&6zpTI-li7=YA^2^nC3U0_KFE-$LG0%& zEo^aK073s<@viy&$#G@5m-h6yx;hlz5p*Hq`~ig%t3ymUz31qOf~ZX6mkox@%<1Gn z0E_57h>hox+%ykSRaB5dxyTiIScL7?l?qPM(6%!;+SfWy_;JLvL13wd{<7CY(6?MulS8i~ z16L~-xYsCp%Y4ZypVMZC{?mw=UCLnK;P(0l0E*6(09p3af6>E(DEXR`X zl?U#M2o0sF#9#8t3>>1~&lY7w60CTTJk8-f6&14ma9%AYHf&L3RkIVir&TsgJ*f5 z5pHk!77g$SNLa9yH?$x0`O<~IkoAS@^kJlo$vkMKEFZnisPnd1LUUMw$9G0wl!#d>TN4VItsa=reDisJ!x%Z*!<8iN#}i+x8Dc^`g7=N zvDf7plSHBK`<+P`)P)uQ1y~;>7!b_wF|E4}97X2wh4G--`K$ykd2r+o_CqY%l3e2* z7K!)1e@VK5pf8+5VrKHcw03dZydzU>y2y4~tk|!c5>MnVRpDhXs+f&zu2>xPDi|ba z{}8|Aau;x->BeuGJ($R*N3}H@xkGiH|4=Qg6-8HWWTsys)E0J84iQsK+XgY!%Au+W zyf3yP)d#~O>JlP-I+HsF)tEwV4)3WgrnvX>8q+nj8qznM$)NZQB4VO2ds&oV;$H|q zE=Rhrp>n4OK8(!TI#Nl4R9M_KP>E{*($tfc-N-}BSBId%WOG}B2w8+s1cEFN{HYOh znIC1>i=6OWUkdV;$iD!2z3@%#v>#_02hD#0Gm#&tlSx>iNk6z?#J(e$nd)356X9ko z!c_tJ*JYzN_KzN&WCV1n&cQ)SvXn8hHg1j)JS}etRx{Gsm*dd}k)AHnwJDtN%ii*K zi5fAE3d;+2O;ROyGJU%gKkBu;7hd*-8Z@_!*X&-eEM1f>;$-Z#aqZacP4UpV5ounm zEVaDF%!?*nNTjkv1KlXvCP02^jI&gCd;WTGYYxaKq zRej2hfe}^Xcu}`2V1^X}I#Vy0JTc208gf>WK@FC)3cv@v0<0rw0RTphDF1XW=l_&- z@4s{<{tFKO&vFC3ih3gUqIw9;>WS5;_KQyUoW{30o|);jmM24KPndh1C^(wZ=Xwag zywT;?FqIT}jp_0fVj)qoIO4Qx%fJ8R2aBC(x*eaaFZWR`SbfvxAAdjg{*UzDWZ^XQ zRc3-$_g*>&t+l{%QSO72GH*3>@~D=5V7!2>Vw0Jb6~JPaytPJJ^e%AUWs0C;GDJb%5&9NzJ~%_6lf#m9Uo{(h&=gf z*j)eH&LVvpqvU%4adIcno`zhIvaev8o8($g!??*4SLy)wHPzh!`75icS1mZS3_WZ} zIHz#CV5tF)T3y&@m7V^DKH7H}&dx3!`(EXi*N{)DLGlZCokh$61TUIN`y3C~4^!zf z>rTWZ8I;LJEq5=pGubm1i5QHO?}$M=NK@@{dh*0}2=f>mFYk{O6Q%rGa3l;w6LNJ9 z&{kzQg@9?aoen>@;AUHLSfj5RBS=bndS^Kr6I8&Y(>k$`9HV^E*ilAW&apf%i_v1K zZ!Dh)@Yd2PS&iZCJw-pGLC~5`pRTTb>8?q!B9^CFHwJra_`a1Dv~?zm7fu8TfdS0n zE!sCx-r?^e$@B)jB7&2(1a`dmk!LJP$r^PO7=fch@iNE{CRv+k77df{Ll=$lD?&)@ znj0S*?+%pD#X@=Hu`EvNtH?aJq*Mw5*_6qfJ)X*>x@NWau4u``iH~L*&(JOTZoxT= z&^-2Ej4H2@tHhLMV4KZkC8zJQiC1O1?B2bS5fI*bf+s@uMtKdhyot#-uKOTx!w>DG zj^*s*pTPiD!D|X|cir>{*UjJEG?Jmi@5rA!OW&;M>*6IeDp07$ULOuyi4J0LmvQyI zes59`DtqedY|WLH&zjKK6y#NIF>Tu-M!K*($Os_PVs5C@&Xze#Px{!RX#U4_jqzPV zJ-6CbO2-)4Ie9xoE=5g|n&q=S4f5MSohIc-`e~ji#2(=^3EUEX8WI7~`%$ajyfu;a zdK)W*l~9u+ONz$p0(p#-23+DF!1YW=kt+iOha*DD1yS<$B$=3y!MHO-c@Pc%17`Y6 z)8t9Y;0*P$Gg(qnubF2yQHDh0WqeCG5(Bvu4*lh*SAU3W?+k3ft{@^mAe1v+W|24B z<)&A&CS1|6Sb}-Xx9r>bP0*oWt)ncN@Mpds5`Dje$NU;9p1}Y9(j_dp>H!nBxk21ev2v-nz)%HhESFdJAhp%QZW}nLa;GVl!u~QnWv0v6&!< z|B-q50N#BoJ+~e!PbQQ`NI<`9(G~A26xY>ApM5*ddqP7+-rg~t3rgDU;ECgTf#sb} zWpB!S^v6Ms107sLn>$tYt?!0eZhQ1T@{xdF*?0I0Rux%o$_E_3j`6yE-!zVG_~9+8W0v?=f{(m8K_u1ILsEebmYt>aMC_xsNtC@Ra)fEo>O` z8yjEJbkg!(WZxs|t=DB#8rRq+hoRS|5bBg0&b!!cCX%oAeBfzA(#btoby82Zsg#hU z3(&ZFWZlSO!X-zy-Ufr3tUQ{EpW?>ifYS%!RKBev<*OwSWlsdli*MR`rOCnD&m8xp$>67&Cd*&55%cb^T5; z<=|KFM5?`Of|||001|zx`a!EgVm&nS9Wg%9;LDOThjd0n$=ZuV@N7*{pfI{B0g*L)ys- zJ2$e7G?00aECx$nLXaow)MU-pEHrm&yE>36lu^sHfQBre;@GRSnpzHIF7e|5S0~1` zk01gR10bf;N_j|!6v4A${ZW?1i9)&qz(Dt+zFh%=ehq>AnnrJ-0ecXjR*4vCVzV_^ zb84zb5GTGj_v*K)W}h5I9#0kzY^UI8Sy*8})?_aib+sNw&U>nnsAJ*;%xSqoTSpqF zzo=jYd2Ni^ONOe{^NwkG>IdIb^k}q5y8s60h%H_m~E2Pd_IHdFXogr5Wd;;`^LSO9NTA5Zq%ids^ zT0H(7U`&UF6kO5B^2bUNV+sru-@qpAx5Jme_F^`4#dmCb&+%OlPTpQA)p~P#8O1CV zX!p6q^VXr-w`YNL4ZGef_O!sPe2A!RPtSQ6JSFF;_ zo@aC07RPtYt|Z908Uvo%VD`6tyS_Y;m?(}*vVEQcWu*2QK27A)lcA9)ZjRMHjU56s zVyZXgYlNNyYb+|3cU+Qh29rZof86dtnV=b%bs6(jYU?=PZ_5XH81qo<=o6`yIXCOG#W6!vRa#}HIvcJNy)1g#jh+z z?}$vnb<;0;BNn*sYkg_yKq?(cyVgP>JN{95OJz~J-$RH5!w55zq7e2(P`YZ6#!4@0 zs#2)==`L{mqxW5fVhka}EVoMWPXMa1USuvlQ#1oHV$;x7MMS%`{G$mzq)akqh@Av( zfP65i;n5Iv=KUS*)(_rn@nYT77mu7|&7+s3W#(0Kxi@LT2^;sO50hwR(;A`1dzLfu zOB-XF%{1j*6Lrc-6-|b$&o%kJA^X+iAgbzD;3%fBoATIosqA;zEy$wAVL}Xqg12T% z^!LYEhlQPsr3C~Inf;o`Y&=iwCW%7{@Np_yt$Rs(@%mcv0gQ+tV*nBX{-OpHGFhMg(PH+VAKUg1_%#~&HEU#n3*QTSFh2tOfVwc1CdAd5?hA=TV1 zKJ`;vyhSeg1E|$C2j>Jvy!zr@dE`LDpxBKNKF%b<5$oSoSL?Z8!Wu+lgfC@ z8mIO`H@ZIWe-hwdIx{pd%d-?)6qP(}iy`6vf@#a?J0FcRTb&oQHo8RDK~p#3pXEr{ zWSDTf#&o1D_0h4AmPSlX3I!~YBzNAHjPJm-q(?v0ILHFd&P>wv3Y~oO8a=qE)r0K% z8A&OILNW+t!zEGsFM!jAPvgP_L+0ye_7(2@owau$Bywp~;p@`5^h`%72rjl>5$y9r zX6i(rs|4ob;0nvPZ_(bdjkl$rdUYchbc~RF*Nep+M$*UZ?6)hKsse;S5UbC$xW&oI zIlyFtpD!dl)AQDZceh=ZW-*noZnThduSJvV9R!up2lAZ}Xhaxk(iwGT8C8eK%LxJ( zeQ^rpQD#~@jgc<~w2Gxr@vFx1^TW5LX`oZ-iPdgr=m(3(gFNFIzyaQ~hyWH5Y>DDi zR_X?$(fIC$@~p4jv$`!MEH7|_AlVib9(*r^l~1{D@;I-yRxv?(ik>!mJ4ZK#VV0qb zs)GRBH-shnn-!93Ycvn;mEpKhWpAP5E~2DlPK*s_>m1qJ`6Qt`h~D#@oO&^>&a zo$z}x`}Dk8bXhtF4uSmm3y{fD`3vwUc?x-dIFivbJLH7VDSPVptGe()u{2W>GtoPf zAA7hbdzCpOw$-vfzFuvL`0(7A;({R+t;* zaizm~!2krq&ZKV-!5pps{_74;o()9Xz=&mo_%8r6hY_yZG2MJE_>a3H|I6;ge{o;_ zd7v^S*69Y>5No?LW1Y8EFLa_mZ+U=~`Ga+m11}m{DpMygW*4k!iMgs%_-Opht|7yD zNMEFBW5Ay}Bw}~tMwdRcE#U`Ejg=&c6hqJreS$tk`$kFaud6PfVUB1n_T|Q7X+K|P zioXCAy(K+{XarhV)X){51$d7pcAoad4KvXzf_Nz?m}Fdcq@|Te^S0_oU2BV<;q$^W zS3>`$P5r$?nGS~eWD0NWe#-YM0X$z3c8-47*Qxi4ZAxfGuv%=wuP9YB}YsOE0t$2jBH;UO5I1f!;#TlV!4=xF^;&4QTY;K6;J4E zhas;|6gu|H{udg_8)ZVU8BJw2nJw|pA%TF4sjFT|8&kLUmI++}oO)1Z>lb1kJ-&L` zJQl~!P%;M*t9HDX`6h$<_^-zW<2m(nF^#uJ@MPpyKU|meX;RtNT5Li4Vp+~@bd!6N z*Tp@YLj5!^&jB5970g%gg=fsuTO~yd%X)Tq7r24d!a2aHCt&vzo{^j-NL67aDB2}M z<^tDp^tzj(>%azoWOMO;enFJ!rrp~GP=M71MP~Dg*Z#PlClU}3?m!(mR!iw>PW)-P zcm?_ktoQw>+h=aVjXhKNRXCD{Q75)&;XD|t#d2M`&2jfLAPahR2yHz@j=lz?-KzTH z+w7i;$cYQP-f}@hdYVyl3YD|72YT!OIObGrY`{jgCiUK*&In?14Z-bI(<^WJMH z{c6s8c2)N34rp#fN7&mP1!LL<9-Q&x3H?Wwgl-d50Y7Gc+oKY3;0~!?m}CY9Dn1=Pk*Fw>U!jzvV8dIFThLb zb;$wkZ`-f(z5>5STY`5uea;6IXb9%3|)vN3)&%nRbMY?Jt$z)_Q#_ zT;5KZd2KpOIg&y zKtIllSobBp-)Ga4x>Ys{G~j+n_h0N7KQh4dZpmM}tK_<=P_KTA!GJP2(#3Nnw%RWS z(J7j~0Xp{Lg77UvV)d*8b0AbIf&i`q1)03)999AkXZL9HlB4t%()a0isR%z12~@!~ z6hV2}Wy%d zuM4CwDq`U{JZ4NbW=uT}q~90j^MQwiQc=P|0418UZrg2~p9<5LN%2c(1aD51P^jL( zvY>F-&chqAhZ}1}p<}RV;^@_ofKe741>={M52zCN-*7;PV|POrp&()?9YQn0NHA1q5DllAVJ{ z7JGGoEmEI;Oz&9%oTazsj?{1KQ{$bvCbWcUek{JyWF>L={w4DbDy^T0&BP12F&P7Tx}*!xiao^SNPSCM#>}(D`~KwK(E)3rY`&UvF*^1P8Ch^P=JWY z)oMcqoR#_@lv=O&7hoPW>a=Dr&i{0EuYn(vkEw29IryN(Y-7f=s5HTLMlHtC@7uWd zXYznu(yerHNum7rC{v$15H~40@}0^RbtNrFKYo*|>asQAxm@`mOCa`C5T{f6Ohy;D zFGo0W)Ls+Ac3`Ru)6URY%%82z3w;8Sa^)Z?PglY_acv)WKwm*Nc? z;>7B~`4qV?OtaqLH+&1Rp7^tL1yt%W_Ftql>?>c;j>*_l&Jae@6D2!u@wa%CDzWuh z`6Dx*joR>%9K4D1)`sIg=m6B5vSO5+K>Z~I0K|+96Y(nbxD-BP;Y`}uBNKAc&Ze5V zdF>`tOkojA8~-yL$cC^*(++W|C&F9N7wI`QO_5q2SsB&RK+9D(!UmLUnvCeg%0ln7 zT3Y8?kT7o@tCQnddz`RKf5xH~Iy$`&n9MG$PwLa8U@^Mi5<@d3J>1qS?U34zqKCo- z7GDW^kaR*BthCBO8UZyjof_&v&R`A86;sD>=bm*O-O-$GaP6D?I7=DlR|J7n*=dNa zq(|BmIOi`}cJ86O$NjZbwg|CsQZJQy(gE-66zvtEJE(b>7zv)_INMSSJHLTsteiD8 z80XNwA)Q92V5J|GZ0n?W0aw=>UhR}4L4EjPJhE0js%M~VAdAUJut z`mkbAoAy4Ui4z#9?v~&|^NQkEmYoaMGPRADruMkhmxM14%KDJy`|yTbf3euXzHYx1Ez%Op_1T~IZ^Rysa?ATI$wOZr(O^=ew3dR z@K!h;JSUQ`dmlQQI3!j7NNa%zy5e1v-1`%GC@k+L67r4FQbBICw)&zLp@Sml*}AH1 zlnz#IgF5hfDPbP0C9j{gyqH}EdY_&&$dN%TWxE}mY5L5}wt9GI{)U89{K+`hM(AsG zlFIK>)(!d*BlPGkCsh6?C;j6#(izB!SyNN3x3_28s4pBXxsGOLDJ9mj>%nJmS>P0>Ca*Ok0%#8kJg z?1(LC@A*-d`_EX0u*FEGS4R+Zktq;SX79YT!;}Ks23k<}4A-)3K~^A+ zI{sx&9I@0RwQeqISz;69sJPiOGKStugFZyj$%??3_3QD8slBGw^L?V-g_&S4Yg#pe z2-J$=vwQW~5+^Ic5ff94gey#>UX6=aWzPwPX)~YXM(}tp=o@Gwok-{d7iZwWEo;g!g*8lwLR* z&}QXnyBoCnt~K_@0WgGBugvw|S1bO5F5UO<$^A!P->@8;Te7@+1eJ8lHd`Yd!xwZP zz*{#J;e_s7+=OqQtEM;dS6~RYaHDK$tJA|=4~7s)ItH5G{GsEd{bIl)yha>et;)Mk z_MsiABOJ+&94Wfm`=iP2k5}<6Ev0KA>9@}J7aur9u`g~@<2;FTUXK&Z%$YNuvtytc zym=y|XN>lK6&`LpOoFothk4w+_|42%in2|?>fQv|oyI*?#Xq0jDf{o8{HupgXh|5L z;Hzp=aLhtoS(%v2K1bwx^_a@MCT8ldqDT(hBDdcAIQgroRwi9KQ|fa-LQuIk?!muu z_;*hs-NrfB5_{+*T>?Ev;2SC9y_OZ;K+=zn@HgxP%Fe!u6xg#+ zw-qM6vf2n!*%719XvFA6jiQD1M}GDD|85_qLioPu%*mi7NvPo~s5-IWxY!R*)XT?ne@Rg1?mxJ9EVc z9%+?}r4>3sK(0?4_V#tgm7I7vwl1i5UM~9dlX=;gF486eR zolS&JvjEaTHgnG0wiRr+kMp${_+z{?yP#k4VK(_PHyNGRq=$C0&hPH%z$qFi^B2WX z1_*v%WWkGh$D!|0`}s0Q@2@5N%i(Io<1hiy-G-cSKPtTvJ?T)cy4Ogu?@uAYeIf!+ zy_(a|Pf3VzP0orge%!N~UStX}*zX+2{v0|@QN}o>%ucP0R4#qIl3T&WPqdCK6S7m$ z&hBdTq)ORT#m8+)F!|d6^vHL*fR5A<_vMAaqaQ+d)0R-0jFw8c#hBBy=Kb*U{rPhmAxTxh z@YE#4v!iuc(Eg{E^(flVUhdJejmt(s@@QSE3-|sY@U)PTZCL zdx!_?;?@lOmZwCR{>QGZ7LE1!Yi~4rJA3tCSk-?<@M|oB@*ccBO8MNWiD!P_1IhbK zpr9TKVnyCD#*;l1e7mF(TSC#iCy}!JdkyNSY9_B|#K1?dWw z532tgdtV(FSF+{b2?Pibh~Na8013f^I~_={V8I=N(-0uI(?rnVF2RDkyC%3hjk~*R zck}Da?#`Rpm)V)wowx7(_7mz4?mhR^z18Q|tvct_x6V=1zu|G;7`ngta#ID$0xsl# z-fF;YMAiVeD{#rt26^Z1sIW+IpN~7X4GjsH;@(|9nTYhZB3lZ;v!adamJ9V1;%Red zYf&DscvSapfj*0D(H{+GDgrc`HEA(&6xZb@Z2aWwXE~O0zdMP2{?LRoLb>9hvg3QA zTSJHyS{sil0qATXOsP&#F>rAOb?02@D7WCI_Qbdu;6-4o<+Ru%s|uRR)5SXHp4YcA zDE!}R`#q(2$BJtzox%}Vf4@v4ndy|}eF)Gp-HjgKmBXY$PisdXolOvvMsQmW_ z;NKmg|EUZst{U^QXX4mwP264!59trFbC@+sxlS5+5 z(IY!pQ$6>n{gY8{;~A%edd(hveN0pTjy|R~nyk@@B|e2e*6T?grMG#uEX`nvIhCJA zWlDk$-J>^T`=?@g&KV!fYeANH`)g3jY2ibsJcJX*RJO z8T}b6bHXS_M*pSYE^=iT8Y7map}JwMwI1pDl&1Y~(#ikiwQV016kvE$nMblY8nLtZ zNd+~kUtOoJ#j||MxZ^O+>e=S;#Y(B}es6K4$`eUY^g%vVl*ujPKJLb&9^^i`4~C4P z&g2UPA`3lo=EhzU6{YWQJ^~o*F!?*|GhG@{(v0yV?w4B+rhU7yYL)L??+zNA*39GpnT=uW88+ zTa;yKI@~U6yJ3~i>>lJM zv3v%U15ZvoG{@Lt=+k?R>Wq)M8d4#zQ-i0(Jr&LAUM8HMoaCtA@|^t$n>6d{2pW%a zwdsiA3>8&eI>2Fr`YXrRTDpWgSOy!gtRju|l<}#vsP1tsfpF@4TDvi zmC0TQ;(05q&3UCSx+)y%Ssk-`@pv%DQR-P#D_B~{y4tYQ#fRs_RV>{0`|!s!9eJ++ zvU_z+LXRseV+K;iS%v(K-%cF}!RR*CB*;?C`C@#inbT~C9E}dTo_=bgNo3FUUndZM z%+`3k^!`k-0f&8CbXLByGLl%gjKy3~FJfk#s8PVGkGa2O~t8yIGR!YPNxtK`Amo?`WH``E^9_K$Q zW$*ylgkt1dbSV$J3Jy+urek^48;Xs*zOEfV54X5wDdV@my{=Zt=^bbwyW|g=kxh=En)9%fZMxf90?@?#Gb?g@ELtAg1fq9B*YkT; zOx0f$WApIPZ~BdACy)*@CDn6ru@H^mdEOchdkH3B6R}y9io5;}%{fd&->e$mo0T;S zcA?V1AnmfFkF;0bOw3GCYzSk<^dRJ3@5AqVX2(>Ot3B4~!+xny*6$xfM9%8QdJ{%3 z$jDAr+M;nAZ#JqfZ7i8j35ch5_LbEhq{E&0BRPWqa1wUx2f zV0%|-$%R8>UYs;1mb{t+-yQ#oGKikQR&lPY7J{#xA;%lh{cVY1 z%sVBY^b^wTs+6cuj%aEt~?J&fJ9h4 z!+!I;==s|_qSpgQEVxIhkulW8p3)1aX8f~^Sa}8N5WRrZkayYsp)QyPq11Avq2JOM z$?8heaI|hr#BJQ$kMMLyqSh34HD!vj>r+6)s(3J8xNO}+<59a`V+IG)%Bms{Lm8qs$kYIm${WkS zzSZ|lcH@oe(Zm#=z+|hX^Q!R!He;f`r*0yDhANr9p&Xl6F~MMTs5hlzKUf=-iO&?5 zrEXe2Yf==~^U+Fc^2LE*l47+*8aR8>?Q^RoCu56)Ieq)ZTjh;ujsH!3C0}xZJpaNy zI5L<;&GM$am;un`upQ|8C&{v3-EW6!DcD(22^nRW&uAwq83)i%tJqQAiJU!h#-kesk(;Bft#<+znX}w3( zM;u_(BW>(2#@177}8A;Vvl=qp##6RcBYREr+VB5cdI~nguAjJuOQj$ zCy~`&2qHl!?RCg|>6l*uk{;UQkE!T>zq9_bL;rSkjRaQ(WEN1Q^Q?vj(e<}*r#~W~ z{!)DZfa90i&kx6Cu`P?10wQ!SQ~gn-->4A_eWCC$rO%u&Q~NAB*6aDq`7SRsK>N%_ zrBEWZswhI=d$+?spUr=@PPeXfz>AUinsdU)3ZeR=G?U`mlTbR5(?nvO-jqSR95{)> zL;o!g_!57POUPg-@sh_1XIq77kj#iBdk5Bplp=Yylfp4RY23EiOD9eV8#~&m8Fhn% z@y1j^gM)kTcAV!Fh^9B9@-N9?NXPL}djgnX^PArHigFW7CTpp^D}60M3DvB|i0_q( z%3K#|9bZ%77iY|ls^|y&Ni=Wa5Rxi=(w5gbH0b#P^q=?Y(t094;&HvojbXTb{8Co- za8n7X3N823_)XA51RbqHH*_cXfSv{Y7|3j}4k8v;e%z6u|IC3^$%FAGzm3 zZs>xfZIndkz6rr8uj!4d@{kYZX{|C(m+nC;R!oUYAQpMI&w~$Qa(kpY7JA5uw-j&A z*{nQOH4f%Mhn?miuI<53f-4ke3&MwPD1CfGgTnvG%~YCqLfoPBJ!v&&#T=YuSDNsi zRzaj80OR21BL629T@|zR&wg~IIhhRGGcGamkB1FPo}e73*JRTgQC!=U)6^6(!;<9x zNb-28sKsdsa+euY$|V8D9qV#y>D_D4Tm*R<1xUlX!?hCPH$TeW*MwKtOtzh9!oS4N zx!H{70r4F5)oKHdrCCBXx9?Vaz2vi?SOvb#aLXTbKr`_XOwZs&yv>?QF(tq*Hoh(j z@DOMR^^eX5Fz2)H-YQw&UcTE!zIHa2F*H;NGs81=AdCtXp}&Sp@mx_qa70pVw7|ei zb>PFButHHhU6q4;$uA^%BQu#7N?U_d6En}(I;bMsI%R|exZ7c73Q`XYo&fl(uC=2~ z#LZ)MP}iGt8i5(}Ln49`yvZO8ervXU-Y1bHxeh^2)C0})i#|z2ROvEKFi(w-GU6*Z znuX{TdgWXEq?X2+54$vLvq&w(I~PNSXd2eFJhk#;|0?(5Kl?tpc{WWaywfFZR(DgI zZ3E`+gLpP`N6nB?pglgd{M}Aw5ZnGJ3H2la$R z7PT%APn!t!BKpt${33+jrROB0(~pK9xe^AGWqM%aDul(BSXwitJj7#1W{UY*e&l8< zK+-WWgBN+A{KXg@BOw6>)k>E(T)2z+HC z{RGEB)zd89R%t^MfMAg&*4Kv;npCt$$YmRxvbqHz+Js3t7jJxR=nG1URGg&jWn|=5 z6af%#jg3LD=wKEN8Q$nl+5AUyra_l8t))A`@{a&-L(v73j55#Yo4J%7iiIe>OAb;a zMyRkVOe5PHPudpc#-^&p6%V4RJ$PR!$zqwQ5LC!wd5q3zv~OghO=rtEUg+=V(%&(2 zEBIQ{JTNKy=2{@<@-uP|1c$ajs3y~#FqCHR7`Uda79*@0u-Z>l9)hW_> z%6Xsxj`HRkTbv88DFU9L1LOz)BX_v?G(B7MJz1Ps^{L{}MmFY`pk8|VW(8Ta=>H%z ztm1c`)qL&W7(LWhZ-$gAiB55%MsK<8e{#q7O~Lw@5g}-d8CNX(ETk2}yEybBN|_M% zZb@&*4r)RMRZtg4)0yCo0AKUyfo9K=C>B*%|I+PiFspOr_%NUR@EY^`@0DuVtRs1k z?#eQUzPIKPTqFVo(pT+Ue$dda^4pl1*Nwl|^=fWgH&jfs23lVlNK+u~DshRQPewMaObO2`AAntnKB7c&R{!|jb_F;a8Btz%kcZvbEWkss`~EDe##3s4ytC9WUD zM(U#L3Q#K!%yf|LRC)Hz zMI;YGxF!+1Irz*{TD1`{nPRmeD!U==AV}={U~eY2$m3dt=;E+{;&AMaE8E>?^K~5h z02GUWwk%>$7DtwPM3%Iz#GOzP9RDYsO%y@T{;M->!$lx^c`Op+DI+{;9OZAVcLl&l zD6MF-?74u`c8``Ajhr0Y9x?ChEdK^;@?0OW#lg|Ex{Ui47q;^SJ#bT<9D6@%G59uKK;yslT8_re*zppZ)!|{IhkFf0-82@%d0zy%VQ-x-XulCM$f4 zBb+rtH`tLfhFz`?tA)-x;ES@tl!}1aoHWji8d-Aan>ml-dg0X}BG^VX_+&$3LPNlJ zU9}u>XYf_XSo47ws(DB;;kv(aLP*m31m~fy<#K7FQx^d>GH6+rZiOHAx!F7C*t+y} z0$%MyFitC^wmDR`T-s^YA12_0)4)F(Y29IIJ);*LT+!{m$Gv8g*jvm&5v*0dqJ+bR zo!DDxNdWFM%j;YxKFEMCEGEe6KskD+$EMY$;0YTyDdSLE8w2Q1ExJ0tG0ISXfdxem;B{aD4}Gm#xS03ugvcAN5`b zJMFuH{1>0@X*dUTjjun-aS{R3ksik*uqoW?b;#E=6b+hK@J+;mX zRb8~#yaJG4YjMoa$eP}O>X_6=^@NcMH4>4%K_bIb7z)^Sf|sBPIsBVkQ9)?@|?;c zy}p{RQVGi$Kms*WbTlQf6)Fvsb?Y#203B^}^rn1c)OyK*Py49y9Lf`D26t8*QzG6! zX5eB%0kB$WDpxJmOv}t@Xqb5BGR-=v?$WQxHk8R#4wEZAPSH!4JM>#Fvn15g`SP&# z&PT^F__BU&kknA^wS10*Y;&d^RgEd8VHN7Pk3Rq_gTnWD}F}VzDlR^!K z3}X!K9peR`(@?pK3dV05FNk=;mpH>T#Hb5JYPz4JZqpD^yF*@N5acl)zOEcgwN4hj zL4rtj%J!*kxuHL0--^3>p%Q;RLG6YN%%9T{r2!Zr+sbRB>XH8yGk%6ak&dX zd@e(aGBLQvkF~NGi5h4&L?~PoIelZKm_I>Pg(4&@P!`ymZ8AeRgj_Xp5ufU8B3&|X z5lhNSD)mGADd2wq+tb3q6>wgN|G&rk*WeH^{ndp1`yQu%fXn0`@ld6LT;%>^oj zBhTs_2+O)uD0>8EwU@#~QcdZ*6SQ|54?{UO(hGO_aX)t%QY(*sCic5?zh-P(FoP!c z?G^<~a8_B-myU&E3s;)74%7WF7L?&xHy^EV3)VU1>r00xl+muVdD?<-Hm^qubuWs1Kp~U>rU9us&kiMEzm9r;+(K7gx5z2UUh&0LxwdKr;=<1 zq_>)`OvCo)E*1Xar4idizuhB>}LuXjku#98FXsAw|=1)S!(v;7ge)K z63Q~bbhnPDtnRgI=R)SFVw1Br)$K!*X0_Vx@Oy5wA+Ga$hv{qB0=ZvMnf#6reA!?B zn6fbS%=>f7wqi{%#7UnBE}ly(_+TybQi*8^IHTGMlBe4a@;~B(VlbmJxk#mfv=j5R ziuMz{o#$`NdG>+BGd72gbIJwT`gb)cYHwFj_}}r39t3w1i&Gq6 z>}qsAdu&MYzMZNL`T7)wIrzw!m^m+2xbcproIpau2z=R>!&|nMwUe9Ca|%ONOK^0B zGbMp%3PpnphnD&JZ)K(1!j8z0q1f)Z3J*}ZQdCDnoTvcqsmiFcw@bxmF__YUGvbZF zEc~}z&5U=szG7v0pN2j`-w@Fpf*F3+X)nryhEdt7MHzo3Q}({xhD*jbKs>}s%rVmQZN68Xt*>0m& zo!*ePj>>pEA-Wnu)Wx;LlRK@TGkEll;9{G~rrqB#?%@6?73}$d(uvQ`)K9NSy$HDQ z$giMO%zp8*i>Pg5#ODfdhh53-Ghf9|uS_{7(w(C}e~M<3KT-g-UD4eQq+VsbsD-)x z)vcsMzhBBzY!xYkzh{~ilenYJsUk{@ok8LE+b4Fs@zM_B0|}x?^sZVU01s*#K)m$I zC*@)E+_hg`!0*#`Tof+Vd$*iV>_~~n0Alli@U9l{TP3Tr2w6k1YwutZ0Am`F>8POz z-2Hh(NmXWE+8CZ#xZQKW-lU4N_sQL3!BdR_1|O-e0A!E;2&I}&vde4@^(3r~UF
    >#o%sO-H#v8kYSb{D=r-au6(Bkwea{nn~nh5`+Hv>QOH9 znW{5$Ooidh?C!Zfun&}G0d}5=&&lE>l%UmC4t77KN z^^h?0(=<=Waum`4G(LCZbt>?O-GBXYgV(@d(rsN==iG1yG>*`biRN;DfCTHq)U`$p z4vbU+<()9`y@ocSeqU<#Z;kAFFAVOK(1fwpT8Fj2%;ikfu2sG@>tj*i3L=QIi{X!n zSPV}P_S7xCI9{kp^a33)YeJxtkhPmyeNDh~!hF!?cEb^B{_1=X>?0I-d@_4+T}4;R z)Bg9Tvma6I{t?HSwiTP=q!4qdIUwKv95jQ-4SYwedk3 zK%YCW2U`Rlh1GJ~{|R-F!l!!|$|oO+Acn$w%~Vr2sq$ss$gWGOJoKXVxZ50hZOHn) zUk$_UUO+5$Fiza?eK~)B5=Q_w06>;C2dH^UwC=aSuQE4@vBh*uieV@*;NYWArMqOX zNW32A;i5#^MbvD`VEPQlJ}{!w(YuKNY;5CVjQ5c>;43wVMq7quJ*>rghKTSY{&Kx& zN^(iFd6P+kF6BPU#eE3ValE-{6d@)jwtxKCdYk%+Exm*t*;94@2wU@>VPMmn_WFr#Mknv~ZGE^tt5 z{9HT%qp^x@m6!>oe-+&uls9M)oz9OKsE4;3w^8`z&r~FX6ozAr#h$c!`YL$fT5g^z zlqKHSaDIMw+9J@*F=z(apku20E`xpg40JtBkB>4;6A0>ul0z4186G>QJLnJl4W5}o zLv^NFqRj5yIYhBeE@nAf1$WKO=t2QgD14TElU*@ zgEfyC=gWh-D!jPE^2dfB!#qv1v;FBj1OWNg?9EJ0#UVGM?3rd*=fIA}h&{p>I2xlV@+SOev*uR^GV1I6G!JNP z2wy3AZz8T&TVja6&}??To(sCadX8yl%Q5&fy9&=D^BpqGFY>c!2d+K`Y=sV%NTqP| zPm&YXs>xFpT*j&Il2vs>Swo!=856aqBK^m}dovvr!P~t?RpDWM$ATlP)lA-*X9*_~ zsNNPf9Nno2kgSvZR-o6#EB~7XOw*U)bSVcE$n51?x@VT@7>%(9+p|@|*&s^X!|!qB zK=wviKdhf_5#bQnp#qz2VF5}FXJ*90wwEW^=vch(a2I&pMYCsN9RA6?ZkK2%p7BEH z`(z+0E&KOiMz4=xFgN*Y7EkjGDh{X#XXuzod1Tnzl_nzP32U~$XTkNgt2|<)-HAqv z2y90p67IQN;*|Yy;tGj-V)j{$Qb@O#*e1guFali{0Z@qsU%2NiY%7bg5(9Hf#N8Fi zyw9#_>t?K96nrdGyWei$UflEG{G~l`?uPwsSI(ixCrr#12{J1JnLi2TE3Vgsb?c3m z?V*mTwlef3A%xwS(B7nsYGY+xwG&_TNfOKl)LR65Hb-lFXuNw-FLDP9GL2M$ z_Y8GZpBDLn@LCg`Eczf7f;&Ve7@eboTPwkwP%W_JIpBT4$FeMr0H*W0u_)IA9Yb{l zY+X9AQgDy#^*cjgASv4my*iw4j9!0B{&2zveAkYjA072Y0udOo=$JuQm`7oYnqLTog zUEvYn-jM}GI6=IR_!{s`?-{uzyE|n-qL+>qtN0O0%Lriq4oognI_AXld?g;CA2t-Y zba<{}lz8hXDaCDik;^(a39|O6hW*Sk?FQ-2E^$qSA<|zW#4(=H zokvd;greT;DX;2*PMt84E3Wj=2VgC+{WsE5J?%bzpN}1Gu{z-;kz2^~Izyhhkg0^G zGACv~R}q^9vS(Y7o`_bUfgH1%j3c!JP2sk-_Ai_7+{~ALZD$Z-GXHf}%FsKwbNJ@$~bpXUfvb+aY6+?Ds(Ckw8e)q6V)@Q@~zs zY~CbQ;T1p$O;9K2K5`>rYo4|%XNigISb?#Gi_@4PD3~ZmCGl47#Jv;x@j>kwdt%uo zG`bNa%7F6&-0#FTsv_P?;ryE4(pCWnGlrReAZGFHM;}29Iah#6to!7B6_%5O|v%dy+%`F}25q0$Wk{-etEl?%^28cR34aO3Q|8Rq~$97$(TZEJ3P2!qLl| z1k&=;7V5xWhYSaX%J-A7!O$ z)`mQ9+-W5T73lG~3(&_!t{0R=*4SYV^{wEOU1N-0S*|wQHq4(EEpGL_3@1;s+Z)+mPov<7ow;*FEeOzGnu#Q| z_0X%KUhlJV;~RDgjJTWlWS{25eW9)xYiiTw`!?=inGP{M%LR%O;>^Kk4)FqPJ+|-b)O> zg5FpF0cuYZO$KT+2cJcG_V#CtxYgA_n6R4NuQ}}s4x~8ArFt`C#V5;VU%J>MGzooR z*AHjhQbZ3@Byh9|`GgadPENLA6}5yZS|+hl<Cki0*WaLd0!IPJhkn+ROWzh4bVz8c6#D;^Dm25oF+-|UP>-iJ~ zvI|eeXQP3)l818uouU|2?_W3(v7cNE&jAR{y`M>WH-lv`Zi#J0%QH#hNerXKR}#g4 zO8zwlo8@VZt}m75Dn=6SU}{>%ASE5s@XM5b?3VyCG$a=qzHrb}+W&8oT>t(T@=F!| z|G;rbH8KDVWC@pR1akC1&7B&lbur8;A>j$0td$cBzkwO}RMs&6#P#z1qzS^V|LiGH zaOq?M6wh02y76-b`gVQ?d#E;T73%UUth+7%R*1aWA0rLII5N^>k|s2=J!Vv$1g88G z54W|W=JN#Dd(Z}_710iwI(C+j8{QL5j~Z@)*V9cnBR7(`BFSYPsAyo9%4w#`Y2Tb# zD$y{h8n9iQizY1iSq*TTW%cNKZQFw3q-5^o#Ol&Jvau@wI0%%4zXH0)G@)fQ0|imv z-f6p(a)JZ$y}vl{%EZi%-IW&Z$3_B;ZA#yrsZ6M0o8F4H48u^q;$aDum z+=A1i5?jYt)rXWU<%*fFJuprd>hiypO5o3!Bq++?G z<2eA95HWw0mXG^Ikm|iqRXo#xKOw308tY5{!yp;yH|{t0?~~l>UbpvuRXG_Y6#BvV zle4wi`#o8oxqb^{$x{6rty9(5;Z*#KLt)xshWYF29y-Odja$>V>qoBu^|Bo6oa#Ff znKv#OVK_YlSlgDDONZa(OIF8EmXwrK738cj17T^i-skst4w^%nNPy)8`PfA0SJt#zBf|M4O+<8}jXOStpU^sJ^3#yhs<^Wqy_F54ZjqGOs(O;&2 z?k}2L%}%A4t;JO(HRTB}NqG3P@B_K2xIsEdREW0?o2p6HCZ}HRpy@apMyhq*96HlV_;8;Dqw{%i$EDtWE`&i_wyqi ziA`9fkgl5vEJyPVC#K)w$_?`4l198WAzeOC)7@}H8carijFboT8l^|`d^nEMT@`$Y z=-fkJtvUST#Qgf|i_Hz5`M`yn4?vP^?;JDa;b2&=xM_XVUT+A0~-#=LyQZ+N;c``-RaonU=%d_2+UO3uq zty{?ww&~U4lJ~9rz7zdt*UUfQ;K$yazrvaR4|p;E+-~vX>3?1V;?7C}>QtODZ#)dP zsBtqugIT8g#HBgmC^WhhZaj$a&iR~ozo9BxL#5}%i!AwPXI`c30pZRYu2eXTJC6{D zH-H{U-|JA0do{_=djGx-J{65`+3jhmEeJ|`0sg)`t7Dwpt>NCbJ$}FcbkVTFE_8xM zMk)BtQHu*>O>X1>uXV0RP&)m%`Lsc~?@)vRsvC>^aM@4D6qQhzu&55cltzl^2RD|W zHM0))2UN^{>5Ky@!h+FlM;uEHw?m>@6bRM^l7UkC7bN|I2w;9nx`sAWPN6K3TZ3`$ zptJu@-Yw?&iw$1GvW1Nr)P3*C6W48;%-)UfGw0~+$9$r2yZh%AYz5>)NHw^;$h_)}E9)A^Yk zoRzZJnPoAdP=5$Y+oopAZd08yS&tb zz&s<7Ovm?MPwDURpPa2@H?Y4J@Rn^>Hc(ENz&5Pjov9QO8k!ViO4=jOxNQvH^T4dM*`L{<`%kQLAj4 z)hyoa0OcNJrN*ZhUZ4f~z_X+ersbBfN7vDZk(+MsXgai@QYn={=AYaaOb$9jhBLhW zrS_o_x)Li=4RS*w&W9auymx%jjgxJFj=0o^n`Q1{q3uOPlPR|2!cW>RpdwoMwu*8h zjtSXcYLy(DlItLFY^d)>E@_L3KQAd1X|DDB`dYaPvix(WvLASXAL_^t3H%ij2>)R4 zYVc0TdntaBc@8XnV;2VMHyO*PWH}zg=F;&;g?GMIbnWoTDP@1p0FfVRY|LMMQFi$# z5%0Nf5i-xwE_<9#(RzQgC@E>&hV$S^XU)%z)YjRjl5#QEwH%R+uuWn)vLk9+4jQd( zSP+dqnfN-Jok+L-RDW%QC8!IS=LBgEVA(G7Y)}$Z8uVxz7}f_yg4EYt(F;T}!E&lE z<}J2WUdg*(;CygHM_=F7V)i&*E4xKUe^ZljI(rLKc2=lAC}|EZu$i_;UPq9eCZbF4 zSyQIEz53*Eo z$UHD?v#`!MraH)SC)?bM6H3*rl#uiF2n(3|6mhW%N-UGy+tv9naf@>K>kEG=)K8YE zO9fCK7(83Y+Zpz#A56$}S5fa3^`axqL-(paWXQ>B2fOJ35qwl=WWO{XYafXuZL^8S zie*yfquTO!CdrYGgK9EV!n15{H-tErOYezbm$Zg- z$oS4Gv2nQZ-8FQI)2z3s*Kve)6_6;m`|oEWBO|MDeWhQsV~lD#t>`Y}z;PhhkQYmW z4>=0^sno1iW4y!pj(A`!aQfi0@X@BH0fub}-;xXTukN)S<<5G(JvEgM3cC~3w({F2 z`3=n^4k!7(4hDiS-vi^zrjU!bS`Z)h-EFPAJMC`g_m=ujy{V3`S0m#DfM+MFA5I5f z9?*0o?&64T+;B4XJl3CwI~m5S>GSy6Q3bR0kte-9SBZ4gQ5FP3Bh^MC*`hP#c~JQG zH!y$^zr|kogG~x!hfkV6Qvy_HbxB9Vmw*heT z9@g$_ehquNNiD;gTZfTakw70UZ$f6^+o}|mj2C&>VuLA-k{iO^S$sr^l(@5vCitax z>rKx5XN$$2CD3RF(Pq0XOP6|7;v%)Zb3Z;SXPc5rPnLdnPtWzxOp;<8L| z@AGZ7YD59f$24Tbq+4f{)HG4{~Bwv_&{2R{O={$u6) z&waZ;p8mfp0W-|zeewhg3PL&Tj@#$n*+_Ur4p#GoRi%g4b2jQ{E96L&#%MZ}UNhqF zJu`IvvCzGf1dIF+2h2NqJgYVK_$Bi_+|ybgeF;yWd^()2!QGV2udex<59RUatc_Un z@;v>A@e>vSmx0+XcXkzzr8y^+i$Z(1xkTY@N&-Ho+0N;4<~gbz#`bY3?u*$8%`VXB zje49>c9$T|0qQDW8F@3;UEoU|^mT3h>@z;qfgS$I=b1u%6dz>vijhPZZPpHdQ^!Po=D_t_m)0}K_ zuzXn8tS6qTm79@_Z@Mx=TI0mHLv>e#a@$YhJ;ac)(&ayksXKUaYo)6X?U50*azVtp|;D~;QTY3i%#V@NwkgG z%$E?yL~lsbLGJNBu5W4S9f6+Vr}ha?njy2VV*7M%pT$WE-J6HriYpQjg7Or^lc0sqJtQb2-UHO~zOD()*l+ z={q^cI*e@_SB@D>FN^9}*e`PDQ!PjbEN_5k^~(|#b;BGYh~26t{TE8zffG||XO1m) zsD%=%MU!;++>NVfe}yO`-DNEWXF%qDta0|J2=igb`eVCTL8jMJO4Z1HQ&IK281MRgLqdO1cRNI|)H-dGOus_(5?|tcrrN@31MygGb}7$?5mh zO$Rx461womFW06R5)f_Yh_ZIaf-m44acH#*gNpHP%9~=BOW^7S^ua>Ijr2S!?e(o9 zt;G6(uN}Kvki<<*P4I#a4j;zg!HCc*RyC`eOIAJ;`#!Tb*$1$bY0{zRG_hdaFqDwQ zF{&x>D|>C(OQ)|p{)yTfri%Lz-*ihRz7DdwyQnlr>Jj`p3*fk}zymSM=WlPq3DRT< z&ti=Z7HdN$2PlLgB3b(VFu4WdYc+(VZ1K?7N-Ap^iS3F-YwnNtu|1|g2E_Dkmeb`N zDfBJTsUCsw9KD`ew6u1w+D+msT_7_^65pkcTPHCW6Jk+T}J6Ol)0SV*z+$ zc0KAAXX5=R<*Lr;RxJUa`v|I|0>*vZce_VjUU9O4k~x~oQu<5@%{Y)d7fRx;N(+*2 z7(-9=x>}*WS`Dl>vxCoanj_~l9&WWn1r*$^aW}Z9^k84bm$9*2a{MDRsrEyASo@<= z_FWvagopK#@w%&lQ^1&&d7mSZ?Q^|9OD{ZrtY5Lu(S18pe6hw*gv{ghHC5+umdUf& z6%0jcD`YaMz(!6$uD}bTZJ4wTD&1Y~opCv_SEK-yawxJ4{A?uuPY>`VXVzf@-x#(a}jG_yFxZkk9r7!Yo{%&~nZZ#v&x z;Yos;*dF9Ki~eq*$*(@raO{z3HtDFBai*?C1+(r=$UVR+rFE?4FWj;YS{vN@gsuS4 z4BPzPi5WiCyF^hKDc5UU{w;sB3dZ~Ee{EcteJx{z=;Z7L90JFwRZ&9%`O8lc4LVnV zX9f79kR|gvWaGXqR%}k>AWP=2e#d_wONWf|#hblW&3~GTW=E>80F!ct1ob0PZFWin zwobd^*c+U!8E4T)R{&sAZr|;VoX=~AGr`2e<^i(%Eb9o6khXBhhBn3Uq+W-E1MO5B zY!7Jz{2`J?&fcNA$thu}0(|H}-$9E-K~oPnWlDQM_EgFn)P-N&fXk5{WaT0Pnr20U z+!>U~lhaLk@h7k{Q!B#B2C5Tq_;N6thS__>glmNCkn!T)S92+RX|vN+b@qzMTvIV& z`W!m*(m`~?k$WeXe%O^XpR*#x`_p^7YX{3$0JDJx;L0uMp!asIqepKJgDsK-H5;~_ zxfWqx3gq@IDko+F$*pewGRosvvtjLj&{z_29&qT#CMq~zxf9^8WkEj2Tz`ue7 zIEG1uN!053Q_LdYPBfEW4`NKiBNYDhkzY3q7p^eE{!Xw#d zV+f9(Y1OIma;66cM*=~MB0@@QKaHBMauRG?zQmIQqSAtoQ)5my$^=t4(wRXCC_D zVSMbzR1f`Y_lX`bYKVjfjfGmH}bn!T!bisf5^=rmU{@g3TY5O-M;vXaBG~nb?R_yK70Gujh zTH@;QB2bQK_Wum^TR6W>`g^UMcXZB7_r%fG^x3E! z0n;!1kuye6YZ1S`E;5>cR{^Srg%=wummM7&tQbVHBuLn+qF7h4wdak5x~k&BW2MTmg0?fBIF==3STVBq!bbFd&!9Nc7Qo&fvH5hbP`2D*ofYs?+_r&kqUw zn*>r*DV{v=pg3XoPkxJi4ZSGkdX{vms+9FfJMJZq&a$G>z|ncH_Q*9PP>vp!#p&m9 z<|5l`&wVmuTD7+l|7pu=yLNC+F=Wlz)fi-q4Vln0RV^jsWlJpa0&TOfD~%-w;!on4 zeq2kzcm5Uypov7Z-j796UNQV%VHW(&HfTfk71E=w0OpXB7l&5>zj*eBOEaYEsgej& zTu-|KOzOZxt8uSG+LQH>OrRru6`?uVb36vfb_T-VbdCQeGJvXQ5WMsXFc6@1!xq0^ z8R7$4(jIQuM8;hOjQ-{cG!ceGXoYw!BI=M>_GjOojSQOdWC{XJfvkbGA$x5IQPQA` zBTM)#`r&Wo503~x2G2h%s{g+2_u~;iB=9dPfdbH3){x{CpuFMoq0<$hMJ84UUWU}c z==%_Kw#dlA+XaXYO}G4-?D5IGx-!_laLk0?7zz!|F-P0PW_4bC_2i^ZA0BOdW}XMD zHALb2tXweai&^*$Cdh7e9OABdkw#@*7;Z_x+T@8;}95(j^xd^&GWn?MS`9Ee7bk!$nn+q~FXF)Mxy( zi|T0~`RtvTru(~WH61PRA@S=~M{4@g;`?`b7mx7eJ1;h6qq~gxzC6%M3!V#s0+VuP z_rf+IM|f38<#glpZ*4?6fd1C7pfic%h6_y1E5NKRee1`dcfXo~=HxQ-6L1kTIB-}NEfkTH!K^kH>*=B{wD{W2nYyK zW}^bR!{MX~PvCR0+xBFDup#i3j%j_*g)=XV3iJ8RvczYr#C5gDwAv0q^NO+owZa-8 z%eF!${Ad(u;wlE zV%gwlz4PZlM4deWNkDoUAhvVIO%v4`e3NEt&&x}wvoD84M!->6#`y87S?}(fsHu1}k}w+!ByKkSCubg+A^d#w|N1!T3@HsCVtFxyJT z>F}jt+^c@LY;z3K@Jz^JQ#%WmV`LtPN{i?8RZ7hDPm3>LVx4W*p^*^I~U- z&K;R%yg$3;a#d%wz)*TCW40!`E5O}^`nF+(l#qp2?JeR*S$RyW#qmbv5h=kA3NX0Y z5R7&a!!B2S+VkBJ4gkc^APX@aDXCLKh<)w2Ala=ISTH%3(wMs8w}u0_EOrxs=VawX z^#5y{nR3H+0U$|fDAY>WDmZmHXh7^y<_2$TV%q;>@4dsC+P1x66crSuDN2tDh!7A# zKzanE3!z90O$4Mwn)Dh41VJDa0Vxuc-hxQ)NH0q7Nbf;Pr~yKJm*?JZA9wL=_qpeN z-gD3Av;JVMF~?jhYcSWGV~+A0Y0E$vd7&2dN6ofv$(C9x zTKX<7oPLhKLTZ8F-dPJ+L*$G;XkW_mln;3nXYkqqzzj5>L`<;c&4a`GC0J$4l&^IG zHj~7Hb;w%y;3iEAMr1WFCt5E)*7VMKUUpKVg`}-*fxkfIuV(0)XrO-%V_hcF`4Bwk zVn@nI(G*kf&Vch(gWrC2kBlxn0lj*J_zA8DXDf}MwBoji%sFDhcOz=7_M#nxeoOpD zZMi@wy~W6eQwFSRa;M>pYB?6k zNABLEs6#>1yZ>|%|272W`>-c}6q537oQjfqU*W&^!21i6!p|4^DS>}h0yIk6eMvoz zOL;aFt+Z1f`JYJ!lH)Y}uQ)G4VA+{QGU+<4PP}`ncB1*8kGpo{W$7{i4oDtae$cesiG+j3d5jS5Tef7& zzM6#_X~sLHavupp0L!3~R<`pmi`73kc`K$lZHtpT-**_f!tA}aXIWsw``HUl+-L}t zQ1`R9h7L<~FRTK!8{!3Ln41UwnBA(=e>-nwg@4=&)za`(&~~hsae;Us2WJZ-jQ1)p z4C904H69#}@pjmW|1oP(HmoS)2fX1NCyA)#iJY#4g&kpiOrmc2-G`>cLsonIb)PRp88h2CQ`oaI zJcZ59B)h>Tp#Af3AIZjR?jc;80H^5d2gUrIR7)b+ z4+|<~BMENBn#eBZnx_vf6Iptkw1&-HL)h_~53@l@;)qj2y<&1HnA~?ASbFH*{IC|} zmOFt4c+c!Voyz|agE20?;VsUY_D=MgSmf$Z)-6U8Ve#_Nrnh+IX&q0|(^r&S5*2nI zYF;GgO-LM*Gtu$Bg5{E6@;}Q~(aCyH%o#>P+X1yn&d-DN;cas;tSRcxdQ{<;?|c9zvV;JKiYDTrCV^=Xn*IL4SpCZceBB-`Ey?7S1Ro*;<^HZJK z_D6~f>lg&*GEiJvi_&=eoFdbL8dT7VU)w7@9&0c#oXUX+)C=*|7ZyO z;7|F<_EQ4?yCt9W}^Y#SOcigdIcF$%fu}7VpIzw7DbOQhYQ{x6PEOVK~QQ z5LeU3q-*K!xYco*>E+mnT(9 z9H8itnM%2-_)hW<5&VhP6(y?+lEEM;QTo3^UGXHD7IuQ0zpM1x)xw-ATfXery36RW z3NBg)J)7&#O>D#Nj_GA^F60FEvVXGgEJY92db7l66?=f2C!B1OvJ>C2-$B_oOHBU_ z=j;a*k?#dq%y?f~+}%$RRM#1eZD4+|o?>clb6s>x92|lRNS?ri4J(+~m{YpN*M*0_ zDy?1X+Ctx)W=0~iq|Mku$`T~!ZBNO{I?oW-NcWT)@MzYoZ!hz!UA`k^nn)?Xvk;D> z(!UmyHGy;Aj=LIug^>?R%Ie|J0${LRQ0lkEi@Se3Cm%^p$L>chUN-A}OxrrmQ??-f zWTu{Y1)@`);Iu!JaR@K&m0;2R=+~E+=@qAHrzCg1yb6`ZV&qLLwxRGw|^2{JxwbT6{Y= zBi-|H40+5cLx@0P&A z7b2hpv~elGDhsss02;`*>+)}6(Y8;0J-#RXU(WN%g*!rt&ozWRPaq2`wL2ZUJA!YnQ_^TG!$=b-A4GUP(I_Cs$r4!e?l zH_g@cYUtqxq6s~OS5%*}-P*_=oAN{*%zhyXG04ZgNUfV#n$xbR+k@-^$z$}ZT&u6E z3xhCY&t*9M)n$dy(;XOwWqEY&P@#H;8LLUqZy7Q%?ulhp9!>7fNS0d_Xmm`DeZp~x z;ISIm{Pr5%(uf#Pc)YyaFu%#}-2M&cLwNjPrFpu=20vLjX34#ou2y652?Q+5m{}JCsx6X~-oC~<{%$PRa z5zhVV_kkhs9uY_%>zVp>h@4>GZ24_-g*g_yaCBE?&!dp11y6zNAB6-O30)|6X&-y- zEjI}akYT*yUTsxxwc!fc285U(FtQZ)v^vw6W-go8|IUov`^ib<2W@zo)%pEYKkew8 zCb}CT^(4MN5MBdxY=#~tn#vGXKx^7@^E5L#DZWuJ+sP)wYNRSU#Cl=k4kf5OIoe_I zxN3iq2RHp_XZ6dA5GOgVg|1Vtv#oYu0wpM~>$kiyp}L((G>zk)iedTZMcki$?v+r$ zlR!I4D>?(!&tGnas#e9Bw?ZB-?G0XV!*WVB=qzC_6x`WkUuS}d_)JVI@0aW=hZH_v zqBDh{t;i68H!%}Sf`UCfDQ01WvJ7ql>qIspWoT5sIy_o>f4<@jal^iMWvca1vt2Ux z=>$P>xeRpuEvy78k3-Ly_tVvOY4$TnH4OM|ml!mCxlL;<*yR*oS8KgVpL@_qZq~?~ zAn;2pMaV-(H`vIzn=VTAoW(R0uO+JMx6mdH5(4?%b8rjQu`!<0SwnXv{PqthddiTL zxPfN5sNC1mE{>Ia*~Y5-XO}-i6z6NEOfiNJyqhfn`@GA1=K^X;e-O|0O6f!A(27{i zDt;Khc?gaJQqCeX-C_@z(qj%M;vtEMR7V&|C|8l?z`*nkOS=V9G8;LG$2$^D<=n*0 z=V67*aC^GKQU|}|B`m009v~Bg^G{};bHkj9N`puAh3k%vhIb~3BlELj5+MiIas`k( z555p_nw!FqVSISF0UNT%Bi5bTfX)d7;RY&X() zZ8xzAjOGmRBJ@B7OgY)^;hEZdj1CHgHfNrco+b{p6byO)_U4ht_8dBL>i@{o;;OS7KQxk#qPjA)kQ_6y$;M`ZMIsMpW zL+ZT*7xtd$Q`8SSVeeiN;kRK}DuH}BHR;VGkoJE)ubix^Aj5*6Zg6P_FH4OAK}}#I@R_E*|I!^ zi(?#?Vz`N&sMl5Q3a`wmCIn8hK3NNI!Q1~evWCnIx4sV4%%1O4 zBfl~gU+_C0(iWD?g=#B{O0(?eCqJXF=)Ft}d~-!DT2+$nn3LGCjzHPzoKYxk4GLy@?yW5u-7xd?Zh?LA9Q4_(P5*%NMa(fYDkwBg8 z?}Edv<<`s$fObhX(EFnVapNyUb_mX2Xqi^9=zpkX{Ia0`Jozbs|6B>Y$5u9hPEJtu zM`>){qgc|+8?{bI#DQ{6aA=JS0X8j|_u8BHz|HXUl^uWlxqz#2x*QW3bBIn!4zcWO7zCV`N`JIvdG zlWU}Bj3vgrbsyFnQ@Qem4+13;@4JfD5$ixpjH30f58XCAo#M3cc10(VeEspj*_FY$ z=nsNa7koLG+&+tHdr<`N>zI%Nu72VJ>3Wx|G*_}R(xDjcvCBfKj z9DLc**^gqaftFx?1+aNQq^qRPMAQyVP41E6NkA{NN(dBPYpTAcZv};1C8Qunn&&xz zdM|V?fN=e>sO8`dA*YBsQ&X4obTRNwZZ^$wdtRoErNc^|- zTpKJ?N5Vu~X&v^8mE9Sc8M>`!sM2DrZmqEzcg_zBAg4jcbPPu-%N@aR=VZ>aeu(=b z6d7NuonEfn>7VO;t*2K+r<%ZWK-J|XV}c8KfBr&o-nCb!M#eJQ{H8!dp}rV!)cTeD zYq7bxFH)X+q-_i{;Ea-lwmSiVJgPo6z}72J^zKEtt>d^J3xaHmgt=@6yMxxQjcX`> zP+Y7Y!Mz&aZfZN_{di_j%v+)#v;Fn>msms%uvN3shP36bI@zty-ZMQBf}~lkg{-l@ zTW{>GYc#wgOZxfb`@gg3Qsn*Y;A*?*85a=ehT4C zelxmLUAUR19(SRBU}UVnp`^@$yN>Xg(Uv&cg@dHwBb??%d;H|Zz;$%h)A}_6cag?6izxV9tR3T zwj(?8EJjS-CE}Ta`S`<|bh06aI*AC8s$|RX#r=oQ*IRnz=2 zhw51r7|ntTw&wyDHnVSPQ;opd?Qd9zvR-TMIucjlWY3hEQ*u^tXj7_g&( zd6fp!Y{aqb*7=zbE_p~2W89Auw+z7V<0Me00=c7CAA0tv+%#XTI_`cgYVsop`t){m zyNjTNB|AgGmZ4A81;zCT$n?55N@f-DApXdZ;e8%StvoUAH-=IsULvHM-LEAXC>k`g zJIO3!!d_0O_=s&;sLIx3X{*@issoC(^TIO*zCDeXOPZdUyK3^+6gdP}@hV4<3Nec9Fj z@Iyzt^?7=2CY{8`9*OWyPqwK6?3shuSJ5L}{AiJUaiU$Z!PKu41*zZPW`#=0&iP01 zp2_v`=f|Y@dsgI`#xE{x3JWaRx!TJ3T>9H&vPS8!mk>9P*PE;`%5O!aG|qmc9A8f# zvlE$g2DVFokv_>1t+0J8&YXv(jqc?(Yof9pRXgjxkMfO}@VV0;+#{e@N=fG;ydy{FuusuaH~?S=cy zk05^JQitxKTKB=@Sdf7vY4_wkgJ*6n1WIAD?M1|q8Z_~tF* z%{0#&Jxq7@3$vu&y?ov2?QODuWUp6(Zizh23#NHV_m1j94TSfQ2v{@0!12qP`A?rH zDk?Vhi-D6d6>%Oip&DO^xQ+K1Br5F?lyHW->u6-ws^!XgVJN;RVD{lY_B~!Q$6BMV zTq3(Hy-;TkvM~S(FIa9S!Ja5R)R_7>D~eJsReFkHiJV2Gxr2IsoLH7-U{t@W|2&?~ zOZKiHL<$^d){locwUp(@^qEPU0I1r(W#|< zY&96y^qh6K=OkOG1MR&e)8iT)5@}U8YDbFZ^WU_JZ}}*gBs9@pr8{zW6Pbw)Ef-l_ z{8I()ySs?Z&LG6H znZ`|bKh)T5P%LB$MlZiND^XXz7O))ud~i$VE6wmZ%xUx&Ymat_Yny}pboLJWQ#+vM zDe?Xf5OOWGvdvOqBT2PEY(|;Q?3;P8o?NYV#iAm~C+8GvG(w>ZhDE8c+~_E{mG9B3 zQQql{sE*wJ@{+I11I?Tf^|jO0tW>7cw3_y-j}RGooVaj1Gwj9Z?38ucNQcNn88_{Q z=e|6F$8?4KY4kl3P#CaA{WyOQe+czR>tqEF{i``Oc`gNZT-EYlrOOe{3nGDp#odYB z6$mb?8?X?DBmtg3(@A97m&Hw3qW-w6V6L~!w;D+2*945Ph4Ed&VIBbaZGgaYS%U~(YlyXxFGN_)-!aJHxxBu%*Q|{ouy+x4&425C zKsZbXdjG0X^8>E8!5#68rSO1N`y}h}cV?PIzDOxAa4a_c*%nYtM9y+Y9!)JzX4;l@ zgoN2|U;D-klxm4^F|_w{xISyx`PQhrN6E!cZlRN>a&p-NSY^fJ?Aog9Pa$Fj`aA`q zQr2gmH9uRW(;hO0ZeA%z`oAkoWDKXpGSzb)oaP{y4nP{|S}WiBzLV|T3xn*ynsLd( zH=`+?<%hoz@sSYP0bBPu^D7&51;Rbv>T@uG#z2?eCTzxzoY30$e7ZyQBDe7%TKdv-+8`8 z@x*ZE>+!c)=c8*)Bh{kwxJ;RLb(m*%pDd&jk%Wdf60Rbpi#NF2s8xH9am8eu@_ z&|d8mdn|Dh=I(SRW}sj_6LJqtIv`IIUN(918P!u5lswKA)uvtxe_B&E5~Gg!Jjqu$l?%h2Ld>8NNndt7jTP`=jRbpA@zG}jfLbE2p{_$eWR zQO}=Umm8708rAP7`kK42h3+v!UR5s88NZ*rnIX`FbEqSL`nDKE%eY)xu=te)p>}eL zBUO^xA^A;jvbeYB}VBt#Ff$o$DFQ8B1W1KRY$CPXg3`wFERuKxy=2udmsn z_(IgugTQJ6D>o@L0I>)HtXL^uh^8}s-3~1B-}Z(~om^i#B>qCQ!H>dgg4X?TE`%pv zi1z3K-4uzF@)QLE?YaXZQRE_tODt zIA#QFfybN4(!oZs;FBKjA|I37q}w4pbPz9qm?~pQXk(_vg}Sz?Gh}qWo-SQM)N2zM zB7{G@p5l475FB->Jb0fO{Kw1{c}L*|aC*~l1HprMb((~2C-IZJlgPTiN=H5mi}mDY zLGGDx3gfYd7P!EZF?BscK>mz+GO80a@SApc_&RfJn$=*5)<`h318IHp=DSH^-!7z@ zKJxL_#~WHz<;8vXHebpDqb-Jrk=f5ZXx5V?%`&cNrN20tB%$s)7XP`FXmX3CPuu!8 zBPJU#oBV1>jsDBVOfrEI1(dkyf&fVp3IL3k+HW5Xz0;FZ_}tm24s_*;*PP1@)5+5t z(4<`P8;b9GxJSrHJzvJ|Mt%Ai`S?bwgiqntN6vQn5(}PYs9>%XoOO+8B6Ikd&9A>2hd%7 zy5?+s(#U7z!-lG;lbgG%o97ba z?L*}<(b`Rf%T-+xjXP|9dk4F2K~Uw=W|qD|@C<^=a1nI>fV$4}@Sw7hJ?)c8I0haC zY^nou5uKvYgB?UunGsGWZVQ5E zpm4C8)va1hiR$?Z=n42u*nA}S$H`#%$d}F0DW{KXEU$i#z5LW7IO$LAt#5utME_@4 z`+mN~zk>w8O_j!}OzcL90<``n^9^1|+uB(zrq2?}^%PS!GHwBZjx*_S>LgHk*k1S{ z#n(nQN!`1@pJ{QIBBouZo+`Dn9M@!OT~f@_^~q>uKZ-gB<0YI7}PdO8!9y-R^brJBtGZ76PdyzMDfXXwP3?I2JGcDEsyroB14woLVnFn07H zcYkIh15YtM#I+K0r*C#+M&lqhK8XYeO&>^JhdjSmP;78RBeFdNy86K8F=$Ex1w+;n zxP3Z?9Y3l-uL(fPV#|pTe*0eVuKRHuGt|KOw>9 zNd{S@2P>Qoyv!`ka+G35GY_cpq#`vK6ITBu;rt%5%SixspxwJQ%kAcNT8Qqw?aMgwp$hVnvDU90lrs2`JfH&d< z6w_h3Oc`nOw zP_Br_OYX3$hVBO6Z29+c%PNWUWTrg!{Pq_yJ=fLt2d-nCV+6BLpe#AcqP0iMc{g}w zCe}ZiP+YKduzG8`6A>S4=2ejlx=a9^=BK~IZL76yt9=9uUA08%B6eVt5*&vgZ!Gf= z+P@9h{x)aL5-$jBUMy>tr@jz{eQS9xKrgFWXoZjIH?5s*V-e*I4^u%X4i7qI@Pp-7 z(Izf|b5yThwzRNAQXejt_tDd&xM=fMG;BQU_+V;!7S$629o))QkSwZx`tl3W5o;jk z$1~B=@erZd_kuxhQ5U&>eq>^|b^edo;J3q0D9~=_Qs1IuH5n*WATkX{!lt}0Ef@1G zB`cT2vcF0>eL#Nup2z+|$v~;mGxwF?a*JN;d4&Z_Vu|{2T4qAo$~OAKG;@W?wK)D! zw+Oe9Tb*xbWl3t#9$uC;WVxm~wyQ7i-{hc2sq;ZM&rLiXc*k{f_YN<^ix6lg9s6F8 zZQZi&d4_{D+K@Kc^uFLoe4}$*$P4#Slz76#Bh#zU^qQ%G3T4?gaXDyWhiw_(IriR( z^<{&!WHaXE^iK1epJGp)U0TCAzHx=U*pn|YG<~1SQ2bcTI1QHQ!5!#B zoS8Tibn9}Z`$?Wk@HEkfmx;scZSEqdMH%-r|U*)R7AXm zazd0E=_OvKcT|PUf;KwNRuwEYLz`(ll&h5GpgtS?rV253TA1U8y|x-^y_1z8bkt^_ zXv&{x7moJzIpTO&WN*j19Jfi*0wEm(`nFhD!Q~zr^H+%@O7|WU5KoP9yYgO8DL$!+ z^dgD-kA=a=mlO-Q?=ahslVDlhxj#kk2Fs+rKUQMIUF986Go|nH1h2sS+=Z=bhRV`T zl6$b-*68&bPMc{GK`r-Ytku@F0j5u>N0oi?D=dfvTUs58& zGrzZr5lx@`WsiXxv@<2eZc?4R7X_sGoen02JVV-ZqJBGvhsHIO`)JGcqe_{NE(LGxqm1_l zU+UED6W$uB)&zmBuf@5|k~J5*C)QHT85*0AE$_egk|JF7tgp6Q2;*TM#&{4xqTHSa z^W-fb-zLMo-Xz~kPrOy>VV?P!h@gpqIjse3wO?J~7GO9QNP@>-QP>vSfcF(HPf#pH ztBt^{1W!^^?BVW2V%%m2Gc5#^*8o2}VKqvdjk#~&irFF~4e z5jIPpEt>o=7Em&HkpE%ZF%^IoaV(N*-?+szhAL`m|~Bj;(vaq z!ob00Jz`H1JAbU{Yj$VmqPsR7_N;tO-_*N^rm=KNx5RsQz9x!_BnG5@zYDRsJ@YG3 zIj=MsyCZwa@sIcpj*>P{pRp5kp<3!KbQv+#CIZC?m$2}>y%_9$aqv6rPH_ekKDNcF zO^IBo;;GJH)b104E4o0bVqJNsue7)EMS3B+(SfiI>7*I3bn!k}JrYCj`tHR})DRjm zPO>*$Zc)iT)tan(Dc}FLaab!0C4zBK`GOx_m>|#Z27Nn{`ycl@L?^ zCORsAY1Uv#yn_m0kPrpzP)Pt1qcwql4JTblv40}{aA#uXH%*8-rX)m{0AvlYVbDXS z$h#nm^r+oYL&270hk(}et{za0M32+1L_2I0UK3s@&shzU^_9&uWJR3`ls8K z(q5(!Kh)blwZ++k{x%XnO@UsCLGCg**@sTYlxop3LN=nQyCrCG(qD`2m^)xZPRTEP zbh%vUQk1_Wvwx|9`BYCki&Bb8{!xe7kFSy!#}LGOgJd~)5nWSD$H0M$DxR?ggC4(V)l60nLI?Wm zdhDLmD|?QIR@*W4xhm!$cp{nUnvX%vHB_$qwRqDwXVme8haF)_w?6g_zF8_(NVie%Tq<=-;&#}#P7k%N`QmI$!EdUktKe zW=ujX>Y!mWwXiK^KOc_foJW8+L9k+Bnp1_X2MM`b3|` z76t*S1FBZy=m!;&JS(lz!7Gyp1xftdp{|N7j*v-2NxTaUUO-*wB8 zbts{&m5BP_+FKu!cu|knob}}Fg|Yd3x{gmKMElyjGf;9v*C!7f6T;ZTj9lUx%th^k ziI=qSY{{Q2+$9UkT@$6j68BkyDaCJXWqVV1ukwpu95E1sUW!8TQp=JuC%J3ZoV3`b zXAZNzmdGB~*7-7!B;{f_e8jFWTKhRX zkmdz(%dtn|!r9E1x-SnFPFZs0ogbtIUnL16oppE6XIrR$rE&#gR$5Z3GR#IxU_=g- zDs={kB;4_Cx3{%>^-hr&FAU1Hi$Re)3B5dwo*thPHGN@%djx?ciJh4~dY~K!Kxgrp z?9FFT5W7CE?tPGiaQ*O-;Fd*PJARR;a5w`re&Y%x5pHq^XK&YYqHi@hl(+xV-pd2i zV2S|%Tr1$hnTysc7@-*^bxLsm zv`sW}R%@SEF6I}7jtuag;|=Bv?H7k?49HeZjWW4YmdI;q(kevy8`;{g=+f^7Jak%= z8oQ)>*Z7JGomW+JktuwfDSz-and4J4HNRLPvz=Y${?3fH4$Ytd{8`TD1_yYImdNjGA5QZx#dwoY^RbEum*EcDO!Tzu+dDN zH>z1DJ86E)pmDcfz+U3=`YA(|UD(~gW1)|_YDl3OEa|;_+GnSEh*D?QoKoe(53E;bh#{(V;`y52) zN95rS;D%KOt)m5ydwalcr5bQz2PP9}nt*eNAY^wGi0|S@)RhiE7Ur7?2MEFyBu<;Y z8IKn86e@`XQhAlhoTxr- z@+<&3zbzOPIrl7(^V=&mcH6!7|=&AMJ0!gb$Ol`;0BbvMc(6vC_DS8f|9 z#>1jSn|O}oiadj(`KK8m*IwXS{4|uTrKgGg>5s)b6td}AEgahdb9Tk7-ZWzKj--B`NfBz?l9N*!{7Gcp($1sTH`C{HGoQ%sX$V| zOL4^MLE}^$O1|GYw`zndouI3$( z$NdO87>8QKZ6PybpEG8Zcf({!F=y-^$ztWZTT+=+PQ879B3?oH8y_wB?K;xetj|>G zqoQEwhesK%mWW!9-RxrLE9%UuQqIgp0Qz)2wPE>~5er+!oa(wMVC^&DFbV zPgSoIZRM&G5gmC9u9CTX>)C5fSt4ShQ%>A}hbaGp_xfv}$!{vuH#(vA7;F*C+h2&b zM82(SUzPRs_#Z#+8`%KA6JFl*6mWgfv4s+)RpeM03~@e*(3w2-4H-?=cwwA8#{2%} zDiS;FS-Bs5kiPvFh$ObL*M`(WXPz2WdIglB|KVkx*Vp6Mzwmp){0WutFCGft6ZB8| z|4$?U2I;Slze39qBG0$h)^&nTOH?N>tLnp=?Ieu-KiB6zuo_{C&Oy~6A12vz+STLX zKK-7=S;N!=@wWH3p;68K(5)*4^Sx;*8sy^TgP;soh-wjAZt<8Lq4UfhXen+U{c!c+ zgjs*(+me)LcL_zB-C;UL};RXWd1@@fdn*8I{nRQFXGxpFOU&rV>Ry zJ}J>RD;Hk;rhxZz@|EJP36f=4=&2D}7DzLs1#+z>6-x7B!%;c8zcg15bQ)7q3brwp z#%p-iq#N~AydtYD_l|27A3iXv+qil9GRkZX*MHz8A@sI$v!q7S;6eBlSd{wRCsbjF zFhf9F`Vb<-Nx-K36b$w+O?b=`?-M(jWdM)4kSYE$NXh@j3Fk4NVZs`0b4LUs0(2?_q7DqfBd>4B@D%I3r{9+iQoplej!33e7*@iybsMb&_q(bg3LD&g< zj1(URL0O)1Q5$gvNx_It1ptk$WzzcrkWo7=miMA(<(LV<9oqV}as%E1fz z791LpIoU*ZBpRY67Pq{$0s=~DScjyjCfs{VXR>Y5`bTV4+F9Gb?7#lu$$w742>WW5 zzPz(q5wr^tSErl0f@L2aX$7F_1>hA=t1JfOWc2TGG`TLzld?W`@bz8MDm0>&-)x3^ z`SV@Zd|}_ZNV>Zem!D>ZWPN_Tnd=i6D0!{c&hz4K$)^WoC@B?^K|=nQLnz%jpQsR#Kc+~P4^J_ndXu!J7Uab(@$assO%d283`V4lX^I-&ri`1v zqzYPX`eY8$(1$csELX@syj)xweJ=jP=22dQq0!(O3_JiAZ z80-_gldnFu#Id|~3{g^ebXN$}gAHBYKE<8aeNH2~Lv{aG14{|WBY zXa0(bW0o&Xb=iUPbPChg5M1}XkCq^&#JWfn0I0zlWxbm9L<{1Suf>AWawl+5{v$nZ zh_zne9w+C00oxz-*M?-m*^eXyZ?>-;qVD{9>_?(e?=f(t2E!)wZDzJO*`z!+BS`MUjP1| zuI~RSo1dIRsC-f1i&*W4@11k9`w-6nyRIhs^Xm$I`Rnz`30|2Nao3eLe?#XptpPllfo_$h&3BmjiOA^g*@6vJZi&!#qRF1WIb7VmrgH7ekl zfxCSnk|c$!`F?fV=ymRs0<)(l{a2?BY#h-gMR73s)#);@N${aWOq@XCv`SE0l)eNU z;HN19!0MCyK)de&7f$ZN;V~e_vHA>XJ=Yepc!}h2{cC2*FL#&w=H>vx#{aEbFMG1T zZcnm|fPLgs?BtDn2cg;~73B-82Q=c|IG}Tkkb#55c+Xi2>K1q$$Ap zTft&Y%59{yoXMb6>wz)r?myM&{&Thue?j~DZ6L{?O6gz8K=V9rna|Q+C`rbW(S~jR zi0(*Q(tsJY!0{n-?ygguUSxNz)%z1~pJcW6AMHBmE4QAi^eeckt>GK$AJm)u;{WMy z|KB3xpTG0pR02P79{!b6$@l)rSEed#WW8=EgeRDq_XNRU91-n|yfIPzlukf7^|;mi zO!IqLA`$j7nq_?}NpC$Go#F&0AKrW9#YvZUIkZ1ri#+)tU6^0{h8xK8@nf6+;{SB9P?cC9q* zn)}tc_(e{#_zY_e$2{aMkvMQ%71p4^FE;^x3c!-61~BwbwU!RdnLMMsxGfe3zU-yA zGdZu9sW)m*yg?(SR1v6RXf8249fZ~mY&AjY_vx}Ou=C%|H~3WNK+}*(SPq9O%mw2k z$`5ISz>2*MB|A83zK-fpVW6mjuXfqv3@5maASMccgnw;r;N8!{04ugLYWMS3OsaN7 zUBf`l!mISd1ISkdihO~*y#-6ac+vS^5LDlE25cU`>I-05{9oy+G3qJb2Ry9jVQwl^ ze)kf-71EYoj0?{IfZI}r^%N7sSEctN5K&NTEQ^}l7b1ms#Zlrcp8__4m^;qpEfCGp z$kfK02mT9m8>Sc_CunmAT_cG#e1!&aUa_#TM_o}#Uj(x5Ay$9Ln>#X=JMtqR-mkDO zzYntiN7$FY(6V6BssFii!jDMt&w27B%0mwhbnCg&7F)w)9R`Cvw7L2856hlVEZEF2 zAx?4Zr#GDZTqIZI-@(E1q_T`I@c5A3P_v4|GHk<&hcRA|xWr!~z3LG?DmbO6Z??o| znE9%H@_BYHIemBUqB1mBc~gj`lPMZoO1)=-CY?cADZ@Byaxc3cF@Du3XBgEt@`u+OC{<$w}T!h;%GCM`USqH2>hh~@0Ng; zU>W*Kg4j#DT~_attBAUSZEhKBk{D)sb;$AkZhg~U1$(g4jTqqy@#XfF zpwMq>b8(;3mV1qR8Xi4zxDoL5#kNsiNfmslJX8I0*~7eBKz?ix=6y>`ZWf27#lPv) zQ?PAtR%O65Z-opY+SbweDu%AfooW#b(d_15F>fPum26h#eBK+jPcVPY?DHLFQ356M zTc{f?7yKpWZ{U-1)^Ji{qQ8b$`nDZNM*CHNU_Bz0`wgxW%inRIfVGh78&oLB6zQ*V zom@S>#dQ(`(zStnx?gaen1OWm{}0zGs{}z&sN1#2>{Z}=^&nuHK|pJ5YUjBKh^4dG zrV(U^mwu2nhL`9zm!5Uo`8brk3VQZ%kOaNxQcc&#GIm8$^mU>HpWFsZ(B$?Q`EI?4 zpGQgyyW@@KCV9ebdwu4L*W);IRitC=u~mOpTq~$06x`>@n~kux2s0=an<%@=l;DKN zK|C?MlsZM`Y?u;`7X^U$XEY?8D6;N~PjAQ#r3`{#R{ zp9PlxTt@%nsmsu88gO@DcE&Fyj*QH$yeYZJnE|>K23?;a-u%$yC43AsGRo;(I8EJvKaEd*oQFM zr|doDteTpq9v>$rI#Qb(SEKT8I#2z-+8X?4JvZkNu5KXM(sJDl5zV`{jMFrb`V7;H zCJ0W+RnfdxU}bh zY%5cBP2%IFS&FcIq#`&a*Zt2m%|9Dv4Ka=7j5lrJY&+}pze8pa7sxT(H-qHiWuWC4}1Pxb4I+ ztbG5H86;Dn*TRs$x5({0$r;N>pN@si>Fl5QLd2-8{MxlwrGHk3Q6RP5j#@g(WRU@H zZy@+|Mp;xl{GQ!F40o*pxh68n7Tf6fUpQ{R{f~ZkzI1?4=Jwd0>SUpaBazcsR60($ z1CPZ$wTX1qa-;R^5)na3>F{ir=yx`N9pmc?TI%PYbocF3zehyUx8wAL;o?%{t(O64 zzb8ilB;y~UtY(*^F@OHm{oE1$*%$Ob{|2RQ(XkWVb@<3s$<38Rb0ISW%^o^(KF<6k zeQ*T44X=@o&YmWV&*j@0n4>)r=7vcVPa^IM4iyP%iW*pTj^KLi)_~&K%o7_hQP$H= zoNsa+(?4O%;s<_w1mni&oFmi2?$yW|_<|99)NXtxD3>*pA*$oE3)9ti>uIk&jwG`C zwzm#_XkOT5ble|U@Tn9&O!nd`$$sv7L`yUB^v9+59h^mnol*3D2OnF6W@uDKMy3Z= zWx^`?WEJ0Pz3-e-$Hg>^r1}@JMFrQG78hP(38B3An1r1rt_JSmw4);}L8{heA`$gy zxdE2L!>IPbSpM$uBXS>C9R`ZHt6BA)*u)4vqQ2|abBXrM1aT=IF6kt&Cw*yYlS)t3 zb(->$?u!1ax}e#|M}Ty@&aAifuvOs_93| zOJ@~KbXo4T)n5$Vz7{i+k={4mi_UE;xE`#M7uMMu9x9y6Ag=V0DkanZ-igoK0{&te zwuhx7@H(lzMA!$=#gfuyMyx@I#lYc@OlLoN{|1}}o@D0`)~ zuyP^9cZ0pHW+13N%klSf_T8{MJDaTCj3Sh^y~*Kk!VSpjRAgut2V%&a;OCZlWX^2u zpx>Rm-IbEv?QfBprT9Ob62Og4&^-q97oue#OLfaQkES0oYf}N#q~HBQ)a_rXeHVbT zDqkbi0Qs7-^gCH!j~{&^nnG*cX})B-qCJ*TZ#d#**hZ%x5d2FH!91TxVJXy&osjKnz} zayAb7^DAWp+otJV6rT>h1PYqYF>p1xUH+_T-|QXn_|-Zp9QqHKDw`XeyH0qMpIk87Mkck8$0WJS=zaft>|K>qISSc}iZ--`J1cXd z%(+dHc;q7duaV!NgUUI7SoC_+Y_rb)hrRcJifY-`MjMbIC_#ecoCHBh0uowOl86K) zM?qpsmK>TOK_nv}pn&Amjf5sk&N(AF=h$SLrhUt^&pT)DbN4=H-+TZ2#{J(oqsGu) zRkPMwUCpXhv%YVB6GQ9i0+#9t&6n7JM2_-7bbCFnjhJgzv29VR-taUXQshq!v#k?r>yj{ z2IRuj*={AkpZadUm!?mZF)BWRul6a`L&40W{x3y5<@+vGl6zz&mZ8{63J=u1vG$Uh zYNB+axUOZMw7S=#G@DylSfUUrd+_lf1E+}NoiV5x#< z$Rf=?!xI@0iwY$THF&KZ#`k?h@^X5{N{PDFP2**+eBG5Isdy6U$%fT9>2?L1_e*(c zaaHSCd3a`}hU2!NI}X!O3XI|~GvNu>!-Z66Z>2?&_A;+M{9MdfPja-)NpbNzKfbi0#mGgsM+dP-2ek3}SMIMYqY9%l z@uar(`K48bz8|%VjLGr`=jzE?>tY?a3uL~%{ju$C)!IRR`Tn$`Tc}WiL+n(MN7aea zrSvKtXWSP(e+rvetQy+RAqVMzZLvE04H39I)7_38 zVK44HkQlRgCvy4Zy16#r>R)yiKx<*ksBzbYiw$Q!;-P~D>~0=2U+ODI{Y&FW7?IVr zuWVl%*^;)(AXRtDk|Bu+uVXm10mZjt7;QX!1X$yh-B1y_5+TCh@cB2huw zgdd|O3mx0^7ib!qnN+Mdd1mYeWhvA$xX)d>O~#<);`S{gcun6$=lWok8gbqBm(e*@V-UHQ}FKPo}unrpn44 zr5aly8x>I7;)o^Np^1(o=EZoJ#-Dyw{ZBrK{~IQQ|6A^%2nwDkwD{!wnBCj+g_fZE zy&;AB`7zRETA#>cWA)=GKK#e^5>TUMwm^gKy{DkK%C+>J5t;&lK1^`VkW=BF`|<fHLo4Z&xuhc{`y+c@0>rESg zFQ|%_>@E+4^m*MPTPDlG_;&eIFW+k;D}eWCmQp>nF8X}bmti^lJ9McjjvxIJa=4}m za*Yvv2@t9YU(_H2i~2unW6dhe1Y;z5J(~;#Mv4%Y!c)f)MxyqIE<0w-kDgG6; z97_yOE;Iv_>$O)7T}ec3$JDVT{Tesc|4FAB)ZSJ-oTy!zZS+mJHLoX85f7)Ieo;KE zZgqM8&2JMLE$xXu-%Ggy#e0raIggT`CEAP;sMQRL*v z7tolke)@0m!LM@F*PbUa9|(b{Bu|-L0dWwnlw?(;_xfRJ{%gsAYfs-to)J*?E9bh^ z9l1}8-eZ&r;*@PiUm;w}9+5Qiklfx(CSh(SXI;Wz3-~1T^==*u z)oWWxK5b*b3T8g+uD= zkkY9ZU)uBc1m*8u4w2OEZ0tl};^yYuQdLR^mPTC@**^Mg;Lo}oowP%r?CsU6tP zY}WKd_kiC7>`8cdn$~C8Copm%rOS55ZrMi#gvUpdM;2gx8o%nWXh**FY{w+wDuLL9 z6`2_7D^2h3nvQZH@!tt2`*Ky+%5SaAU^_~Ozes3dWFN8xrEDUZiW1&j?LdjB8@Gz#6cbl%W3HfQ>8BSTNxCnbu zswdL;&}XNRo}}d(2GLaUPEsR39%5d}tuliM@H_KM9RnU0u26(}@9S~gs~6>8?ZKo% zsFCaLi&GEGqmL~|9Cg(=2hWkJB5Ut$u#)6QGt@e*9w*DMwLTb6;VZ+lV$uwk=g7KY z!<0&ym+mD~>u(mRiM@zzZ>B-GwddCeQzl=$zD$1G(-X3q;|}S zrlY=+e3z4#)~f~ez8Y(e2EKwNb<-q_F8d2p)yf$gzUA#`=lPUM<2~1 z+|(`gB^|lVUJe8d3~wBv4(%w<3Ur2R8U~P-l*n8df4y6~X5*Sb+d3FI@OxI8 z$75d>6(q$oq(7fjSvV)`;CCMCu>^^}x*;+4UuqgBl1xy^?YzBEH)eKwKAX2T0Pc zejfxyQjS1hpRHls=AiQ+KvIY|7c+tEM5G5hHXMwVk2?}NSq98 zv!Hg_+THGNrc6R!E#)2IOpoj38BJ}(3(gwc`<_lCw&@Kz6G@XiOGtzrw3`RDpM7Gp%46bTaCC$4Y>@IQHqutTr(%>6s3G#`Ws}R4r?wYXA&B(O_ z;WM<-q5-3sULqVH%D*eSJ`HGdztKVj$kjA^6DeCC!upDbr6$a5u}_9+*2r!}@3>y?SWA6`@2G}`e|PSc zT7*3u5HdZ9ksg07c6}?QvC!=PF7HC$~5KofsKQoYW=?PBg(z^JMy3CG$zGo z*loLk#R4`Db&;2mwdDLHe6VkW&BX}qMyA^8xp<+rhBxXG;i^bi@&mNg5V|fL$)VA-&K17l$upa$?Yq7^4p(U*ot&LH|5{SayTW=zL+_V#^yFvx zWqHQ?_AHCtWLyP^ID_$TOZnn}dwevSywqa^AUQ42u>=>!2CURoFwcVYEk$a?@FI9i zsfNTD)PKkncc3vZ1>=j;lH zWNc#fA5zX}IU9`rll?@~ty#9x0m;a6_TIJKsTFQ10U)QPnl|b*Wc!GOZReek`$hTr z6OyME;%pOaSsPcXacYJ0fZMc^bpN zo+ckQ*HR#>awKu}^AFHQ!4;f?UX7ZwILkat@JpfkOA%vVA02>QaRWvP9@yq|{Q=4@ zBqiwVO<=Ir!&1B$_*$Je%;wsb{HQ= zrG|=VHwWC_b&;h2^b5bcoN|7%kQo}58)7a|r-?HA4kc$)EYcZxQ#M!@a{EKjD;gK3 zmi8FP*c%2uVH;K=4wghbgJ7hBX9Qnq8XeUF$~ zVNfU8*kt)|`c2_yU71@Db5+wNwmF!CwBQ(A{kZI`yzoiBcHEU>$bjoBwLJz5i6yJ&syr$*x(3;A&2ELbz{ny4V`(&LF1gKkTD3{#RW0Fp z46h!nHv*Ej_{?L5u?7t+-6L6xIi$3=1{43+{Y&Z2YXA0 zY|%QXPg`t-)`}CKQ0A4Xbej$aGqP4!!saY)vs0hDI-E!MzDA3dd@CE?HLH@3xH_Rr zVJp8G05uDa* z7sRFi*9UF%AmN})dyi#po93c;lK%LMLNKU+K6azVPrq|a6s|56Z`b6$~Gfx;N zDe~FK{D?*^tgCxe4OLrOJKNuVLT|C9 zbBYqlLZ|CID)94HT>|U!ER(j|5w_mgk)_6l%UpBW&%6sCk@3A{{UqjJZ<_xPKE3~X z7XA0_{lf^9)IMo_BN6602}UlwKsd^M=9ifFNe^A%P5Ohu z+%ZL(5Z}8Us>DRzPN`i8X@)Z&JOpOEI{a1i#}J%rfuQ+{adGq?cBq-$gM5P%Et)3~ zR8)*CJt)~b$YS=MmtjuL7C0NnMG7gs)tlOMNYimWSC5k3XO2=CrUqoO&Jwel_;!xP z25#4&^AwoeNENs?fGqBPXOEuPc)kdoT$w3^$;8ES(MLBNb+*p~iHt~xk_|^$%d1== zysBoeI@{Upr34+$aP;@y3TKZa6ku?J$*vI^9YKdqdXlG~d_d!HSql5d#Sh=pTi(sj z0Fk^jtfEkBywOT{+aawfKMW)l=&`IOe?^IcySOcYF#j&-g!1#E6c3+~d}T#6Ruqfn z#|Vvvr9WM_T8CW?v7?yc7SkEcfI5Ulju0LiVHt>2gxY+dzr8tFU)d(ViR)hE3s0JA zz1Kxd1@|*$Q~>O929b-eBi|xPkxIai4*HiZKrSROdSU0ScL< zti6ZaEFZjW`)cB@n(ss(-BvVeJQXcB09&Yt)F^*v>crXUABNXwKc^o#2859>-V3z5 zUh4rXQ=&_}_msl;o>LHN*$44uf9xDCEN){^Vw2f)!RrV%82xiq-?d&E1iGC@)GyoC zqjE65h^5*4KQZhdpv_bi_IRY*7DjIC2Phx9PGk`2>m`MoolKSEKn=-jTfPk*&3Wya zQgsS%Z;)p6A69<+(-s$c(N+;pCaM8+z+HE&vucGIDXh~(I6ig69@ zz4aB|R71#}{ttjT(%?C8wR%958G1<#`3I=ox(F!gp}g=3- z0>L}cYG{En1g5X5W)kMSM%~SrUwH*5kIQFMSdMjyNdQM zs7w5Eez$I7w=;~?oe2HdCjj{e#w%!?g$`$(<+^V&9A(!Lrcw;3!)-jVy`=wq!_kU{ zEMXdtAFynq+~~>=rF@_PP-T62hvPJW)l)tmASn8RHZZU}%kpGA`gXCX>33AX_mMwQ zGajYDwOFE!a_zhUx5Z%pc7Lr;e&@83OZF(=0a&b?Ky1 zBAWVYQyAWrdC_q(^d*p{HhAI8r^DG(dz4)c z+dc!!Pwe_}9~f-CAkA{-!&JgxI3}V<=KG1e!T1@Dw^L;QBp?qLJa-^6^#fE1$e3S+ zLl;ib;VwTwyI_nzB=rnHd)W-?FCqc5f6{UF&+>Cklxw^Dr34YPx++620ADrz{zBAM zbwan($%#3FdfZ81n}@C@_ciQQRaKPyHAvOR6UwFP*(|3;8`qwQdmJ611i`9R^WL7R zFt^=`JpHp|gG09PcaE(7EF=7GOonS)h7zz%EoshiuZ|@X{gAW}XQCAR@ha4K{+X+y z@=f`m9#Va;p|2P&DQNRL@2HnVX7>AxW^)35X)A3gc}o4l5#?qb!_aft!G+Q*H<^SB z!SDKw_EChxu=~S*uu}N>rQlAjb+oZ9Mz0QbHA&cfT=})dqwAdWhqTzgEyF0( z{;l!xXGZD2ZKDW$r1UL)bByMMzUZT0@lmg0{+4Rtp0y2n)F>|5Wrze}3-N#@Pm5%Z z-Eu7cL}ltv;$7>Yzt=*r2VP*n?f}QLGulVOdzgmB(xn6^d1Y(L4w>Bg^p|tNPXJ6Y zcVS>?PnrCF6>g_E&s!!HFY_u1dhbw%95&5&zNIp{|Qd zf-YXFv!`HDQUedCBfgZfvfY(QgTnC}Shc?SRej={)|X(Jaob%4md%^T@0rC|9`HQq zw;k!0dfC!w`<=ox{CnH5nu??g@;s(|q=N?aNlQf!!RI6`jJ!R0<=zJ5hUDRHvshOy z_0II6#vORl%nN4R>@|X54<^8#kpaQyf7n6tu0uWC4JPPLso@&{vX@K2o`*y)7AS&_P zNprk;?A9ADLNr5@B)Te-H&%7igP^uXsQqqOohWt{T2?0C&C75mIb@YAp>1Ca9Fn!p z(>nI%Gg)8-CkGIwqr!DuAgVKXbdPHK;|Gw!E^u5VJu1yKW3Y9j-^Q76N}0u}ULux@ z9V-GVpgn2RS@)Uw^*QWQx!7^9eL3p!WOgo_&I=mtQX%5cy#O+oXJv(3_Uc2BNVwBD z$OK_43AS>$8&&ph*j`;6-isD`)g~z%LZ+c`#p^qDud=Q~Td`bm+GPG+{4|(bhC;gb z5}g_dYlsm=4`!<^RMM_Zamaf$ZERL*6A(%(y(Y6(+^-=l;Uk(j>+-Rml z*qsMF3?%a2!kz3xyb4RyhQ7}WbP?fOVuMp9}r9>Y)q9%kM~}Tl*~5V1NL@zxfUj8|GL2T%XG9efkm)|Kz}|S1%Gz@0V0DgYe7+)08<+pQh&f(f9b5Urd>}2n#)U>e+)jl-+HEvclq=b zn+3Sm*qx>Z(M^j=GLlQ;Z-hTf)RA!TmrSYaZuM{Lmo10ux4stL+)qG<)4E=|P%dHs zt;^_)XMkv8)3@xOrPxbxQNoU7vY;bdXSqLtRII;BD$wLCw#8+Z385AJw6rsopV=YR z(UY(h$rZ9w3$`v|O|*!W)_<@hy?jF1^aIqSv&XX0PKJ z=QR6z19x3>lK;%s&FsTnNMPH;&NZ*Gh_tXJ|-<$!k@+jAd zN<`}F;hiAp#u3S|v+7vc9)+H^a8k_-ooWLa$3T=t1zq@mrq<4jcPk${b>}jrqK*<6 z@|^@szup`nFyJQuq-jwk^OwSi97l%(&=tTc9-ylAY&SX(HM)Ebg#79v_ZXlZGtgs| zODxFfQO6h`|B%RnI)&O1A@4suAi75rr5D3X5Ter%z7sqWKhEYTURiE1A=c&Y-jFE;OfE3ZgqZv(makau#R~XQT6BxZ2-|E0Z>%+zxJQ}rGM3og+{W@2<^$DSd5%1K^UpnQk6*G*8RfC zzO?ZQvX&S@-^a#&?F^l8SDjJwF=xDQl@aXGAF$c35R2_9d)^HGUJ+b2^h^=)o(qG` z`@L4WwN@%S#!W}4$MvIfjHj@V>0u2MO!S{_jfZ3v6#%G4mT;Hb?Vq;xPqNIj0rA;gHu&A zLwk|5S0yF#ctP?LnUbhLcCBPDX<+h!&s_qkfd`b`E1LJtX@N9a)6mN+K z)YN)~UG{5-@3+%l@9D7z<*p`4xEWe~HVwyHH5UUS$dnms?+Et+dznr`^dR0YNv%bl zS{b+Uu`jJj2(TB#2QxRHevwv_vXeF3Ij$KPmeD})Asj6WVWR)RThr7C21C92N!Qv7@n5zi{$a2F|HNUDCMFsfNKU_>j&4^x3{Cq6>L!n!5=1zeu zH4~7!a)6kj4V`A{`kO>e!Qbn$@~n0LU={Fx;uilGFN^-7*yV zm<0Kzc=%4t`%uarjaVt!u_$)xYu#~}uhA)*ba31tDqjt`vrb~yRsG$>zi+}Mkc!(> zpCvWvw9i&2sW``z|jNeu%D!(kEmH^sGz zx&ra~lw!w?IIIQMg450RR5a{LZu`YHCV2SA`8V&UCcCbDpRSLkqGHaB!1q{^R0E%E zci41ROvGKUTxh>|08O!;noag2`SkwV3p}Wi$NI4W>!I54qHLX;E#7BVSP`2O~L_Ms>}JDCVy zAM^4%wb5om`uyt}L}Km6g4Q`9Hx`ce`3_GT?WMq%Zh4ZAPX(-IZ3+ZGP7B3tm028Z z7`lFO%NIK~KPW5Hrjrcjlr_o|!)Szzy?ZOvwIW03@7L=@MH}&Dkx!0SF=Cx!&pYiJ z)-*tDIiA_j?XAl-Fx6BfeO2J$5xQ}^Xt8#hl+tWDV8dWVRBvD#G36X|thX#K|0u{v z|3!k@quZu^EDt2_IfV9)33u~7n^8YqA#jA#Sv@~4;;r4EEr1k=eA09!ZbXvjf4bX` z@2aw2m{1;P_C2>f^#i~y3wJ;63(l3vMU-g{i_cwD0Kd$()6h3_NJkm$mc>Yp1{v!+ z`opT?sd3J7otC>nam9X}r$ut{t5RSUouX_)wg(KCDhSz(EtE?+)3%VUCFVdd&REJ} zL)m}v*5OLI!sd1PYzUQZF0@3cDt3?e8rhz%1{+UxoH30;;}v`BE#RZ`wM=7w!Ryqm zk8I3rta}C?=IZ4!SQA0$^yJJd{MgJl<#lz7H7$R69rP6IBqVt`J(c#65!;G5F3K?I^vsbLZeHuWvP9GF zq5Dm1{r~<4{7=pU|35POQsb*Xsf4w`N{Mcjmo%|t6hc3drCi<4qAs{bYA@^fn3A2$ zUrhM9)$e|1Uc`P@Rq&HTYU7@j;NdikTs6ZDHiq~E@#uHx`)Yd58Nc3wQ&X|S?J<`K zF^wb9h<-%UG4gz>(TOHvk$UCP=quIc0}+aoef>7m${=yoV;zdS=y2mr!>faK(V=>m z@y`e6#c(h<-38W18jc*4e3H`Nm=MHV@Kd1fz z5MMNaWlT~{(N(~CE6#zAlNpS^{Q+9m23)QX_*N72;I`G=af{|R$k?}^P96B~PF?C> zojU#c762EU!&K)S7TN`RJF8rn{Q#AYi8j?$=Uf8B$%!qx1tS2xn}BW~uZsXhI5>-C zL%~Dq{h10z&RYUgLx_a_ynH`!`Im-w7Qp8;M|-Gk-bu*M(jn>Yc*@9Aea*2LHyl50 zlH`!SXG!_#^HnWDBI9aYqZO>`5t*4D%UV==Zaq{z(p6$ zCR@`Rt?Aa1Y`z0)c$xl9S=BSGalrK|H@cAe{f{2kf0jRou^J+P#X2Ju;9z$`ql$Ek zf%Op)6t!I6p#=k0>Ft5_mySluJRT`I01(>hY$&Pcrb1o!3t0L`2a^Rr$^AJQM)}H- zu>Sp@l07f+jJbf5aB1xJ0@iK29^)~Cyu)Ao7i<^mU5<9js z=)-f(qGr$0)aD&X^s1hM48utUGsYba0D0Bbo4-s#yzP<@Tbo)9F9K$Xt60Y!_~)y& zPHp9@Q>|JJ{y1NwnDl*tAgljq^v(21jou~y*aq?AVqWKUhYs|0kG43M3X}nCi2j|4 z=GMSc^cn4OIM0sVQH@@v_>C6W_?LFJeGyyOXmuOXc~a-hHTJZDi5>VqQ=->D?d%yN;e%tnoUn8hb*3eekU#^-Rk-t&f{B%xv%&zCxv4K`7t&R0u$buF9a%hK zt4H~o8z(U`k1-_x9m#`q4K*9*mu@$!dady_jo4f|)5C-0K(e59_Ev&^ zulPq)^u{{4V^i|x0!udQ+$O>GC6Z_obO{q$>j0k7=yQYCgb$PEJA3z>^arRL*i~wU z^8CIrKrh(--FL))yYdgcncs+V!ckKgsh2-M_E4TZSpCu%(<%PJuj=Bn@+|+oKKUmj z08A%2q0V{?&fA6A_mB@Mmbp1PL2J>&pES*mrw6Ag!pj5OZ4oVJ87;Jd*@Y+NCFf)< zH>1+x*>O^Q5Ky&Lb>z+F^<9?m@a9il(GjmSOo;L4iEr7miIxjIeJ;q-(Y|cmm(Gva zBanGhOjV`X%|lX)#XRpLS7n3WJX!t)_ef2iovc<{+L#C4=tqQN%6io|#rf{(>mjOz z2dh4nf(m5~mW6*`pWRPAoc&zDzZExIrCQJL;W;j`ln9qApnt7n zOy>V_UB&4bc=T^-C8W>?>8)CL_vG%4j=bR-uSD~V#e~Jbr4O8TA&~(TIeP8RMBsD} zpWADPuXXcX&Vu1qm^*{!!P9)!;(wF$kk;b=W+ z5lw^}O@BW~@*WoTC4{K~Lsd7pm;J4NijKO+ine^wQHFgfViY$pGpeJX6Q3GH+q0h& zyn8fN#b0#q3%8n@o|J;P7)lGGwUwXA4pJ1u$rJwY)>rTus8C6TYJEJ(x5R%-uy`^v zk8o{(*6$9pyClNLgIG)?&Sp`HfAdJU)gkd}Q%oi=(*uZOp(f%dvaW6_Vz}Bixt&)T zs9*HZV-!+Aq}P<4NT0xUAORmKs{ zj1;3>qes;IyC1G9^0ZvTYizTg`2-WGu8Htq@>rLYfbwT$$~uT)KYS8{hhuyU3-yX^ zCBs%!BYJbK!MvzbQh<~rj)5(@2 zxws#ZDRHrjHa15P?J$9Y-dilLZ$QrA++0bTMR&we3U6*SS1|q?cfy7X#-LV55a-XxWA@^iW#=L(;Dfz3XMky6nFl*3m$298{h9X|gtO$g2DZ~~qb%h>j zn`pBrM7>md7?}ny5hiQPt#_8grR4ZkUhDZ~g zVihK(oGsCK*Rga<*3W~6Y{7)oOU(PwCx6%l<|gTy_QWq`RY4JRGw>Z#6f0sLIqS!R z=b6aDb!JG#?(-mYJfPjAh3ybNmOk?SFNX&b?s}PHr&1W6aaWtp5@sEqZh>9Ade27Zd(*(hr z(H|UW*J6)wP+F;zQ!~J8k86cfDUqn7ODx=}6NPoj+rMM8i&k4CKZxb%30yX#m#bR7 z1YzQKm~lbG0e*%<0Pl2NjPn0IH2O6Y`Oa^d$mm})k*oJp(eD93&Vh2qO$8$xE`g~G z-L2*N0b*?f!k+YH0hbXneg{d8kp&{0;Cbk+gtK=*V-0`|y}&qkqaOqN=m1Ovz*zU& z{_ZH5{tehF{u{7m81Sb zEBC~%fv))YVniwy>KS!lZ#Aon-Ll?2eCHppg_G^j2eE7Rku23CfH|8Wm2L!*|_A{>AKh(XNS>@9aU4kjZwK8-&edK>* zK$)Zaexnlh*YyQh`gp~zxo3X6BJN95c2~9Aq&HvfXN;VJ zG5PZ%@4v~nkT?|dGfufZX%jeo&K1a*0}%d>wXg~eHl>+8!wua|rkXSbBC8W6xsRm) z{rvpuzkBB6%KJq>(Yx36CmtLBFvNyZOGW2&MHrwtjqi`$(qB28ykmuc{K6cj>dpb> zmL@>qw*!9qgmc1_oV@k)&;uq0ZAH^Xy?2Dsl#MsqW~;VPYez*a$kDg)(j1jhd1U%z zw{5mk>fz8QziSFlYG0H}m{2v+)qmH36(*yjc)T3CQRu@q;T?y2oYnCOMBPI9A=Oaf zcMZn}y= zD@6pYu=D5%^7#23nW~<(w`fT-#x%}Hh?{<_jAp94w}fBu{6AT(jea=9rFN2;GK%ce z^1xhiLcO2@la{?3@y_Eycsa$**89<8;sC6lkJXvUT5-pSe#NzDULKsKZ%6Ou{vz5- z8%1CQ&MgREiX`EQx$~Y$hG^}W(xKL2VMSdQX}Zqhg-Ne8qIUb_=5Lu%@h$@q%;9>u zW;%`1B3dFLk0e#5a6t;Yd|d{R`PyyI)F%sGFBcsauck-Laz>2N$IYF7j&#~E==Xb* z_XdU~+w>vHRj`Dt%O3>7;DEzjrE!O{hRSen5~^EGcXi0~01TwFYZ#H~vzt#6o(xdv z-%`wR;$@-FWd@PV3!RMxhX{upf8*2>Wzj|tR$*>3kQ|}Nbs3q5*v5_$Urz^`9?Gqq z>AwLFuPa-+1t>)74VjfXQbzXDr7k!Cn5D`WkrhoXkA%0!*_=Z3od=!dm*YnJN36B^ zU3=>LDP=8fris!ztkCK$4C7m^I30scL-MhYAA6j*y4h7u4WP{fp?j}=m@UW2a2$sw z6=!saNE+bKDELYHB3`%ysVE;`=I7CH(4Wz2R;}b`3%0ZsSk% z+(1U?Vt#2BMDH>c$|fg1Mj`)Ia(tthm|6q6*F@jz5hS^T?`TSek3ITDrzoGly?ut? zdD31%9UN6zACg2gJH?q}9hg;NP?AgkVJo#cwc$eRCX!snbL!Q|j%r!?R=EaF%x9-F zr!yw&MGULu9naTEpYy5WXI#3{9G}CX#88|a{nNLShHqZnbIm=Nh>hdzmQ&FuUaZ<9 zaI0O514hoHW)0LxnXbnbKqe3lP{ODveK1Hk*9H$V~| z24af;(Ex<%A1Le9d>n%sw$6h~acl#|N$W$f4wKCxRHM5f_zR`cCM8);vr!J9IJ7#9*5(<@Lq*$g;7&k zA&0Eo&1?SPg;_bFoutA2CrZxOTU^!N!ASl6HKuV~+aLqOwyg)qQYQY{k0&r$=3L$| zmsS^GsSX0sgShzuv?qf@sD^gZ6eN3tuPK?PTDOdNJ&tz`zhdv6Ow8~esL&npU*IZN zdfV4#Fnt1pcz)dFQ(q7&PH>gV6Lr!iTC}=8FhfXiwM~2)ALf*|n5g`H2AjyyA;J}F zD=A_Z%h^Jz$|PNTf`~YSjRqb*?jg8FLk*8uC3CR3%^r7yN#5Ue??z^bwFgEhU3HdO>^-cd6q=iFFiB9*EOpwRvxi zXS|6QiMe+z3`_HQlsxRUKfx9H0K=Gw64vUwW-p^;I#@p|Ju4DDd!M^`*BYtU4(tdK z1aQT-KKk@2HlluQtmIhSOvgWn=VM1Bpqmn2RFpoF5y%n4E%~+kYZUdV$jBMz?!jqu zRJT{pUR4|esygB=>Xa!^@2jh)nc%BW`zj847|N>Z>Ot0+?NaGQ%Bg}orWkKtpTeF2#eI@b`7jqqs;>D~;4k5fd9>GfavQKe1SBmVPf3)I%%Jnwwc47}qQl<6<&8>e zZ8YT%GNYO32=myeH(oe@rs~%#r8>?3{=5p1wpA&HT`0-f2ol(quJRoAv!O#L?hAVa zy|;tp2F(Vrd_Z^j)IS6CR+Y`}1RX!Oc*)nIN})wkkN0R(q5Tmcu(6e&pkQM@L1SEr zFTSO3TLa+?&ZW~+%t`uVP~$h76z?oZa0Qx~+=d3+FIPI6LF}L!NZb>}X_m zO6OF~xXM)K6`2vZFiSd=8xk9$dokl7r(~&=hiVGj< zzm8ZYf0F8c<3WoLc2Y^(wuXUrL>jF>-ImfjU2guDF|wPv^U<1l$0q-JD*XRow*B`d z{^JPnWM`Q&elg!yY?Lg3zBv)Wohg~oD^s3}^1e>lV4$aDWxxNd#y~|HbKIj$)W|E( z+)T(r8}ZwpGiytha^lQR;evw6^0R|vz!wZIAaC?2^ZR8A_@2zgI0Lwv8`kw3fs#VL z=S}zT<*{{(R3r#}D*IYKqyoQBeoPsd?i0PgHBBi>SFWp!Xj1$DUFZAw1C+6;-Ui;t1TdLs%0O_lk~w%C^`STtWkn5L zmVK$RKQs}ElFqtqib;}VIwyNpeg=8<);k;H?wz5U+Aq9i7gVkwKWVp?Fe!2sefST} zBBNFwvJuf(p=~oJ_V6ZQn9ih_mURMij1*FL()ojj^V@s& zrDgW;QGOYA&qYMp7WdH>!RrMJ%LRjceSd}1Rv@z7JQt$-ly=mww95U4U}Gr_XxUpP zpLQ%34`KXz*YI)J*VkPXt&8yJjPKomi zSNB3^z$;Z$RMc<#JZ~WMtUl;~W{*}u&oS7UIn|5LiPqKcP49gSU57=j6^l(5!-&84 zKBC8J7I#>9>&%WN1(_LG!R@Mk^|WbR%|O$iOTe{Tjo*F%m)=`I ze2P%cLJ#=+Y^8ZYdH#b*-faxM6;cdRQ?)CA=E24}-h}GL^e?ofk_Xk)q%1Ktr7?pI zbB=${Y_4wqGH^9Q_-nYGH374Xubgz+*yU;P_8nn|thW$PRT=DS*aF9R6??Vdm+l$i zx7qY2{Rv2o$s%(_82uJ%z$A&7cJ{rsotf3IK&Fo{tSzaeqo z9AxX=?wF24rQGICUwoS98~S6g`Ol)7Bj$CLDLZBfbZ>3HxPctvq~rdmg8_wFNV>u} zJ8K}#2|Sd$MU5R2L-sK2LQ9syggZv0li-V-cis^S*x%J~zJ9;|dBwpU*pMa4w>ipN zW)nBtWH{nnc#>9o7cOsXD{O;rEbJs# zx&Ns!_Vbhqz^;e5%I~{GXLgmcD@$%VmOOK{k#5d8hRrZFPRhmd^h66E=5=+7&y-yd zr;b$wAx_cOS7OeaW{xp{;!=abcFghsaFl(Q&4Xb}B4%b~K(35Y7{IX_@Zug<+h(*pq=z3YvHLLrITIC4MkI-%%A9GaL z5n8~2DsCa1OOfYfEPt5f9CD{;PAw%hh}Wc?*92IgRD~NgBxz>wDidFb>&32^zLc(k zHdFRkW3CJ#XmtM{_TDlouCC1%ErNwWAi;vWTX5G93U}8K911H6cL@@L6C4T%ZiTxB zcXxLJ1b2OJz2E8Er+f76d;8qc{e6ALc?W-LueGS!du@BxeC9K!mD|GLawan|+SSrC z0nAkUlrEuga7wamRBQWAXI>e$cG_bhaXPv3iR-|r$&*F`Yge|kUzA9{WYLC}sK)E8oWR%Ut!CYn6i2Pf~Fb z(}*INrTVF8O3?w1(U{2M0LKuL3jiF&nO_Vqm1%(2Ht+2|JU=Ku)BS&KN8f4_i9J59 z`K?*QMJE3&ir#`7rDmp`JizggRIv>MD>xc*0Y^i83gC=2cm;JN`LnM>`$-L4&jQ{R z49$4?^d7<;FM z&e;@}!guaZp!XfBE2Z@;lHb%?@869%birkQQF@SK0Q4}SsoK|6Ybxd*=+ses^xzXi zy1ZQd1BG%%XBUFyg2LQ5?nL@w)1cCD1g>@WA@SBu9!Wxmu|kfmKR zkTDH=r}23_Dn;b*M_YYOga1>lN_R^lB(m-9$RMG(dAW~>V=bzk^v%a3hF z8h?%xC=-M86WPJ47}MhDCJnu*oi0Gk2-Fb1I8X7mhfsAH;HP(#icTudQifOCPr{s_ z-M6J9N;?J9yCQo@xIT}k#nb=E7BgXZZH3SwUspzwI6+g`7&Ad{NWC80K!5aXsYQ)5 z*aX+^D12cewr1rG@y%N%^U(|cZzqN*iDj5jD7a5+ucG9H8{BCTJ7bCn{QecFn2V)YCS00;0=H2LmAneTC;U_BZD$qHY5{luu(I4wX{!L)lPihAAqUcFe+AN zHQW6r4=$5px-Aiq`m1bR9^@RtaZb@OLN}p%xbZ;Sbm8xf7)$1BFza;d+GF2xN)9-F zySFnUSPRl?S?VFfoPk~KAKPx?bU7~7J0MzC4m@inxtVh1<1X+2(&&=iLTCMBUv!0q7A4Tx+Yl73Yxc;v|UF6 zXgudgx5}{(Jbg>+wA9kvXW4fG0}V3E9iCfNr`htNK9g8lAqrff#7g zRXB`MbTZ3r;-_` z91OMQ7rtglWbXXGLDiK|p%VW5VAq^-^!+2f>+{l6nlScKPSiXbz@Mr@j(`$8R#tHP z8*cd{Jt19NjbYCZVTI0oWL@A@xMI4!Ull zEv_G7xpsD_$HQt^XwPbQ9QZAnDw(k#o5Gg7H?_a~<~!fC#gEigVt=I5J_b|@!H~W^ zHGcpA<4;|s7yt^O1GlE8=8HcXJzSSAE+z&=wgWE+v_(pEqKdC-z zdEj5jXuPY`6t-uJifOl8*>QNfUQ+;PIy~1JyVEUcG)?ljvo1`c88g2%4Zgt2zISq{z) zmiZT$uX;qC-;-wRX5+Cblwcx*k~#DH7Hj4o08RAC@>^Hk-KlB;i>{c66`4N(0%cgQ zN5H%(X@Mi5l!Q%xLPUSNpZm>zl{45wM8DTcNhY)A|FT zS~K+-dsTs2&5@rb{x>pPM$v*zjK?e=8mC*I>J@6NJZ|N1bB=Lm;^6a~O6W*Uin-D4 zfoM))$@lx}f62rD>+?T$26XT1;xP;G$+r`v2Plqy`$%cFRYsk$uQ5kAb=Bf^ujv@f z%kL}$wI@P>_e6X4cILG{RWK1OQz^|ttACCO3VLuxsQR$qx$?itp_z&$l71Dg8!gTN ztCu{ztub$GLC7a5c0qfltLHvq5&u}dKCs~R9$teU{)<`d?-!deg}`Yk6r|G6t?=T7 zaYdCmziq@qI8mjv{T{8D=;YDg1unmr^antKXV(v_eH&E&2LKFr`NA=>JbicuHv8Sc zYxe(5U%e6h@4xm8;_YR^L9E&R_Tzy+09A8PZ}mR_`uTqVn(3d1{-yMe+bsN{iS-b4 zr|n}0uOg^`>q3bo{Bxs5Evx*0?SB4S`quw`E!s)$IX!~iWRwjd=t*@^E7w052>6eE zAhT8W$9wvTnkLyw0K5*+GsX1gY(Y5TB2qBi;f>@W(_j{sbVekA1h(M~rV%DIbx0D4 z!jem5RWL?VM{R_A9BnxpfAzB0yeE9~*m&;>^Ihu|+?bg(bsQxUf88dk#&shOH(x2q zyWBNi`a1B?T0(?{W7DT5>wWDwiW-MH2=qiSYSic1#~TB!vcoZrN)V?!_dIeUg{ZiX zljjWjP_s34+)6#av1;8l%uoM}EW>6}XOIT|?vDA0I&=%Yirw-^(>l#-ea@ux64}@$ z#T3MdHtjL%%JoTjJvc}!*>UG64s+9-l%0rItw~q5RZ=FFAr*LZFKrLe{)S%!vAM9j z$QcCseGM&Wv;`^@N(2M0OZk(ih;V zcjI#&=zW|WXUFD!73heG2{sT5xZJBf!{Az0iuI3pv6o0=N|tePtEH79Y;h*x(<(j& z(J>-99l6dd>Tz>AV=B&!)rpVe2K0oc1}AIo2p`gb%QB(#_)!KzT&(Ja6=xo2ejRLE!(2(iI zt-mhh)Vao!RG+$hXn0)tfa?-w3GS!xky2?q?fE`gFqPw@H$#9T49F!Zo!U~i|pr@4-H6Iatrl=Lh!(t@{5E&vn#_aK_WMSCIMc7V(Ud) z5BU1i)w8~b--&7TY@*_-zU{+2|NS(dk;4Mrx!9boUFM%1EnXkvRYJSLazq*#v@1da z6v3!_@1gUf5Srk`|Dmv>g5q%j~=kd z>snqv&|F}V@KtDEK1hwZ!93oCPozq4&ck$*luxDUfv2T=;F0osP9@L&CbdFnPb-Mgt;8S7gBnEN;2afRG^?Bv$C@Ok7oxS z-Z0hXq7Fe!73BwMm6ts{NlpnOy-Wg|I6r9=WH9q&$+qzJ^!&P>fUHJzP#vjKH55)} zrw)>J%-(9#sXUz5J+6r9QCB^>x4hRPa$MZ8=@ZxceBc2EsCHl37q~&;dslTkJK?j@%tl^5a+}LG~{p2 zFRE)>UDu>TcwIPK+QGP2WQ~_FvcpZx(wk?O_9VOtVR02+>Bhr5lS0uvj(Df__cvgJ zjJ}B4cP$qb3nYNEw<=B|W1j|dZvzS!Z9w^oc8=wgVM&oXd`amfB+ep0ks=pDwNH!* z0sPyD>FST>K6bt=w1MVs9C7`nn8RB^C(n5L8=Fe2^pV+)g|U_2?R$56Iiemn)AB?3 zjEgz|d&?SRuVnR&B#DEifSedt1eYhH%o9_FldEtI8%7_r#?h64=6lP0pArRwk7noH z-jJp5aQGd=8~> zAa^Bu12J0DhN*Jm>J9i>p#3B@^!GcDzJ=|_!XlG#9syx=a3WO^_0uLY zUY_*^JwKltg7l3kp;$^ujFqhiR7E5{b2j{!ZOwx|(vrg)^_8Z|`_$?B+K>e>Ho}2d zW^Z_{`o!NeR#915%3H~uoy14uwj;(nKp@~Rv_G0Wo&;H=Bc<|2hF<|9NHq9d_uZ^{ zYJvk5iGe+y{y!zj2s&@FFSaC!QUUAUx9l^F!*jl^@e6@h)MNd3*vrvP822n%Y8w^phK$W`R*e)K(Ez_oAxay1|V80Q{`SA8T6}=@xj%@?Q7T zJB*i_qHcSzneSkOG!LVV`uMh8sVg@s(sC3sr|2WU0mxyg?`vI7S54$zxS-pvaA5c| zumsI5-OUC&YhCm~UW}mI14)PuSz**o0T#ja)R?cqzKo{|_ie=vrN>mj?+8>w&@5AG zz%^ocuOja;<05;&CX3uagEPHwRBg|JD0X|Ms;hr??e$mZP7JJLc{WeqeWHIqG`rj!8onGiB_k13PW3P{>pQb_1@EW!p0)?FIhPr}FSn=s zjOkq)79S@;D1b<6LoAOH@|h;0z(Jh(z4&SFy>D*4n>uh+%az3}J&t2k{0cUBI>@ri zjxqSQ&IjD1pWYBC`EG5|^$Ap3E?{kOK~fXlT161&o9c`0I3mkqjJVzj%lTjduGEu$crWlP95(1(NH7y+gO2ue;M9GP)8R)<-y5b?@w z{{o>%!6v4C_zjU)q7x}E-&BpYld{D?Jbqoc8$I%Mihr3SwWM&-r&+n7Zlz){*v(TL zYhw(_M$2=Xk1wBLquysdXS8KLIz+Z`n7cp6MMQj znE0Hg6lOtD94C{ttlp$9#kWQyY&#%OgYGbhYVU3Qjm*)MSfpKE{D@V+fVLVqQ*Q6* zkpo<4gwUC;;B+k2u@P*aG$(PuT%i42oC-X8ikMJCBBWv>WyB z=@5Q?XHyp&yM)CRgjnr`$)bk>e@WR%aJ#)_sc3Mv|DraXK>NSn0{y>VVcTDG_#c^p zzd)@2$SnOQj$uzv-KYiSwoKEc;1)wslnubC>sMYitx(slU0?B+Lu?Ui$2v;9t=t!ZZgNepGm?Zd$d7VCa4s6JD54tBb(Q&Fn2% zSwS}^XWO+8tm1ic6~Sk?c$=sTspSd?n2x`ldgR*VhZn%md_xP-3Bv~JyK;>O-I5O$ zzP4rEb{xW-j#=loU4MwsBz@mAv8GfxV7G%p=|t{|_9&?JF6{l$NjaEAU${!6U$A{c z!$tplLzAeUKMr%|iS|dyV_F$%a_9Re8&+!NAeHvFK3T!!yo8RULIDH$)^$T}j@Z&@ zQQ+x{>mQsue(uEu6Z&G~+PbhMMYlY8ymmH6*S!;fr`B;EM*M=H)J+5bRl!{P!CX=* zJ36s4$Z0F$MS;Cv5?-)<68@AuHe{XuB2Ic#Buppa#XbQZLKt5vKgUD#2j+#rL=SB?~(*;ya|X!AX# zu3jzluxP&-mI>Ez67+{iVH++0FwDyLA_^DkU*;%`=7*tem&WZ35@qaumvXpJ;}`=e>Vj1bN4Y2PWGbvwMhhvbfd^OnjlyuHB1uHM zJM#w->NxOOBGJ)qf?erYyN7Mmjp&c#l1^RP1~h&wR*iq^pbPTjL_@ypd; z^U2V%&Ov?6ZHz~*ukVHfn-@ny! z*!)F(TIx9^zLYDf^My})U`sR|M=h-fKhUWK%KI>LeMTd@u{}dX-8lLa-!B@=OsO8w%nbnRQ)K;LaeY-RJ(o7O~Bbv-V`GATVw*9}H81d`sie zugqv)+3t0~ImUtRt|oCBDV#+mQY-?r4F3M{MIuQBNox5PtI>rr@9CmIm<&3%zY};~ zGZV=@#7=eLu&y<%4p?vu}U%(YVQ2byZV=1R}Lyn%mz056f77CkgQ2{kS!d>9lV1LVxw;_-ctNLhoTU-u5latDxgeI=%sXBA^; z6T373h@|^29X3GA3GS60+fuLF$D{&q2xHC+-ub%R#8BxhY|rD#3MqltcxH(?HWutl zZ7pJi{aNBKA-#?wsA77-c}+D9E+*LGyfH1Oj((lZt*Bj~0{ubr*q-h|)o@;3m>0R; z)@~rPo#a45dS@vgUhh$>lE|5|)_eSQluiCIg*lv39avh4q7_LKunV>ACF;d)t9%!f zwR$!mq_Yu+mf*(Jh{R47I-7>Py(j{mmP&F?`$`|sadfLS8@VY~xkT)qy}UD&u@_;C z$(%h<9DkTpwd_&E7%8#w4lX%Gl(=Qombf<}t00Fp_-))TlF??er&*gDND)0qLOV&)SNh`hS2f2FVSE24Lz^5I%-4Pexv_Vgc+pVtbfRVjOThn z68(XnyVj!g?}~f;Ju#NQe(S%@3~2wmDMXLG9TH45+vJd_AoQ|RKHrYZYa?A zGG_Znn}}d81HR5<_CwitbBB9P-M~gO?|hP3R?>jjD-@ zUD;)dULU^cg|kl*N`C`id3-DX;+2x~E_#AkI zrDv8aLA+sadAD#MiXGDH5+@(eJ4sB;yB(D=BI&VVW!Vs?Q($$o{hK)k>sD>qs&at2 zNlx8O2n+?~=1zEvutxrBY)Bx;qQv9$j4<1c6q54-1#rW1VXnbH%Hd=sPr3x!-(hA< zABqtphdm*B?{HpHvH^Wi%iypM&1B6@lQWwHf(D6M>Q>)Z9K+gJ7eyi*s>pGfj)ViQ zxlBKAEzREixDOkfk`xcEXS#ZJUGDzKGtT~%Ti$93Ldx}G+tag6S68{RxQf`_=}a%b z14Q_HG@><|BLIXlq;=8zhDfT?81f`Cg?}r&)K>P`J!m7RL#UI@!FVV3K}u)V`>;e_ zb_>XHZm4026oqwBOq3EQlqPV^^%JKuhGCSGotSKSmOj*ZXukGis*DlA7Ap92#94$F zoVBL=JROf~7J(QCgb)@`8BPCN9p$X(p&wcAa|i}`M@CV_M|HgygQQcqG1)G=;JRpw+cCB$%4nutFc z^(b!oo`mL7CH=V;$nMiLm%Tt!xgbGu)MOqhnZCyw1u6TM+2@#XZ<^}FDXFSV?oX_W&R+f4>;Ei2Z*eBA~)9>~XqrzhhXg!G_q|E$XmttHnXlY+i2d5qzRV^8){VqZ4JFOI({Lvm~WosF?fVcaR7Ox3M zr|s>TYl#=E$I^$QA>2E<0XG|v9)X#zk?2+}>xeDcz*jG~?ajBT_~B4HYVoLhVkGtk zz~rb*aIdYqX-6gErN~R>*&|qyD0~sfDL|G?2Q&P@MshsJX8rCx z1i~pC?8?-aj&K-%S;(`ZbjM?Ds=FOb&9A8@-dEmgexRMi?0rxr$VC2~vB%kE#=3d{ zT%bS1-yP)-UAQTeae1lr(szm*Ja~XP%P*!^&T~|Q{c;z|4Vlobu!#=mNgThlY>wif z$qS^gPZ1g0wgELhhM@<WUBAW7vx4dmHHo%D%-eyv9sTq%}PZEKL#?rU0UYIFHt=ghRC z#(Uc(2Ohz9CUMUei5mgyluXEcJ)RNui`|tCOQhA@R)2Q;byjD7n898p4l?DEekotXm5@Nn@PiCAbBTy{!Whc)C2>M; zhpd=?UmU26KE*R!xc9Dk5KvGPK##B3T58oj-FeV49HVjkO2d0nley8cl39S8xJuF3 zJWWY7^wc!5O=jj45sX@7;4^jEgXT0wb+!R;zcmMbmaPb%Tsoq?C_Uw z0$&6-=-X=x2d0U*F+iuPQb)tK4X=6SP`!C=2M6w_ z?`&dEE|D}hP2VeePk4sIfAG&GQebvy&_dO+5}i;0hq}?MGncVBHPuapAB}X@UGb)t z32GFn5Qj!*}HK`&qJGr+#HdBN>47g+altK)6H8YOEhTq{XKOy zec`?Kr!z8BOVZr&4?kpe6Y~1R0UdDFSj|Y2m-9K_@1l`Q%Fpb}-SYI8(!{d{hY1Jw z2>NcH6FT}pA~UTchw(d-(Hvv0_Hg;l1aUyI5XQP+W@iH*jv-n(ozoio>Ryp)I@V+p zRw%2juvDG#D&%7)wxDIv*hZ?8Zr<5B8BpQ;Ra8UCGhuao`*hnFSCbGUM?x?E0oI05 zE;x0h$&eTQgwZr(E8@tB#7Wv{rp&d;y!8*j_sV5qa0W~=AWm>A+yGI3PLBtgy)#57m)vnIjd=crIws+LpKK*<&`sYl$e~}IGojCg-sCh76sW_U%wAzAM=4dj2)R( zUhN;b9#BY>z<5t>X~Sq5ZS%n~7dkyaB26r0`3jByx?G_4Hq`gN*rV?BDRvpYFj_8F zHn`zQPW>M8Il=JD<%xk}d!z%o$|p{Ow&9UTf6@*gAJ8O7t@+E0a!L)Yd;);P)YD4M^r0Yq}$f3DHis*b09%)vZ)6sBK|LycPQQ3xyFLjn|M0E!psd~z z850$ihkadZH^ufk)mueo!5o$UMgSHnU+X0zNI+vZnzcu^$8ASgyd_cj&d9g-FavIF zcUjyeUh%iBW(!|~Cq|G`?ce6Gc;!G#i>1WP4GH59fU_)xnxCNQ5Q+QhLGI;1@y)y0 z#E23I1eayaSttMwb}PzlY3Win31zwVhavQ05!}G+?SPwZlxf@a#|V?(4jFjRE$SN- zCn`~YNlU8veLgS(hv~IR`-c0Gea~E&^Zl$$TP1Y;c;Lv+E~w>N{keEPm%omImc}C# z>HXYfXjR+_QWArvGGgk$9Q_QUghQUx#h}t%@{2-bt6zexqwyqKy^=f)wiUiIV+KTt zQyQr#8vGoHH{fy26~VFJ_}@@7X!>YVwgqUQ?PadB_1E>m!8QAoMp-U+&98^~2+wo1 zsSELJavL*P?Y5!nUnJtgy(ddKxT9}0%q9$^o*P|UJoczPtO{0V|OIanb)4AT2Hf` zBlyhL+Bfyc&3xA1Pw)=T z_q7-(BHm>(kX$N>2&Lw&XUMH7=@;n@&@>Xf9fDKbwmmUdkOhh7f6sjV<{f&RzFFms zOeD6L`yhyhgJ5wo>fX1pSQVUK=rRvo*-kx|J)7Fo_P|^u(iQm^1tV=7j z7HpESur`P6vK*$zLm~S{!}Hy zAwd$+iq1fz6%ixFj1os5}31jph_lxJ0cL0X-jS}W4$`PbXRxW z9TZqLTYO-IT_?*DH!DkJy*)ofM@)iGPd$!!RaADxJcea;-j&bEW_Cfi;>NL#fw`6^ zZCyC>hNDomVUK^e#(^dGbT@#@T5Y`~6vBX^EYbJ*POU8`|I@F?GMiKr#X^=%tk@K6 z^?Hg5)T#0fZG}o%1gK7s46y?$;0g5hq?GM4f5zcAw*Jg_sq;$Bm@SGJkrJ;iQ5rJ3 z@I$%gdKrR_4gzafI@rdwBkjr(}X;ZL}U+82VH>o3NV%hrU4GW72g@+;UrI=D} zlIiL0h6^oUJ$q|vozn0Lcs~MsH?6uKK})Dw~mDA+_xCNCaZF2reWU}}rraI6t8A2o-yIU&fm8Sb0%{x~ce5H~%;T3McB_3-k)M|}mZ`nx zk2S&mBECbe9%^`xitNUU zT?9uD%WM@Veyl7~P*A|q-3&6sPGAwijDG+IBo3W$(c;K;lDFSMbCjWR1RF|69+h7; zvvR3b*C*<`e&P6vU=myb3AnRdEEy~QR#Jqm04#L$hql#1y7eL!yJLn6weUVApz@EY zfiNxGCcHVF51S3!9b)?NRV3h$5$RXaHt3a55q@O|xkpR4z)^&6VX>=i!kX9R%@#-3Z#P8f5JR5leC1kK_&-T> z|4BjUUtjp&YX<&uP5ApQ3gBnHbf85nclpy(o&HPcmcXjC`hoNZ8@jsS+zSlj`ZARy z;8cF|gGOR(&BExWV5XWN(Rivx;JZZ0T57dnAHeo>#D^mKEJ7Y9U3>f6Phj%jsSF|31$^B}Mn4sjEvr=uMK=JE*e#5%bv~!>* zHiu#DCs^@6-PRX=cvemEw>7IiiJ&xVx!+df4F(v-0aK`{ebYw;9)_`mL741Z1|iu6 z%LNhHEvc*e-+(TZaC!bMl0OHHP9N*_5K7i)%K>LdIMes?%)3aw@((63LHhUgpmZ}- zRa3N4LXk;!0Yo|89ovx0a+RrEsG;DjeBP2!cNx_dfeDJA-hl~=$%icr@v+%j7N{iJ#2Wro#zl-mh<2E%*KB+) z$&kW-=RW2=3vAvj@sjsR^)BKWB604QIPiHY_Hw{E(Cdj`GUALWCH7=nS?wdyxS6X~ z1VkpA3h-}nx8cl|^GFC9u^lBdQ{^LEq1Jh7hH*bMCo5Fw4^SaS6przzG3)L|w1Bx7 z`1|oDrYCIlLNajlq!>dnWQipxVo%hhJzOmE;dVI!6#mwwe2gkRP8_k*+9FStg+4zv z(VJ6X8`1AC>W&I3$Lt#}ck|rhjxl6@gD)VXuN_Yh-4;cw-HnX_G3NV!rYwPOW1E?uYA0eLvYzYRAD`ZoWcF4#ka0za>w6NW$+ z`qr`YZ5hT?m5IFcamp#_Z*=zf0h;pN;hO|z!Mv2HYw73A(CPO{o%QTJAd_~J1T=nn z+KU6o5?<_Q53omB$EibPjuz9~Mnoj2O{M=GTQOf8Cy<$U4qiw3L> zTr^O$CBTq0BoA!vTvZySE#N{Qg7>;65IuSK6`CY8VKL2JJ|{We$!DrQ_TzooYxj;=} zIz26p-4R2vRnbqLppxprwoTg^ zLN5%~I9{)rd`~x<=8l<7hX_Rju#+2Dlr1fltp@lNy)@hk5nJX{7=Y&NkNW0nTqh1z z3|}M=8c-!}r(YG{hm;^+C$aVuy_}aen`baTGu~Or0}o}?QJF~YY{54?)nfSK>5w} zc8P!%Ed`BvVL8Syop?#&RM+Tgtr(dhpR>5Ty6__t>O~zn@70~b7B7wW29>T z+bw^DUm$EBrC){um>SG)a7eUj=?b=ngnVSgo!7($_&o^y8CE(2g~-8r4ux4RC?s1& z^w}_d^)tJ&x?H>Gp1m-Akc0hb&HYs!EjaXeNJ`%s3o_;}P=OEfZOlAXZF)c?He< zhLkoF6ki3?d`CZo*`nc77GJ{#)8<5rXU$O?Z9y~MkU3e|WEn|$?t(bf(=Af2pj!VS`XTz=-Ocgr~_YGT2D)Fb%U?Dk{1%U%vUGEB&2A6=R&Z9T}j)q z1kDj)u9)_w%~@M}s`GNa(Anv1e6_c%+5Tp*E-X%(;DUz6?2-T$-=k+2;Y2&P@CU$; z%e#+;vCl~aWW%i~9-gfU1Rw%l73K6elqgagc48C-HTtTosW3Xr*wtE_@o63I0ucti zvqKw5G+8Y_R?e>Ll>cC@*dbIR^l=1br>-)_P?w`mk(ku;u+ojG2f%eKC`C8a1KZ_% z<8v2&UAJ3Selx*o{3^iWBU>Q(N<7V!fP6HwzB^ToANo< zCR;IU4r~5#u~*2kI&$t5!}3J$Fe(hiMqs%QDGnj7Sca?tR~x##y!^?)BfeRda^24# zZ*Yhu2|LM2cQF-Df(C$g-~-Bnlm0?=(U%Vx2;wkQ$=CbTeA^Pr3#{d9X&cp>3niay zUT!r!7*xq}oGS@8bdZ1t=*x{}rnOpi|8WQWcMYrlk3P)5LioSqwf3X2A!Kq@{@Yi4 z{v6_zv@G0Nb19ZSfC~@FMV}q|2-lZL6y500I;eeul(kT^jQMV^o!^w1>EP7z>swwf z6nfIifl|L)dF~vfs@&3Ej*K)J)}@F!E|~k%pQ>xs^Ww87l+4Vc#5;WwSG&c_Hgq!A zy?L^SQ1?<&{oeO(=GzOM<6PS^pI)EeW0k3&;)kM?`O%IHi4O5fzeL4U0>|Gv7uw;! z(F1*bL1RWz*+ExytF2Vq)EIYg{)vrQtrTIML!eiAW3pANHak^dT$q!&=(vFAm&yq4 zS!jq>|+yDF$sB^Z@9KcDA{QG4t3 zMzgOukYeN(I8G=+`uMl9d$jA@qC9x*n3NT6N+YRzc$@O}7Cx#;Qd?SVzO?nDHssMS zJ7D}KV8RemIS4G~!5rI`MmZ!MBG?a;LU7zTg(=vok8Rf8vv=?!+e?MjSv%Nl!I8X-Z z%}S5KsYQ`np#+$8DMk24vzds;80uW!yT#s&jo0E!eHiuiQowT^1}WjK?7j9K6=IMO zBd@TZm2bN!JL5Cu+3Sqp&P93a>jDEIMhfK1-yWsklB%cKELsCsG}(082+X#x##_+m&WZ_o)EYf-;RYt8k~;Nan$N0)_d0A76o@XJ9#l{8vk#IQri@+pV7MGrJ_8Dw zrBKGS@ihz_Xw4ZMVH_~nhOM1&V0wRj8P2?F*k=EA% zUxo9=1je#OZ%ce>=k#7P{JQ4-StkDKhJ~x)il4OcL@(r+rVc;m%^)g^7xcDNjKJny zksWQ7F6R=*D|-S>7_6M)tGUVmv01;92;j-aZOYI@R!?VIFc5c7ERpFsr&xDC z7PUE<&G#Tm=LwnbsIE`xjecjGM>0eRMqb%*iKk`u)c}c27l<~BASS2t!if9b*$_W4 z0x#MXH)uTea74>};X{%d-|InTumY;U&l1YG58{;JyjYca@iOhxmv>T;4QVk2Y$Z`o zBk0^XyuhRDDEr2!Sk_y{F%TlBgv)+BWi-!{{h+&IFvwg|ij zqP@x(6#E(`?2A{Fw?8QyYtV>JAmo2+4Jt1?VCGYwGD$@Lx?Uf#dS|!8LEabr{n@>E zO7Fu?-doBf5^SOCLI)r{CcFxj5@om!sVpxE-uzqHNc$l%tb~yk)_b z&Mu+3MWjZOXzlJIY0dd9oLv(rOKAR@;dB399K_JBqG8g4?}mK@m$#k!OA#R{Z?qaD zx;LjP#GNfsJ95HQzyv7v>eB0>zmz4fDO1$Il%7=CSWIwFSas7{Sc*lUUWY2kwS%Bi zpx4RM^|D0U4Y*xCi2Gg%G}h;XmK<|zUAm>;N zoDG+wK&%ag5ZWZCV=4vg9CEM4>`CqiJD@t^atl{_@|=a_y7Qz zRkn%dI3L;j^jVRNqxEU(`ICkQG9x9y%O0D3b%Vv_pBPy581IH?)-U?0s*hSIe=HtQ zT{ElEXe}4(d{;j}b{R5%A-{{XCx9sYYJdH7Hp#)0ZL7X3s8OSmsbKn5b0M7Um&PRJ z<3YkHMyX6s*5ZsibbCY}A?a2V)K9qWHFfo^j8W6BmN7?dxj4>A&$T&n{fDog$1F7A)S*>(gV7Wrqt;zPna8r&f*1ruvLNrKBIlWLx*&+J!)iyw>Ggp9h zDO#=oKaCn^_e~IqY3d$}@H-D?+3PtST*gI>5Sr^pW{~8qpPkiYTj9P(t7|a>(>rM6 zVJh^sfJ!_gR;)j>y6mYf{s^@VX1!3zjQC5$H4%tT8+&v7hqh?>iRAeu78Br`N{ru- z0s8`fCTIcfhmT)?&A4WWV|0}*x>cRWtfB)*_`d=b3q34E1`gcuW^UhUG|V`^=RA5G zF!|nXaAgdj$Q-*Y_~p7P(cr=#+248pcJ3~G;q-k9Z9+HfPHh8lgDCJM1+uw!_N+@0 zi5S+TGq5L>K&5y#)=Hm1`(d@@Khs{MOPBMRCpWjapCfjDkK);+G4u5;!9G#(n!!qG zFUy@xxHOF^y_ivg)@;{6vdl3tDF81h`&MDUDR5=Xu`2nZQWHEtFi`en*-hjcM*ZNN zE1c=_){TR#U9M7O*wJ)Y80xRpTU5GWEg?Eb31|1XXqd$wYSlae=hUj@O9Vc~ihbax zxPLo8<29nTI=NtJqQlWyd_|W+*KF40X$0KW+2&WtiSvr`)QpkHn{r`r!8Ii?H+vXd zDK_NN2vP%bplB(#luc{AtJ9i0C3sIE`-5J{z%o;>?kx9P?KJx(kIBI-dcT4;2Tdi^ zwPM7HjM{&Hj$K!me!=#mSEp10<{iZzspE^Qt5ToOt4l*sBz=U0ckTnl9MT)L$$F zLGhy18}-91&&i2DQ9C+F&em~f$dU5N>(kUk4$CPxwGkW(Z%YdJJG3I4bP87F>`Yijk=<61zV%5xGK{KB#e!P z6r?}Zg*;4a33pGV&H(_KgqR=DOUA0oQ|ybsP`_=}^?&T(zbOH}Ok*?XIp0Gvv&8$B z=N>h?r%GhTE`!Gz+fVFh+&Wh9BF}wNlaWo1tXT-F$C@`oOO0?o^NjhjWOjQgdsZvEg!WPzhTvh~em6!#Zd zhuS5r6AEYh?C7PTn$K)mmJ!dmJF_RoDVj8EM8>vE0@q`DJ6Eq=cS&_;XH{vKKKx#M z>0sn}RT_aZ~Suz1jhISL#ASK+D*_+{>MiY#9j39OuZeR5qlrq9~j+Kbg!Zi zY_Ixu^vDQK1rM4&x3s0|2oHy;YGCD%ToE4iDb?D{b|~WC5x_h#d}|A`ZdF}~5nbF^ zVjcP;KRhcymKGb2ETvi|TyQ{~)g;kuA5pO`i>EqHh$+(B^K}Hca3^S^8Lg%jWpZ>QNS-KTUf?H9x-VhhhIW~$RoMi=cEE^> z9%*_VkyLBVNYh94W>o$Gdh!sk9(I3B_xa#YYAKdsPk>X{|wOPN9s-marF%mn3@^JzC-Sgy!eH>&hRbQ7TKl)eAluebX5( z9(Ig43^qL)yMRTNX|4P(y=(t72dk5PjCgTe0!2|`EFEn3@EE0 z01e>fIp(+sxQ9qFA~9f9jjx_q`m+|bh0Aik`copcSf2fCr08EXl>akk;4iZJ-v`uY zWMph^g!!~6{a@_8bySu}#ic+YK!CJ3#ogVC zyA?0)5Foe|?PTU#Gjs2(x!+oMopaAQbMN`@&AXF#@BL)0WIxaIlk5ZQ5DqJ{nlTY@ zx+7s>^8)ns*(V7310#cEDdffhPdhO`uqbWzA zby<1*7FBofb3g96lM1?7cZJKgcSpNg+Mk$D?gUAc;D zoQUKk=5=COoXr!41ZN=F%NBVFd(g!`9R+%^+D&Y z^2(vtAPSMHsJSx1)#8-esYSSstA06rlGwVB`y8d80-ivuYZPY@3}vp*0Zzc@xD3kS zP(DV^3}@rFuU8LJpmOa>B}FO|M%8{k86jn+v0NK!Tt?$X73>n;B_=cu-bX;t(^Rp? z@5W!u@=UzIojN5iG1}kc-w0sJqsAJdWR|IV=0@5jj0kwmw_`{O#ZERCeG=h~4cmI! zU?e-r_cD$PKzl}7ZqTWpU-(ED%;_xrb4WF!}UEty}-zI0cS)Cvd4<~4$TA0?= zt@)7YIX7!E0VKykFy+#0vd&?bxW0N!77>Nish@bu{>8MZMpfo~u-jYV`{;!g!IG5Rwh~}v7x+B2XhPB zDnethT5s;4FP8Ej0Mp`K{=t!Rv{jp;gFb@5vH`N3EUO{blBw@ETr#3alXYR7#e^s&+!fchhJ8JW{qdBdtQ!Wj<(>RZ>ijDjlNwN&#c1F7malL-prd}684FGhJae_De76t7v+GMZ8{%@v*^jYY;@s^;EL>JQ9b_WZ&Da=sY6+q`&c|CcrJev(L|Wqwy|A3~{vJyJWCv2<0mqKHL@I zoTLAOseBewUd%ZYi=KUo2Gh^anB_EH@XMBcPkDoZA^Hmu|9je}Yvs4(3I*5fN$wo# zc!qAfhIdz?h!aPE;LkJ#^AZA4(soOlrm}Yqt^8=(#Mg8d-@I!SsL-wel#L`}>AF+G z3a&+a4L-)X6@t}727#*_ZtMJ>q_`^VymrpN7>UG2{E%=|kBqAlnAlHd8IC!0ux84+f=AM?B{N-iK zKAuTm6|s%_HO7bKn+lH4@7wE}xcl9ihg3(DUS^j2%vJBrxtQ!-eSJ?qNFQkwcu7;g z<_csyzTi3sMumG-_3DdQb%WibiZhOC^peG)R$K*J*_o|AZ>E<<%8va z;mMK|IQ$6nB{wk?Nzh%GrGZ#<{u)WxJClTeZqHkgsd9|`~%&2jrY%3R2k(J zYz$?ia)8i)uWioy8Q;C{^bpw{DhW^*RVtrWNU%Bs-lVZIVnU~Z>6Afd?9#B7pmYu&Up&pyWS*TGmbL z2qEFS2weTfnz1d~qs_93D)vS9KMgmOgI5|nCI(g*9 z8syU9c6Of|`&=f-DQe(gz(FLSZC`L|OO8zM^9m`NX9>T@`S&P2vkeRE($;Wfu5mlN zQdb7nZuUqm7!|%cs`Q4w=ryVyZD6Rt!Z=9re?3VT#qn|ZydDJ1e5|uxSuugn^1&1* z4Y=iY25XZnfobLsfQ{z2SYAcc6WGn$yUXADU$*Nl0iMAJ`+V+Z4umS}e>M8Y1gZjO zkyyJ=;#Jmt@AP+%hBWx>YB)MXnn2_F2ZxSjb!1viHOZD?&0I*l3iFl=YauBKSH|U} zzEP*IL(QUMwd^v9=1UwgMTO%P61EOfm{#-?|6J^pUG}SmteWm%lw>xEASUH>xy(JPgZPyP?ULdt}Ll^B{%!j z8N}09qjOV9DTh^u&n{v}^8PNwmlB<&BxX_*j|^RViz_WemybiwMCGcFiEaDSw?+Ro z&N0qj?WymT)A?X$vQA(!XQSVmS=z@`*B!RfGQcvUq#ETexz_H<*rIE_-`NpHd{C@v znQ>;PRDbpezby(hij!#NCqBSStNzhJi>Yk#f(#Jgoz0)OOK!_LX)(qbV)g z&M#TRfNGtKi;@+x$vPb6K8AX0`>)5!x~`B8pP5B_&hj*nowQG#&O`+-(?`hjQGxZd zIdpBiCm1S%JEdOZYQ#R%eARs#%hV=Jf#a^3@EgsA5oIKD!Bm~&DH1RpK{LhcYVQ*r zCu}=NO}fh<@W33+&Eo=`XeX7oFx3;mqxQob1;>-0oY%`Q`AK2XMJb)_sqaFSPtZ5| zGO>gPN(i9KfkZgltpqk_21r-{qj-$Y4W9mC=_S}+aB_C&cCBvOnMS#T7^RXs!GJH; zET8zZ2hcScN!-3Dgkv-UUOO@8 zQd3iQ6}8mD;5+g?J&Jv~<{!?y5z%-f)3Qca8~m1ZcmpD_`jI*Ga0^K;!V=Uyp8)2l z?4ly9UvG_wi{MST1H*M z1`lillg%`uWtd*#kV9;I%jUKrBIAsqKA5ADqm%7A_Mq1|{7koYy9PkiIVj>Suu%qw z*;^IJII@IW2+;Ysk6P`)$@XX4?Sgp;&P0lxeRk7f%t;&|)Ac;)-$V-S*etj1CGA+fQ8WW>)+4Rl3--Bf z%fMdx-ak=vWX87lKz{(jJN5R$&#v$pz81b(jb@RF_7^C0$AB%!PkMP&sV)2YYG=T# zPFfKZU8J<>L_-(Melc!>d{CYNa>sLETyPIIt{@f|KIf@jW!Q~xVRzrw-)1ml);4Pz zy#L&F8z7nQ@6Z-N1=ng-C|a^Pl_6?cnB08}2{bF%)!E?JS4kyj*oJCbiCjTIW%lSp zU=g9A#`Wbd_83PYNNtpJ3RTs33oZlTB>L0b4mOBS!td*RgW3diATe#xlcu*sdtfA5 zMYpsRTEbny)_~@sYDj)vR)G$mRbO1T6-2ax%~Pe3?#RTuS;rS;&q*d&hMjUg^gZ{P z{QYrg=1pVU%({eas`3s%xJ&tA%FY)`JM=fNz+&oO|Vj1Q!Q>vcsQ1 zbEY?=&M0VgW(|7O1ecZ+oPnK2q>(t|)Um4Dx)Y;f$hPsv-X^rm3ce|KHsu4g>T&Lo zv%rB7r=>H@i$|?PT_TTN`ReA(F&OKQPXyeT<+r!uCVu%P5A_L+*@|$U;RIib^CEdQ@|XAC zB%9|Y&Ae&(>+E_SeOYUn8=HhWCI-|0XiArV!FsH`!aThK)F^eoK2VHJO{~3++D)|q zjSqPh3r;NO7{>&wE>$h~o*FaRzshta6jh4%hX&0A8| zRh;IF%GhzJjK!}5!I)4=ne@d@O2n32P{NV##j)m=bkdW7;!Xux+C1~J>0zv{icoD< z0KmoX_KK*b$hz8=lB8;ujpyEQzyKktCdM&_`gBjwHE5;mA%U1vSia}H1*#&XJvs)N z`t33HIQg@6!(eO?s%wQ(1@Cv%_Y8B7{VfA^E$VeDaw6=-G*;n;b@pM{UEecZB|OCO zr;y?NbiSQ#r=0!+Sf|p9xp#e=!lfZSrNTklp4*voM_@e7quo2k+9==U$b2FX~zhNVB0v zhn**K04Gj_-i}f(_2LrA>DZF_OJpbAvd%r9eE?`7xfp@S%=2NdE6D$mrQk@|B7C6} zbLWoDjnJq16=!7EHvh5F4O?8;JnHb+O~?HsbW<2FH#S9W3o#&qRu6ZWH1TV=4I~jeDoIgn%gOk2=v*P`q3&u+d zmY@blpV1PHE92IgNmSKN^Ags#ByX&yf7dytQogz`rZ7{ft!{at zYy0H>hbGTuF$P~b?bxA5Sp|OC)Sf4cRm&(d$T-*BiYgpD`bhxnOx?s} z&w_@M+?B>i5#u_C-=x;zBseFuCZNH@z5(SbzjNC9;GFqYwDqFAqWk3%eG$1jF=zXy zUPQEek(;-L!HhI8$I1zDSw35FFE!u(+7s-SbX{Chz15#Eh|!?z&s$bZQ&;)1iMbnN zvgw$Nh61}ql44c{>tRYn^`iWFw_4$k@cly}eOU#@)~_IVFc=;F!(-qZuJ5gm!1@EN zR*Pm!ePS=6`626;gUAU>L(Qqq=R?ytDe}s9P~i}K_8vDvyq{9~s&u_QS>>a} z82*0%lXbNZ7Kl31AGO=b<@m}94lkgiotvoSo0sL@7-5iEk~N--+({rutp;#e(VmH#x-O3H4E3{p<7eyjIT4wxy4HuE;f2&-zSgCvdlJ zwbzhp9SlNbV1))K~M3=<1eK66E=rgVLfmZ#4-P z-aYFns`)`*_tyQ30a;_2IYI4NbT-Y)l)ZbAqMl}=5l?mY-%4CP@Yw0 z%3+?YSs3C4XdDAT|0H7;mM`$~j9b12SqqNPI0r*)E86d+zKgT%@p_vOkuP=9;6MyR zEgl)Ij`4DZL0(mWLIB{Vq$*}_rqcNF3a4VqNJ4AQX91n(tE)%Za4e&vt`3{ZMrcNW<rKl(zg|bob%3E^&h$G9UaM^`rE#KTza- z4zLYTZ}TPT$^Nwyo4&2Od{Nc_TkOWF>cz}BvUs{B?DVXEeJ<)p{yv_SUGMi{`CrPX zXSdUW!qvv46&8VVJ1>e3YJG3bQx+!O31`@Zwc8)a!biCIrA#_@m9Dum5|aoc~1l6oraT#rZ(jm~FD${hcT$_*~)T(Yy*j zc7R~c8Wo-ioXc=&wIE@qQ09vh@H{OoAft9va6w*4IK)8~Q0$NQ(~)O4!?*XWwHK>u zi@S;b3abMpRhWZ96=U@fo9dF>Hz2YXd(XWpr5xWXKB$^ zJF-y5A%2iviXz8}ws72Mrl>^r?{t7KXVLumc2V%8}P|60c zBUxMT+5Eih)k(e7J8EVGykWxk@>bjED&Lr%xF7WnsUC*aAxg`2aANZ)wb<1Y?wwe| zi9DlzP!VmPQCwK@{nevJt3c}-yZ_FtmoQ;~fS0wu&P|vRwIHj!M)72cc&K`XF~wou zTjpM9b{m#Js7UDqOqh81sbo->C-Xmje*c=7!M`9C`OB6eFHRlIg+uoS5KSrLda&+? zr__|7N!+VUJsi_hY?cP8JNL zTo!Xij-o*5Zx@yPJYjm;gE;@{ip7*h)*Q678)Tg0x8cXF_>=1J6^D_M-Iv$W=5I+RH+{=Zg!FOct)itOBnT> z{dJ-fY+XDA0u)n=v!%cNL3O^~JgLLsQt@m~!Cq{_E241$OgA z;>nfn1kGc*sM$;VMByXOM!ZD4UFB&vFdO@x9Pda%XlEjG=sn6#!$mAHJO$d8p8@R` zcFv?H)x`Ntf&T2G?Au$jBoAulb+7j;&1Kv@F#Z911pa0+t*D^}wnN7r2YC3{vAj-b zm0PXR(`BA+9Fuzt!xW*XA%8ZQOp=iCP!iPK()go!)t9^GWw60-Szp|lXE;ySi6_6B zf0CmyC*BQa56Kw(MZZJ#UR4Yztx-6b3T{vI}@%9@ohB1cW zOkwTKeecqf3Jq6-dSikaoI_DJFrj$34L=0KINjB_NZ)moq7{#)05b$Q=j$x&F(Euz zKP>^Z#^9CgHmFDHSZW!OKP=Dh6x!S(>K^*zqV{%=j`~LpkzUfnZGulfD=o~i`g?Sk zy?(%R)zdGfbf{u#{L}}S=SEB|RNlWcEgtt{t)CFC3gnBE=;#+0Z7TkOcUcmnY}D;e zZmC<}kP;utmUiOAxlnfzj`Er#&>nRMHQ|yM+}T_=dhy=C$YKG&*K`n)fiw^iEpl+U zXlZem_Jq=F;d1etML&W{rr3X{i=)Mfk2>tXNKF^rAkwn*nrAePPeW2a6W}^exy^X& zpndY4B>lo6$A)bqGP+fpRTy=yE4VYC6L})D%`+%^2sry(B(8UAPt|nZ2knX2RCBZ1 z4YbT-hmCP7W)Ywu{pl%2pfdKrxZa(CV#WRzveEI0uh+OPfnI%jL5o9adct)7h@nfW40Z)8e3kI{w;o2pY;=w1)S=kwlb%`eOF3Ih$ za~T~M#jtA~ZeoI=vo{B=yBG`U>+7DLg={^!qvU!|w*;8wA75{ONX|->|E`MmqMJIH zLq-=O_3{WniP^d5YNW*(!72|xo%hI3fXb6xWh2o?P(!J2>5*uklGmo=8fCZe%0B$A zP;8vGYcNn^a-+Ql+*gd7RPeH=6HdT4RyM&;_?*8S-B zyC&t`pK9=1@-8<5bkImKO;Ty|SUpwhk5NDLJ*Wr})jdVJ7|Bma z#wTOGt#VG#@CuPOpx_+c$+1UChzP=7eDFf0J>zw}V_keav)QCtMrGjrx-t0C=(W6g z69hdK;D~nAIfzRBh~)PZpS|)bwM;Cj30084LH7b&_y2&arD`U1=q!x*WF&oAuXxG& z95>xdFI-ZNksM_L6>)52U+}qNN-bJGh*l15fCN9#*Rzm8_3$p&AfW+`e|V5q1YGcA zhK&_dZ|Ru!4Wao)_G#V7>LTBXgk32y4uY1C8%B-Sje)GIm0n#ND_7M^7?a?(UKBf) z7>)ap`&bkk8hmT*k6+3{g966ayn4Z2G&%_iiAtDJ(%*t5222Ab9y7y?fAaZ7haNO; z-Oelvf(LUF5VHqp5nF(mxZywvjQfjnUrp{m0Gkm9*PYMfqnDeMGcjIM&wstvkPiCs zpk)(Jq1ar4I*p$;We(dvx*J26OI~Ew5xDB*U9ma()S;Ja7AI%Y1*^9>m6_I|G)W-U zebgCq>2Vi8=;M`+K@n#YVe2*185bH}Iek|0zFu~SSv`vgMdi?S%?lEW-z5w>5c!$1 zzQfDq`CA>N?n9Lwovz)6p2e>Br*$W1h|sAaZb`-JzE5PU_f}-hU7Z}Y+(mJMp#p2m zF#aA2+z|G?)YxPtH9u)n;D(4KFjMi!O_j>#Oka+;@JvdO8t0AV(C!>Wu0nAlL_Io- z09uN{KmjhDY&A2#8l<=re$N~@rt@aJg2++#q&{`Sn`Sz@*_=J+I^Gl}v3QD5O_^lL zro)w$Q(r0F_ou3w%MjW-B2_B9+3ds>>uYdp6I@lSj2fH9Cp3#rgZe;id(=xgLsf$7 zN?E^89d6b&pvi_rUyA0=G$Y9&f2UO8pY#3~jKDusvw?+0dyeg5+gyXL5kVp%T2QCa zHvPSJC^p072(qeg1KfIRV%*huaCa9YYsxkpPHieqqIHYa+1vu#e>d5Hnhgm~iQmND z^*ui~yP%wgt-MM#fxh}^S}QBG-OqN|{=+!rqQu7{y5rd_8OjUegUDb*uqs75 zD?q1m!KD}J?3-%!P2WN-7GDCy(Kbo%!R$NT12MS-+j0^OnPZK9o?VsjdTwJGm%~EY z`ClR`g1f7ejRVxM9gA?x1qHI~!a?45nl(?=6hy*YIl6&orA|3Q4KnIb8(?G(E1Bbv zlT*j}Z#4-D=bMSsY67G(HwHG|S=lkVHLAh%7v)}iM19Me-)6?EMG!CW$&1A!I^9j< zL+4D}sgtYBr-*gvkwy3lIrqzZIpMeG_isx*y)p%c&;a2h(w4cX(g>ChMYBL%yhjY8cQnV;^Z`xT-Ll0&2AARu&&v^B~C4r zvPbhgN-Yp4dacpny2t>@xb)daN2<^fY0As$y3sesxGJVs+HqS*ka^0hSC4FG`{lSN8smH9U>iXoR{OV%mrrIvc7+tTWd^`Iwp!gLk3 z#~UoV`bWMIJAT!2v@kN?_>6A{{duZ3uKWvXuPm{hBm~N+a1R6{XUjIh$@vN<;G8Pc zSQ9(BMtnyU0{PX?$t_mENzrj53Ihz&C`Jt3zF*)MsX=aIC%{0-G1tX7ysXX3GC$5- zYXfgOi5rCyN{o~vdqKxq?pnn)=@HjU)Wtlv*Qs#WDfeorxFR3)J!|*L?qpsw*dqE1 zG{_`2watydo1-cz=VoBPP%q*h-L?*rnX76>t4E_m-cj@k>YsMeS=!L@6JZYa%RQ#z z{tzcdby9lsN#XL0+2u{YRj#Y}&N2?8f6d1L6pB518hAbw+;AF=^# zR<|hxL4J#lwm4Bu5tS4ST3F8!D159L<+58Dlf{@ip}c>OD3?~CQ6Ly9Z3LV~O@Db8 zETUmA4zNs;gvu%V_-i$-3w?2Q2TCq$SQ>st%McMLxdm2{W{5Fjuv`Z*#40v=lkl4L zq}UoQ_0kwgkav0{#&>O51&axO)~nOCl5L22QrRipA#mA$1J@$|@-o8ySy&UzQ&OBG zvA07_50Yamy(3g$uH`VYNn!gh!Vvkbc&*$}i6=J$$QU+=x%u8rdrY-0_pFVa5@HF* z5hjjPCxi@MT=cpwd|BxksvOYs_&Q*cy1lg%fp5Nib|$2a7sY<*C-5Dzg2-Ta+HFVn zX!sI}&N*>Gl!Nh&mJ*Eqv31Q^4ay}6!7FpCw{fD5EYHVbUrnP4Yw#;+1NECBCVud%~Y*joGqMBpq826~t2vCaZEX6s6BfXlV_udi&# zxCK6G^GDPY&3=r|xj|aSmbylJk`%SFtOHwr=pC^q=Cyq0WIlULyR|H{8Jg3fev0xE zyC-vsDAHnWID%I`!#l^m*Y~L9*-0L13=M_d z*`hsvrn)LAdtnvYK^m>y%z``*QGy2Di=-w!{x4~uepC2m8G>ehago>(ZE&2Tq^hK#H2G)@kX0X)n=L_wGg;bE$58CEU>v!mxnSw6&RBXIFA}Qc}ybyLDA6+m4?zk zwZMRlx#0R-Jb;T^PwW#j$RCI59NFSqdy@MU{I?$DsFJMPqR1TSK-62Dm*ueCZsLmU z0IS9H=(08oQ>ej8V~lFYZC|>>R8p|lPbGoK$2C-7T!(*hF~Phl0qjN>B_U+8N$C$# zvC>1Wwnv7Gk+qNiF3WS5Do^($AnM2sUgp*x1FVG_wc&2F`h;AHFwN@3yaY-=3Ef<> zEI1jDu&RQsbpAB{wY4~Lq|smIlLwy5`g_`J{XZk3@|RUd|GCZo9RZ^MAH5%A?^D$W zht$Wz%sV#E+11knygQ^afS>&z0A=MLK%-dF|GKGJ$ENhZpZS-3J5H|zWwYNJ^EIUC zum^Y%#{Cj;ootFcbaG!n1B@v_quTr>(Mcah<{`ZBD%o56RR7CTm{r$=WF z2p{8M8KMjK^}VtGNh#n-a>ee7)QB!%b+NuJX8&&0^!b5t3kev|CU$q%y6dG4N+yfxL6W+4)l270+@?czOhNIY*4l`G<+u#V(oYsxDV zD6b(&e|f-z7LwKlbLn?{NCz~K;qIMw)dv`_=D9e@E|lF@8yWRJ`!vitg|4YZp=zoJt9;%x0Nnt+dYYnuM^*41VO+JIE~nTywNW$WaQ_ET-B;+zq)qMF zS>{!apb3&lC(?QTRmk;b?)%9pAzIR+NTa5U~y25(w)!BY9YE zBt<#5|{ndVUWH<;?-TyS-;bi?D@K$Y(4BxmG(>K zYcKcFpIRFfru1qUf-E{Q(xMLC0CSX-^#=KRU;4v$J!Dl-?m2brYZu8BGQeQaoA}4` zTvLU68NNl8hWdn&XaUnH*Ov`&4c$E$@yley+VeMiJ-bwjCH$Xxyf5(@luTb5S~;7g zz1){|QeD@mD`jK&X7;qr#(d)9m!s$$U9LT!ive;}PRj5{&vVA(Ix;mn#?u}t=lH?R z+sCBjGL<4hMzl&}TiB2FV?@a}^E-)9*)Y}N7U9z+%}3NqiM)LeC^731is_V%G3NDw z20Vz}E?+AC$F}H_hL_1Zdlp1}b^R^@3pxd<{MsLY;17#+%kWc+<`jl-?iOn*U&54+0o0}rln0i?r2G`u9S56deL}mF>F@V4=%#87A`$E(om+_x z%uTujBm%nvmmLW8QB&YpQ1VN1ieCdejUxV6V@7^6=2)#jC`ugWTEhJgEW}nrLzA{8 zgr9(Zc?~;>b~byuoWYqV+D&XW)6bnp|MSt-#uH*Lp|Cx_Tg$50(6RwL z!vXGVmz>y&3b=uGilm`lKeyu0B()t%$Eb(Ld*0qeZV*bYcn@w`RAGYd9;MIXkb`4Y zjk6*1`9Q87TkQ`H=|}hRD;riBmcvbh^feaoJ4v~&FtY@Cd39uE`Mo-C zZk*w>d3(JLZPX@G5KlErt~L1!`6X`nqs;s83Hk!wExQcEQ|b?5kwIeoTjJ#j-P%9t z@>a6$NPp@Pib?dT__u0j#JtdDXk4p&DlVoxldS#r5*pftdd@G7r{C4ENVjB=q}Uor zbeFYtv6*KsM3vk)&XC8oBNINPK-7gMDdj=#Y&5Men@P@wweVe5KwwZTc_%$v<=lj= zwYXY_-t$RLH0l^Q{inO{LAnMgKVei34ZjxqgO#uj{zm0OK!jQ3)L`Y=59ipXmZsZ3 z0A+1RVN^BAn*|~ckGGda>~+A}(}-7-VGS?rBNEPX$X)#K)H9uRYRk|!)nc~nx+#}( zsvkBVNsiU#;nhYt0zxmTSn#LkWXL0#F=A#WE-VDl%dK_v{gd5+szM5^p$sXnFvSd! zO(8+M*?Lzlp0ISt^4h2DO#Kyl=-eHpFSv+QbRz84hr#i3=ACl3)B=Sr9bEsWli6jz{=kc2m)D>{x+dWh z|GFVBQE)8c_ti6?4$jW`B8BRvj{BZPxO)B|CA$fRgT#X1E-hT^ah$F;bfiU|f;$B+ ztnSnhl|ol}{+7W4B!Vv|z;KceALdJUbhZKVU2_=Y{Ai`x-|EC%IuQ^~sj_iF%gN*FIzzENe&e#1WsGJS#*{A2I+k@=A3r0&--kSj~u2+ z3tDn{_qMx)NdC_lf5( zuCxdGZ>}n|%6u-E|C+Gu|AGbUe{T8T7lD71K=7~l%H_9|J};GL}8rb4Y1i1ad_B!}x6wymKP8Tq4o|L=lM zZt&~x?KMqNd~ql=!x>p-)Uh8x#X251JnbJj&@R!NDqeVL8>y=Cdrb93uVLtM&zsRw zP7nH%Er{e|eyx^s3b}^bEm56}Q>DL|dXXKt#j)=^o6Hb|NLip$Do{xLblmXf4*>gA zPn7Azy#B>PeUc;Fh;5!FGrxe{CAG(`Td$PoM>EYc&C32!hzeM!hw>G4jK#G;3b;oY zi4`+qAmSwCC&H3Eb>vvvreHihNHobe`b~gA%nkAfpg0c0XXG4cLPWc!>`c5Pyn9kT zMCP#MnyIY4+Qag#;1r;xP0)>%+azpt{2^dfi=vdvpo2E`O%Q|4BR>4m&vv%7jH0WX z(E`TX&GblqhJmCg|LA5@Lv_w0x!qYeq`KA&xz%n@db{$O3SrqShO!!HIW^=y()kBK zEq7M>Bl4A~XCxY}2X&Zc$P@!x{BQglP5soP=EaAZCxu=c0sMye(HkU5@*-R=Uibam zMq=IHGQ@DcTG3ggC(B)ncRVJ2Em~UYw8xU6&2>b`M~JoMVTtDp`oFkSjNb@0oLgBoX&b`>X+14cT(J;pI92+_j=34@Up!Ve&-bH zEX;yWshE?5;@nAcd70Y|Q+MmAVWe_HjoP|b_w#Q)9GX%h=#xt%iU61evs8bUOVyvW zb2z56c6?1lK+v?jC%=iFm#K2+=B#*R_SG7v&()|e7IU2XJ)-n!E+nyO6qM`Qg7 zB=8()X}JVI+w{#ekqElg9H@fV>#C+^wCCr{x-(wHh$>ZQm=O?ZQnmWmbsc1Ix6Ib& zSOJw#xX{h;LGGPvyQ@ShzPlL|p-*=koDH?hHonBXSLGXj-K7+)Y`*?A0}rnDTdYU* z`#u?PSYJhQpy{slVf`Yvx{2@X!&%JS;!4iRZ0*9kow;EHWpU@smdA+-{pa`zqnN1? zcWc;>ZWR7K#&&0mq>DRi0ckqOACzZonG3CD72CU%j*brR20~iBo6P1cIj=E^1UMh1UBH zyQiPlb5moxPDR1(g;Tv~8ua3$7)@Ha-g#a%@bMMl#I=SF6caMVIf#Wh-NvL%*lKi{ zxs2>ukuE*kq^uh^>I8Z^l4=i{H9)c}X4Hu2o>;@Qv~O~pJ3IGm8RvUBK4v8e7agmQ zV|(KByr=eBEN(z@1AKVXB{7e(b*JNBe+$P7_9DL|vN=Nk?*4i#NJ2=}qMM#8t6oYwJV_)~Wf-TplNL88E$RSqSZ1$V`vPdU zEKfemp0`2P89*ab^)Y5B)EomOazLdaovD@~iAh&AHx&!lV!sd)8nl3^J(_3skQI<@ zWKCbDmh9Q*NuKoV`D+26bVfCZ+z|1^vbE*aa<-Hj@Z&l|6-E`ZX?pYJUPBj2Co(li zLVlaS=1E7lm(Z|ulYh3G`7PE|Ll+J;dKy<<>b*N#F}uu z^s7(SUJ6P0R++`46=f!7WPg99jSvV_#sB6W4O&LB(}|NG;<3XwG|%a21HDMbFvRpd zQmw8?=7$d2B?T!#MJ1^HaliIsp3n`xA|8=qE|T|kG>3dv{Gw|&_TDE!x)Bc;AOD$_`l{m(;BvZ| zJ`DHE?hRLKhR4!zKgsy-Int5?SL5Ho2G^0A;qxz?&maKN-^JGu2XmKh(luWlBK9_} z-_iU28rvilU#D26FjfhN+ssBo`>}mJpO7?sCn-MZG4tyQc8+Q*_o}rerk`C{&d~$K zr-k%q{QA3wsUf9 ztRvQTO34L{-T)`D?S$`r|@Fao|w;LE_Ji|znS@_LOorHnwAXQJ_Uj{ zGul{`luzj6O~uj~l07<|>+OiPT%0h@i#|@R$25@tsBtTqrNp1I+c`J!>+Itj4*mgS z@#$Bsnabh(Wgz#YY7nz=@VW%QFCLpFjB=*J#=_n_jO_!2hyh65XFDTW`gcRP{&lL2 z{}Bbbe?Id+E&~5()$0Gao&KXe+0+ZRKP*+_r#mmfwz3xAB+*!PX=jziYYd-U#$M6? zx`!)J8so7sr%-P%{i~O5OfrX%bnyDXSgUB&@W3ax!9}|L)BKQh0XYCfo-q^88Y7%H zK$cobwA*m)y5Ai7^)1P{MV~e=Xbt6od9*&K(92;vr(Oc=Ui(Zb%aZyGgBF6h_8z9k zW`M;fDlo895PWqlm6nUecElI-#^Nc&yI(};JzF`I-rNU!r1m))1g#Vo(|F4lB|*N_ zs_{K1Tf*Q~ekb?7Pwl%xp%IFzH{>tc{s7(>$;NILvK?hreo>L-{9=!HOc16GsP>|H3yD zkNB?XhQ;wzN~D!ru(q=AFs;Y}tzxq2#|vA2PLqg_j%;dAFv+1{*eTn^U}32uk+e8h zm6`%W`#a}XnfRglU*sGus~2eP-~Pbfoc7V2GFmo-cVT>hR^Fuh&T8#`IVxxKAKfmb zpcHp9xMy+fEpB+kYlbo2PnhW?KtiZ@&yd<4r%MivYNa2wZXUSs&hvrqmuz(Md8|TZ z$#0)v?jA(XCF%Y$wB@$}+u*~j?utPrr+t%+(xhe!RcvflZB0tP)+5j#GDM!nCl5zq zJYcu1+$gMp)*{fBKYDIsJ#;B>GZ;jhXE1XCE~Ak)2zzSygu)wbGyb>sT=&6)MBO}s zSkJu9+>w(6S_oK+RdEt~9mKPDoUcjiC}4@2rrAzj2mJ%kg3QCtBIpKo0eF7^YWLu2 zmK;>nCv7~~*jSsu-|ipXUplep68ARvrM-o>h=^hJ&)7pJCk?_}IIndmMx9fBgt6HZ zgL@mDiCroYN2qPUK_ zDww&u`n(CJ_oiq!^t=a8)Ro+ou_)KYknZdRK?IYx;?Y(lf3{2)&yhY7a0~M!e$oR6xI1{vf32Di9-51nM zobw}7n1KuwByBqQf6&5fC}$!qWp!-VjQ@pK*p+F^0z1`?EjBthTb8gFndX-k=8_Ey zpG3!~7ui=T%!QfJum~yym!f-Bh@7$~6e*OyqgfmkRP4y1fsyp9+G)p4y+woDsZ*{M zvc@;s7m$a*qsE|G}P{*;yVxrhG({(x66m>zw~Q~XU|!Nfib}ylu0?|%lalpH}&zY z-lQJz)(0WY=CIz@>nHLb4~B`4{F5d|Un=HFPs(MqaAr=wjyk(T5YNt@c2!C_v}nD6 zindO%A1!Rt8U+z^2Il0kzUX(vk|IDzlRU|)Z-mX7vf`@uJF9hbcTz+77P>NIB}n@x zD+{KM*~%Y(nb!<%``_dN(nDRLw#zkx^yX?xZw6_QMSSCl-AQ+}#!7C*KvK{61X2tJ z`ns$i&C~WdjjieVNi~Cr!95|(R|yI{TLZljYgTpc?c|Lq2ht6Vjd6plsb4seiuylW z$-c35(Cf|900g1V4>oaK=Z(_j2~kzPMFzj#p>Z8^E%D9IXr}i=9-8M|bKv#*=7%LN z(B7n!ZU@k5(ta?FLWADwyfEurCmpDxI86(ALN1)59;qF=S zLNKDmj(x&OR%kBGtDTD0B(YSI&Z#s{mfv~sc9)kgX8jh$K$85&L;6T1k<+O1*hb%P!m@J^;jqiumnM^V4>4@is*X zjmo;?&_0orO;02@PnfUIVqN5cOJLa1uS=1=1KNYHUK7#WZUaIE!Gd0WG#`h^p^#FbnyD5cAbr zRh#sMdykYglR$4%E(I2xmR#DPJ_C6e;> zweU-{k79-h=PTIx1>DZe?He=yynqvvm4xgFqR$uV2Fl7hPv*A~6I4GG^Ln7tLS!8F zZ}Y>UvR{9D^MwQ6&`E4rQ4O`0sMOWA&%LR)0fx`o)U)8j02wIPd)#{bxElYBy|)aD ztIP7f3rTQK1qcoWBv|l3umC{{cXxMp4J1GyxI@t39;9#*2<|Q^+=IK@OZW3WJ=61a zPv7^*^Pqw_n5*OijVYsOg%NO$B%@t>Qar??6YFo*p?F1CHeBbe^ zoIRee@h*TRF8eERMW$9p+Ja&x@YBabW|+@byulRl*8fUy-6e=t2g?f$jh<|A_H z^}VlZ7r7T^6Y8TmUEPF?cDYe4ZNE*vP|)rs;%VezbtPU{4{4or-e52hXH+JLo6)W# z6TDE~vDtC)j-{-xUk$!txlk2zv?UqCLhr33c6bUo;7h7FVnW6TEN04p&q&7=B{k6q zZpS)AKgOZDcaBt6?a=jnJ83K+Tt{iZbXVpzjmRu=IpchCQMdg%*VCNHZ_R7)y>4OkkTVREC0Q}p@xl}FN zV8*4I@$AEy7}|l}h8yIScq zh)Qc8DD5y+_i!dI2P7w`gvtop|wa zqGaK0BAUa_MST1%8Od{^CI*@W6n^(fd%AR)F}ad+1iuKF+34=+;nEvTJFJ`1Rc%;}z=yhQ}N@uTM2b-$3E%nhb_yNJP83Fv~V*wls1f`lc6&#Hfh=(f7g&8UK zejW6xOV)9P?bhBw_%dFl{2THygI~|bpv7kJ-2+GMX6J_!vxfeA=u89`UQXnW{gV5m zO6t}oqr7p#SbM^yK3*xrSI0Rct3B+RukM#FdHDJ{!%;U@lgcTQ?IA=gb+7Z^(A@#r zFjTIjMi#Ens&t$_>L_n!5jOKx-q1;lno|aNRfsP8i)bChXS(X=wV#TbT~9(sp3C3Q zH}3OBIs1ERV;l&sw(j&ZV%RF`#4G-e9iI1UX+g5pA-G8^9wAa%d0!>Z)ARJbO}2Y9 zGipOhq^#AXt?E`0EcXZ_@V`ia{;zW5e|L-A-^!H#rR}}|AFOEOe|5~(c-^ygETFF~ zjdH~TrPs*a_5Hcd$d;8kkaf(&61o*rJSMO{98af;Nef!MXq0X2OmAcf=i5r!Pk2jP z*ZAT|dv=tjD8JhLD%1DfmrI4;rz-h3h5|_7mT5kFaW4j&*0EW*>=7gLEBT<+jF!@M zhBOl8&sr+ZeZO!Je9_mQ@;srFd(ehQv2;7f{rl(x%`_PnMYo_<{K+TTH7e7jggyr9 z{@cqb>kvIR=c*!G>}igf7%=Y3ZM#fWUjz4nnfoHw<0^1)i3~YOSfp_uvhDyTokic6 z($p!?0eZDWO|o7l^|QXsLGt6-jnxfm@rLyx&a_B%K$)vsYNpT3tk1NvoR;}8G$yO? z41FQgGtn5$ac3LaI!UIasrnC6K#7a5GSc$q62KF?{FA`OObuG=EQGl3CCIfvk+jX} z1Oq}#wW~})<>HQP9?s8%A3*+U-wEq zzSDJq!8<^i1{;si5S6G{ip+$beE{+D5l~I^8}4=x**D9g`ldtM?H$pf$p^t*+eK-7 zM-dY_mF3a9$6_`ay)- z93vMLMqhOQ`1BB*QcmPRPk#0#x`SoX1Vpf~`;p!2MLaG{d^FsjM)f2^ zFZ|q*@(CAwlt;}D`X492Ynk?;b@9C#L^y0WvbaRlh~45!Y^7oDv{?{gy^@mbM)Vg7 z3ngVsZcxk(He#TMeXP=s#Itqt)+{t zon=~Gbu?5cIun7H*v=do5nEJn6GPS2oVcOfgb&P3lL>%(!~6{AF+1}A(S?iVkP_s) zbrr+$O{#EGOm~Y@3`uV+EyYS`uEA6!)` zrKyEPtE2Uje6#6=cYUiam=nHEon(yBO`9@ta&+OsRy!XT7SWwL%VXFiXlYM9+vtjq zk!0M<-3eMbIzRced}D9$ox#Dou+-0;<220H-Y#3b_5L^+Pb9)?du^8ep}J~+Z~j$n z5(lq20R~^Xp_i|c305G9te@9?fmvG0v!#2uGa54y=V#QfHX4xLp09IgEIt{Ra_?3Q zoqYVcv-ljk#4`?!gvVhEn-)hn8nBd3q>MKJkbrlTnn4E>4TK=^`xF11U&}}L*#Qc6 zpRbPP;#KA9b*^;EHY=UsN&iWq`^be*vno}SjMhb!_E?=tKTcmm)k8H1_mE{|J_1Iu$v_h=dCj$-pYw7M@OuL8`0?y_risz(rv-Fvthny@m z{d|p$al2X_>R0^Icf;TAmk?ysAWGvtry`HoP@Fcax6T=^tEmd3zP(?ABRJ#(?6rU< zO$Y{_0xh+#;sCABYi+fBNXLUX3m6U!L=+$U>~fPult|`pOh8l1YwCr%Q~WVe)8(~Q zf)=kZP-!(m-Xn%y(ZuD)zy@1el3IQ>Za(mqy!lIxw8-%4!s+xqo|G63e9j^5Z$mR} z(QUKxljLKeg`3K6$WJ*O+$d|`xAR(ptjE=io*1XbmAvCuPHnUkOW6r~)qRplB|eFq zDfT&cT4Ne2vN|mkMdSEROF6_@`WKfLU`oudML;#pOYsh%nR^FlpuO$=dy3q=rf+Al z!0vsCos*htkJj6TTjjqrDU3lFf3|$)FV!DuCv_?#`#3#8(od21C0#Oe>w`o^2~`NF zptYX9K`bJL41*pz!(?_5rhTFwcN(Kx2Uuq*Qm_k+4gw{ z7kFtd^Q2NKUSY)Iuu%p^ts<_Z*#W2^Y>M%m_Gqode(o0?zI3v{tu^Sex|k(m1Hi;Q zHceK4Fz5ciK|q|}WA^?|+dVcW?$d<>CF`t8NUi#(YU<9jbP8FBv8J1U&&U$@;wTyy z8Rtf=;_V>W0OWWNN7JoLa*XjoWjmK&<>Q@cKZ1oD`{@ygL7iq-wsbb~ z5()q5)T@KuRZhd%)aFn-AIKV%2TTev8t(k^x>l$lJezr`VzDcY^`W?ayB$>DX`E3V z&$GBk=rSdoenwgOAy6p15nbgR;8dojtV|#WI0ecok1(9LJ*MyKwX*KooL0vw&tLcF zc{feDqU^ba{CpU3@vZ1|)NpkLnVh-P4c(}W5aTk>`xX~&!}BpJX$UqL`@MCfeuRf7 zG`qUQ_L(_OAc|pk@pEj;7xDFeHPyb)Nc!ZCC9KL`_#Er^e|cJ;?pys zQ~hfN<{+e12A>`4A2nsZxt+XkkXAKGKh^W`FP2*4iUC-2e4!yR)e@bl;(2c zGw&E!eNoG6O$iB)``LR5B1Mf}t$&K?zNVAV@HTY&Ad#RDNb@FwmC{GSKGOacm|pmu zZ^q1k%E9#QyufWCeLn|K}B)C_L`ocS#N#~pzi71?(lN1g!&d@n>{rH z2H-wQe$Cari!_?vQ6GV6QI_2iAN1U!AsmaL>72u$RVn&03T;0=FI-n?{I?!P{$cA3 zCV3BAtK`18>u&7?a!$7AT--j}f{6xk7*(Ghj#iH1IS}Duf68 z`U63$3;7AJw`K0A4uV-QqMtvex8uD|Iu|4Vc?7CkJ+G;m5b9gvynY{CszK*+C@mh& zJvFp`V^pzHLwH@3G`_2~RTv*{&)28c7QosWdUD)S>e-fT9bS}aD&aP+mk6fIZ=phT zUH^JiggPvpeet;?sw<`I!-aCcs#Ln>=#F9f4atPuNCx+Er2P&30~`a9*96K0@D;(J zm6H-Tif|}mAoVn{1@P4|u*f=Bw#Yh}aM_(uFp&qlYigbUt2}s(eNrMK`tZw8=NDFcr1BPmCWys=NPGOJmlc*v$mrj)= zlk`6Ds}{2^qM$E=Wh@iyJiQnip2}5;8IG$!q3IZ2Y|jIA#py;zRzxe)@R#>RG5KiL z2Ib>ttyk#?F5DDcpIdnxXa}`=lWq?0mX(&wVQ*4Jctna1$daa@HslKZ&=FIpBOok| zX}&aWZmAu_!7^zj&9;3(2yDv&VX;`Oesp?f4Kem#E@S87ehdsW6WF= zHLV)sB7Tf$4C@78q2lZb@cwB^u>|{W2i>MF%TS=-XQn3pbB$b_^yS$?2}Jwo>g6{4ZFx*(NiJ z9+xu~qY<(C%Xo~Z(qL^jaD<%hkglmRZ4aI$UYeG;mzL#OBDFadwb-4>vKTol9yzuk zjpmJ1*rEyZRP|S_bJJd^!WFf6W0tejsv~v?^;{nfB9cjLpWFfP2eY)G8@c+aC- z34QV~b}cowWLS9`t@v$R(lu6{eNlQDw`(rFJG#|F{aKk^PZn-0QM;Wz=@)DjdsvF# zL$es;sG`o-DnRn46h!tk$Sv!H1Vv9M4lw#xtP99!=7@I{G6*q|N%-rEC-RZ9e(a*U`%;%)`R)0@Swu{2&7+MF!-7=~24ZUVLg5W8NGyd?@cA7O#A`TNA( zZ^qw0iRtSq;J7cl-4E#DV&c<9QIKQV)Uiw?9D<&%+mPL}afo7ItXpgGC;aEsp zEgoO?Q0ueME&zzMmi#_N;ZGMt}TQ1yk6jLKuPn z9s$@s!T%n=|A7sUW%LJgsRi9jU}I)!cGd^j70>iTF}Z#?rqNT$g(y17Oc--b`z0UE z8XSj)@xFBpS@XY zV0=jQo?TO|^k|*V0ZWdp`T8L}-V#lbZ=O1oIyM5euMZT5iQK`=Hq9*%Cs-g268C3S zV&SH&@UqO;QG-tB6je#mb6(*4vxK6bZu-*Mww9>e0pzP2!cSFGDd;Oyxp~*A_%5*q zVx*G6@@?-vYw0D+KCpO^@L4NoEP}30)hMESottoPq%NJv9QyG*-klU>4J>}Bj{TWF z{4H5e;k?MRdIRM|vieuW&w=;v*Fayz+7pel3RDmz_(4Bhh0K&ORQ#z4Gy{6YFC%`8 zG=q*Bp*TYO#s8rgyxAlo%O<^tHtyjbxcrFNyYT60ecjG zz*aVgFhn|kV-ZasOpZ=zopZl}^Lm4=-%P_|Sn~cb>)QAwNj;mdFX9TY6gLX-8Ta2a8T+eoU$XLf9XVf1HfHd-@9oHKs8~94v#uN5FD4&tJ78af zqf8RoeTqe{^s=dGOyk{-T|{q8V>t340SCshBK26VvW;5m)IvyniJQ*Yj71SG1qD$z zX%pZMz#qpM1{Z%Uz+*@h{Hej?#}AmRgAw?H2>fh^Tl_&7el~|kNAywcIR*2qF!3;H z{^orND*Dn%xz?eO5KlK70~T3aNy5!i)~66iP31dbOLV&qZnP%>k0|Mw#^kbwP#Jd? zA@4Tp7MPj2 zfLW9=cyuJiN^iWMMF*Ot%Iz3rBmA%oJLrg^Gw;}w_G_GHV{_Mwsd^g|QTkGq*4*4B zrpgst@*Rmpw7|=yy;5F5Tcgvi%6Ec+`lRBzChZ32LcNM(dyAeQywqPif}&|&EPo0; zDIwzHl4dp)J-k{Esaf0+)*MUb%6nB^a9iOJpkDg}t87R$volXaj7T(o7}2+}K@@8u z6)DJHySQS8J*uEfs4?A8Bg$+P?YK7WR$YZHR|`b-(ZeiX)0ACAy$%Rk9m*=*k*y! zG^;AWVFUL!n^i{divu@fyOD=OxZ(T|;js)gs=6{m9!#T~KRNmK6h1jB+Yskd{;_-r zorL#4m*VmdB^lTQKM#SwmvQ{-!iBIs0KZmN8P;1Ef&Wnguw@HBj|qqM1V-Q&AONc_ z0SohC1YiXIF#!d(2MT?!5Ta!*MD+^Q-vb=LJK}vnNfrPmn6I!{BlX?kh1|R*nDz`; znf9<>`Gex;G+jEPEpyQAl0^2cdSSVR3tsQ4u(vfReFm(Q-pgTAGG70GR2|qBmj8i- z3{0nk5%}jIP)fNac%YK!`mA>ROTU$Nbzzs_bQ+f&-168Yj&sI&kzzyajK#c3HPf`R z5&6y_@l(LTrsezkAur{E*&Z7X+e%G*>%t0bm-X;R0Jjs4w~GyIdDlXL{W(Wctl~Kb zVe?duAuao}Nu#$E!X_gZxr88v^Nzi!x^}&Do4$=Z0Cr}jxz{Lq^fNC**RGkRCY3*3 z(b3V31BYv8ourhOXzHOFUazBWW;2%ENxHZBZgc-@w;(pRMBhKET#>9c*CMR4?mU%U zfCCn}?$7l>Yo_*?9yF6ZOE4Gm1t6DbehoY|8UNDhZpQ?;kZO>?I|qo+`Z;fvRZC`W z+Vrz)z7rWx173B>`pO^kkEK1BEdBDGDX5@3RgX2Hk%$dBuCS2c}a?X*BQ7b~?XHV0GLs16~(ATD0 zFW(iqR7`&v{bBE?K1Y!V@fmaOo^0&x5B$g=b`FDH{N$?pw2o?bHV(|EJQ8zR*CiN;ZNB-!(ih{KDo zD_?j1m{PfgY&hRvPVe5J&;p!kA|v5I3z*p>xByTJ+*oqiO{Vc3K;V9<-`R<8N6{U? zOx<^I_3O=z?m^)l0Qy+>=)(36;H!FV>39Co9{q4*PHMP2Aclv4*n3`L!G~r>n77_F zhTTEmXrKd$%yp!eEVu@HkSTSgpEyySt=zA3_P9)_`VJ7G<~u{y(}en$lIO?tw&y27 zH!atN8u2;*91b4#7BB)Z0)Hm3&a1dob}nXG6S1vpb`)FfHJjLS`Q-Ps-mmQn_;_Ds zGU7Vv4zO*BcKg`&Mo8e$^bSy7{QO+~>k0B5pgaAxqoE6Qlo76F=n>eNE8-dnUKb>; z?Kr`RkP6u&j~Vdiwh!ARB25BPgBAXBlm6HC-b4FOmyugJ+PR@T!mV4(g`+wtgU~I( z*sJ?G1E5>239%2@UjsR?1=fQF5;y|(Tl|KZ5^8H{?*Jsa`^C=#(J_Oan%Y|g&NuJh zL*I*8&q*?8mkH$#(5ZS&UsXENAaSJoIQ_*EM#yuyq36pD_dBR`ESE;tt_90QHlIAW zcJ)2k>i5#{nt#vvv`P5Y*YLgDCJ*I5U1GdGPPif|whDE}UHR(~1ODQbVVkP_NkHO* ziy`o1`zDF?QOa7`bCr7sCTMa8X!gDB`Op8Xn+KkoxjFB|Y7<59vaInn*he^SY3+qv zv-7?@?O_V*Od~iVovrPxJ1KQ-_jy7};%SU8x;%*P9$_eT()0dpR! z|93MfY^TY;a8w10=>NG1{D;8!udV2RxcTR(c?`e0*WH+}Xunc)I6adf|G9PiVUMTk zqDfemfDntE&NSm{>}lDzej-tycLt4Dizn-+IuQ5lIWju1%n-rB+A}J$a9@d|y`ahT z?;hi8F>YQ3)I6^YI}@`*eYkJv2fvW%_!@7iCQC0KX2^?2yQ;9bGB*9<4mD>>8Lk9oSrQ zmJPa}p}$P+Uq8)DSntv`jY_TCNgY?c1t#2x+=~~QcYw;mulvPcH=o}DqUUc?@3oD9 z`^mn%@Adt3e|z&bXhC<|TLg_G>W;8u(`-e4AK$L{eQUQ*RO5a3Nu9eF6SYfl-39h?Xc6c zJ;|q`OJ0utdFt|4*7)xirTbgf z`7doSP2<1)IlxkhFap020oYbf*cT5Y03-1G2*9RcWTV6w))yyJkyS}+2?lK@QC_?^Ih3R@5BGmO9=NdP8m{E_rwaSn{YFGv6;YrtX$ z7y%f8-$wu@Yy5(y9u^(J2>hW0V6w&^ivRbScvx3q1bzwvFj?cL7zS9t10(S35P-=V zFmDed03-1G2*6~GU&q43f+84!pMwBQ*7!Nb;rH2kSf618{zw8aS>unS4~uhP1b#sR zFj)iUD0y>tTI{5%?nsz+{a-l0GcX zff4ux3BY6xn3IPQfD!n81YokpFKFsv(GiTmA4&iwYy6@3f1inmbrnY7ryu~6HGYa= zfCW4-0>2Icn5+Tw_WybWva_>`-FOwVv@GB|=*P`JSnI~_82%t4;6R;i9&tAiY{-L{ zh%8|+S+J$3^|n_PzT1%X^hbAqiH)zj z-FJYB$uuv;JAh{H9iV~swpaV4=Gvq6cHvfeUemX;SYQ`x_1~HpB^8EY0{|oNOAt`v z2s7vvf#Bm;09AODm5dXwP(#c|5N9cxclMM{7-c3UYw)z|)f91bM(W_{}KwyrRomMXaUjJ-)^R5>8K%E2Xt^>d4EiOG3uo4i@P0Un+tM^8mz z;B=KC;`%{;nayjI8bVVf;&C!OOZyF#Z&ZZQ>f>+V%Kb*UuPu9>asnU((rrt~Q8kqd zF@6INc~b1vnFEeg9jyT?$wxPQhA+wqbFn+}9A9{0x}BA&4_&L~ZY5yZS{a8M6ZuPI zJSNRToM^=VJaF|Wa`vrj+3S%;G5x{;{Wo3FevuUbG%PDT%{UXtk6$PF`I+VmFwN&@ z8jXLwkm!H^t1Xr7yrdo|$!pwlXEn*7JeL%KPPpsXvq;O@j=a_e!M8 zR|IMrvx4CS8c5WLO-Zj&`_7%SHaS&mUZ~nh+M

    )j)l09Fj1p8qa_StVfTcurBhoWUY&U&r{GPh;K8}81@4Jq)D1?PSeyw~Cfveom6&Oizz zm71hgoWj}`zMHQ(9vn_58&2}SgiHI`I;rD+AbVr;@pe1?it+Eqm)_QQ0MgHQfLeh# z*NORqEv&1z7pniRQBq;73dSEs;FlnvB2!*M6EDwO2?}R@L=50_)PmgX99byR$?RuK zLzJAZ8zvt1OUjMya%yu_G4r=K5BOr-zfKN4DrJlY*Bh&pzh{T^* zl%<$v)^u9#H)1kTv=Qwd`>5)=<33bQuU=DaJQ? za=lJls$9>pIWjvAH%d3ER2qFE`V1SCO}3$M$h2EbdyvqV2%#VUq}vS*D{OS%{^_ALtLPN8W%QS13%_HTMO$VTXIO+`YzRw=Sc-}Tmmp!WMnE_E6SN-6wb}H3@y_(dvW-E0Q64eNhZUtTW9sl(UFctl<3z_ z-?OUS0c15AiZmd*ANdL&%+l>VsK@gCR!LD2IJd^V)7x>7ul%%K#=h6cm4j%OThHKT zj^}N^shXQcPtOwfnfM`P00EDvefM(8Eqnb2q#?j&Z`VY!odPGCdF>USI|JHF<^}D3nj|k86=-9fZtiLA5m2(G&Uw;l`>lq&HgEwC z-S1$Mq-cDzT@q%VerF)`_akqZviqN*ly+4cs{E{s0E&6@EP}!Q=1h6;Ec{vVk78h| zARio`Wx`ma-cb4NUb6UC}8cm+pv2dwn z6ekG8 z*PH2~RwYr3NmdK<&-jhz)MjJ}2n(NVu)Obis(mrj?07N0!QM+~%GPa)(Bn}lio(&) zVBegitTz0S7#-e{ILLTT7`Tz_WNvDBRoljSYiMfrZC5~IbmX~LLstoni=}7QN|pOo zgI`EZu6W17tM*U@_~N>yRC`C@=ZHQoR`K%uU9iB)_y#Nh^SAL4C0QA%1UGeU`PasPOP}wo% zX>(CLlq9yg=1Cl+747{bo7gjDIi5&g%=L+~oO-P(JhXI9UVN2ivH+9L91XS2gqH;| z`carSQ>aa>z7p*p+wjZ}>CF>(j0gLA;1E3=Tk?zhst@rMtCq<$Iu6J}48ysyMoL{J z$QkE6#5xCg|lkiU;Z2A~^dV2)*JW+|r~~2ov(pE(cWeC-uiCA;E_x&k36eOUP{j-)1iHWGzk zCuE)Si1Atxlwh`M)YEnaM&J{Wq5oLEe{-@-oLM6{o}(Z@F@Jre!syiSA@Cx$Evvvj zqDxii>NGWRb}G%keuq~2EqHXfvZniWH^Z!28kl3K1+@jWqtS1EqG)46e&f{K76*hL z-C2a|Agx_MH~t^`2>bJACIF0Da;=r74y)5Yf}_yM^u2K_e9|gW4Qdm=2wdWOMjxt~ zG{Qy@cU?J-(v#krqsK5pw}a_&0(D2)&kS6p0vQ>GG}jHUJUtBVi^jBTjX56+=ISD{ z<16|w)=1`{AHSl@*S5@9{R*tdW6EdI`BpFbsO_}8syGKBB~AQ7R-#GIuQV3(WlBVj z@wq*16Dlwru2w!bS;=j#tTdm^N zAqTNI4tV-qsy&Y7DL*;5gIBh$b4&Z@U|c{fTyVP zo{8DCBoaQbN~llxzDK2`{vOBOa5r7}FHqmo)0a(Kv?4L` zktiV&ZK=SZ#%vmeJ(1}3-ty!@Vat2#@9QP5FD9aN)FrK-5|N0nI2{WFcyPuUlYY!? zF-D=Lkj_ZU8@qLG%Ccadduw00C;eR0lrTCf?CS%lQk+C%juU`3_)s@=HjTzNN7Dh*50+7lkqGFj_0b-fO z!Fp+)<^dIKT~P^e9tYRAaXJ&6@m5a<;6Tb4t^>Cn^_~}sfGTWs%%oV|iO5xfS%ew>Ulwwt>&k&aAZr3n0 zNHxG&H`%d`UW`R*b(=?Kp?jufD1Y2}I-~ueNWHzJqd$zUgYf;e?~OBPax$TS@JsqY zJWCBZfWMaHQqg8NyWVbu(~g;L+6FfIOKGxinSDTle1)84YA&ZZ=Zbgef{DN@N!~^{YSC0r{{+$9^Tja^{{ z@d7Ec<{h9!u5%Ua@|c~jcuKB6MJ2b+IDhVYBd4l;8<~HOox~Z@*UNFAE3tG*7ENn& z94fhr2aDm{H;y9xIc>O645ra`s*x7jShBZ$qg2(#x-8(i-fJ;FU%K34<+sns-$sgA zluc9-JX9!#L)3da*Mue>ZC=&uZo{@#ag=r&bXd}aCatOT0+f0fj!)*K;**};s_)D@ z9CI}Q^n7|;1S!*C|0Ac_znJO)6X1Ta0sEOl@t?nM4$#vGLwGQ+qq`z^Pe&hsmUpS{ z#$tlr1Wbv<$}&^8-~-fIGN+8jj$+GV!BjFe>(m{nTXEk3&4eCs>mYN1o(zVogw=?y z%1XUeq$&!{f`@DAG*mR7WK)|dnXzbB-18tlYS^aC!I1$iID?+>P|GzJC7e2-QeQ{Oz1cKyOm41)^fqO&ul3P& z{)X4 zOS`zZoFq12B?elWjz35zG%?(vsyCzMy*!_}U{WGkB9)_zP%ZTpp?Sg$H{+4_vZbTG^wdypgbv zm=p-4n@H1wzNPUvNSBz~|1b;A?6B#@tGzJ3jNQ3kHX3}~+r1ai#!^)ti?F&<%2&jr zR+%Ol#bBn5x*Tq=={#V(GL^&_{>X>#fzcZ<0290;jhYb+cn~gwr12Fq%iKCwdn39F z)L=Ro&34sj80}#?&EdsdJxU#@pF~nCyL&aR=FG#rw^9*ecZTDy{)$&`;b^o;?)_`c zvuO#q=y%J$S5()>{R;0frayi<6f1+M$BK5Z_WD;CyQD8ItoW!GnUN_s_(Pv&)O>Td zFN7}ybf29S^n&weI^Kweg%!Pd)kzagU|17u5b&65OkY_PPpY@Dn1!4IYF-8$`L+S`Mo$H~$s zD=d$8ML%v6fQRhuf7?~>z?F?!uYa5^yg>fpCM|RpVcL{cqzm4bwwzfRX z!|2WZ^hKq;qct?@Du=RIrR$e<%5!0(o=74lR71n zAR;@T4V(Fi!d?V%KmX5woBfwQ9KI@}qjG9;tHk_X&D`?ymr75HU+6qH`y~A61#=N_ z<(=~~`N>)t$IO{SsR1i%Df8R(665n4aKjgWLPnCA4$elso5d|9s?ofW8ugj3GLUI% z(?j>>!lPZ>$S(OHxmk;|K&w&)cDKcP2VKxRfo48|vSrn(UW$8vW=|WxjiMkb*v>yN zzoUr!cE^Afg%DVx%^$Pa{<#4kS9Yz!R1fV9-CQjFjKc5vQde%Y015(TzpeO+Aw)@`*^vA*8R< zaXIt~YoMZ(Bc0A57~7aM4=61CB|+YtCJcuW*h9h1NW(=t<>M@?GGW?OkA+?#f{kEN zWQat(PI^&@4}DjIXA?Gy4F?hIpck!N^jB{SBbZ$+aOM8NXHjh5*w7QYP4qEsOd0e! zzy@(RtOC7~#Rsd@Q3eZ?T{g$4{w#B2;su&FY9cAC`g|Xkp2u!?2hZp#nOP_UF3n9V zRvLa<-hMww@+j;MKz9rptN(CQI%xean7Yf@EJ=+dNWksNP(UYc?#HD{Dgs&q_=$rc z>UQ(a$&TjfW0k0M{)eolZ`#&@nd)CChtzkWryV4NFD_SgvcrRbx@zUo>W|g%T-@{o zQiU;!nPu6BGgMRFQjl~J6l&-cL!}%w!DzUB4#Ikw8>4lFE~*{vMgfu0vLLd052ceh zb_JmYabyMrgqsBq9Hm)OPNE7FBEJ*Ts&@|&v5|SP&BC7JEEiN zHWThH$LCaj1%4HzH0~mxj%4C7iI7^c;Soq4lKnBowHX13*OzQOnp3~0$E+i}5kgPn zzFL=*lhoTat2X}3$UXZx)nmi4WV+R0Gxn>Z{1+(0aWAkIR0hP)mVtcbEYN;-b=+rY zwZ6#Q>&KrUywBWS8CN2^sd^vxce>M>50+>TGtqhmg{7u)uIzHRe5$XX{@>p&^b7ur zu;L28Imz`4j_$uuPUg3aP8r()k=n+SBMr@(-4#;9>*n`~F^4xiw;Y3;)m=4+ZSGVc zRW4^wK0MXiftd$YNs;9?6ehH*m#(M%oO9wJ#7cZAJ%TPS4nd&;O1WmkQ0I~#OcZs} z`;x>3DK?_$Z09P`o2M@^K6Tq7)(4FD#F-sbQHIxTOK>Z~@3f2l+$)zvN zrg%SKQ-ah@I3eKXSEnhfuv8jhUV#hKNQWStkYquV=@;mni$~tvYk`$%Yb*@Un!Lm0 z_pQU`(n)4sxRl^9AUlZl@7#J@HtbKN^97{%FIbxs*-5PvtK>#mGFD-%?Q5;u<~TvR zEuJKt24?bZJfn*S%J}?1Lf9B7KS9xv{7z85_(fM=xp+^n>0K{^e_^}k7`A=CstHqj zVszU|%vf?U*Z>TQ>82?jtI!@~Y_tQ@v)kW{^A+szmf?2nN<5!Mf=~;~k4$_GEG@By zVB6rMFI#}q9;F|g6kWa^YIWir?zJmPdjK@miIMPDt|`9NBbOM)3BBdp)RrzzKHkOM zCFYhg2npFd`$oBHt>F?b1Yo7vem-_QShl6u^d*622t~{9=ov?x{SqT9^cX5>U;WCi z(;S*Nl)Q0#!#VM7_LHA0{b73@<4MtJwcYe<1(wKJNyp?d;?R&vbnNCF{a`AjXq;EK z$_)0M*llL{e2)y`v-5^w>dQYS0GqV@A_4b59!l7Y{38NdrfnD=cL2%9)u*fkuUnFG z5Mn~@_ylg0lic&4*)l6-zaA}k3Sful*uuFL!SGp>)ZWDY`S~@bzWKvTV$hDwb z+aTEKIOup%82Ng1h7SKCWtkS@A(NCRoCCc{o2l+wU*ldYv+y9E2xJ3TzM~Ixn-t*m zE*xJ=1rhOP7z=S56Rwpkm~s*FntY^r>kfR=V3*GlFUBJg(UE*<{7n;O>H&#Fc?*1e zwHKeSzHu%q*rlNrUr&%=kl5q61cPY}I@eb;@B*PWlx%b&B-$1TrEV5qa;MxQUp{=8 zN>c;h3&Ba{sgWItF3%a}rOn@JyBFW~a;*EUq5-9yDNZ+V1-u`h<)K#?)$j-|OfvFE zEncOXPwp6-nqKUTH>+43zVu1TZaBaXOi()%s9cH7+GZ8a(&z4+?dr-mvF6fx?8a78 zgqa?v$xmOdEn=k~XfAU=V1z(8z_p&EGQMkVQ+oD&`n_QXOQ_N1az=6QMnH+@Mf2&xQ>0kca&BiRz2@s1JA zo@4Pt!E0)LA#c)(Ev&AmS~e!?rG;BD9KDWO?ee2Z`Us5P&XYpQoU;f^HN;m@OC1D> zLo`-7umGDC79;K$c+=t+cx;AdXC#it*)NR>7qR_Q6O$JqvZxlS8f_z)VV{zP=Jc(s z5s#_LfE%2jKUk{ip{u?2gX5EVNUw}<@!*)CbOQO^VWg*9v@quLJ z&-Om*x5mcl$bipq?kcdh)~kFcY>nGaa0>*gPqdj!GBBy^SCgRdKIapVDE>C3juiI3 zmKHtxm~=izT56PoyJ{jXlY6cf)x(s}HMR2T9y#BuZ7I(+&^oH>!4Oxb8A!nJaFA(VHUybYc>Vje?6;O)p;h)#2I3I98O76w5eHPJGPKgtX5m zYm%y?yKS1AdKdV!Y9uX^aPOVyR=IzCJQa-6AxXS1Agm2kDig_z8=;bAp4R5fByUc5 z`@|=Ukl3}Th`Q|5U5YvVcuO5uL!-4U=R2e}dP&}onJ}E6Wvdh6Bv+YkvP`^!X+n-Y zBN*L6jZDi?+qurr7RQGmW5C;9L{i;Xpa{@nl+s(pC68o4a)&JUsEx`9{L`?+EFWej_V5>xBuLXNO~+6=oa*3 za`X%3RJR3YCVG?>z=2Y`XD)G_N?>Jd*up;cW+`&6HDsjX0FpHwQQdaX2iFzmkhjrq z;pW>u@p7kkT7)>V56)PfoyI=75R@Co=)h7IcZMG(Y^ z!=ySN76koM5vaq$wm1XlcRy%Ae(lS2B42y3rKS;|$?h5&rO#=fnnjaD>Hc2Tk;${5 zrTAmRdSi${72qiEp_N|!DIzA-qA_G`Q-d4Qkn;Mb z*;6wO=~^U`f=Eqi`*a%C^R4i)%R2829kpqcRTkhagTU4HdcjBwadx0Aw zChb%rbmHmm1k9Hq`JwXTlZ7cE^q=?F%K1Nyv^qSh$7mTdq?)U7`gp;Vv>XfH!%BcI zuhRIsJoWKH4+<(A!O0+yzYu%lFagmo)NqnlUJE7sg$ZqzGNz zs|6Rn&D<=5~(T(`5M<*NsG6x5{4;Y(GslO%vL6=L-^46tP8T z8hpf(#fbovL+O+nrF$8lQ&SVYyGZ_FugfLE*29a1eH3JVmbjF>5KG|(vezz6UA*EU z2^UN@mP7k0b?FDKr?+N>&b|$pX>D)}5s#YysgrrWLEvPmyBSf`hhg=_ND-S}}ZF%jj$$HJLK_zKY@1EYUsZY$Yjyt1uCR33#Rp=oL1rCnC zA6yM7a4eE3<^5cUWx%H5B~Yld<*KN~R{XeJ4_OIbEnS7`LRI^dC7Eqyykul~m4J5; zT7*VZp6*R9lKU%RvN@_;`pi<*j>}kRVs*{Pnk^OM+A@7i2iwlQ=wDg3UXiREO|9L7 zscC~!YS``}GM8y@QO(SRxk%-YpT?3tQ8YEH4J~GAv1j@Ircf=?yZnYORI#5pDIf{V z{q2}`<32$wH>I9aMF*e1ev>tm9C9X8UENq4U~L%YnjS;gJospv+3u2T2<_$8<-vOPe>BxjFW>z&4eGX0!$=dkCNR=4v+0irr={A!riq#@~jc zVOl&#oT|p4t;bH8&-$?$12>ub4yz$YbjL*1G`xI>NLp^FOC=<<z=FJKX-gYi&jKc+-2o(BE6jAQNqfx`i{J4T{!2^+e_?-Lw+Eb7{~&Kq+wCBZ z{A#r+;(S7-2KY5*t)`cSOFHu^q$!I3qWG0Cfw zPmpUKhU&+zli}H~unnmKZRQ8>GfIk0RrqWbQ3Ud#;U&MrRzJlA|iC=#{3uKQc(GRKntOGB?6N zD%;oT$ z#EIQh8GIXW3|o-rnEhv%nHOvch<^7A=~z<&nPzNyr>ir z6p7TE{mvh>rSUv&YNkm-HOE-Alu9D!P~}x=xU+l z7u|68qRzSx_&y{)9wm)Cvplt3-35lhQCSW=Eiyi-o zz8&z1QX5(6{If=;jV(8ohKE~c)t)V4c0w89_RkG`!{P%@@i7be%EyD-VV0>p?~Fe9 z%gMXpZtu5=389l;ECH#mv-o>iQ}VjtbEnir)NY%`pNnF3%ZC`cA775uLsz#5V%4yv zD$CJyB>7hsR2)m~F^VGrAsAunbTurzYQ>|-w_%E_0A{psaj()GJIldN#+vq_j%=bG zcV*koPQvG_{+u(Oj3dR&()AL`FKwjV4}jV#`GRk&M$(Nc01rrOY|;*MEScA8-ctjR zMj=vQ-DvWZRFNp=r6sXQ>4`I*9`Edje5h5=#KBjm?>8xA#Xi4ap+2&m<__bcO}aYU zy7Y~YesXCliRMdcl0$#5%6%jLZMABdll=0we7naARlAKv>^KXwMIhbI%)VBd(S)dv zf5m>p)52P{qM`agHr~Zxn$&8S2Zh(rJ4|8?NbF!IK~oCC@&9DkQYq39syb3=&21_0 zXiaBG%dpm4KU}h@qX_1hvp{0|-L>LrC`r7N{AZF#Pjxqkj%78j>4%2l-8VZl=y6-M z&%V!YISt+BFIh98Wq4hlzk?q4x;?N|6a819VE*4XI={dbxF;2eX8cr$XwUFMnv_dO z0UV&z$_5^XOh8%b@&tjyzLVxI-KaW{8HdQ^Vi18NS|)f8J8gPvg&wqvgh zYNzKKSWp_S-~b{2POz}% zMZxK!v`_DOet$AyLgl?edaUy-*s2t=KGXElU|;ovb$;1Hp>5Ech;<@T{cB%MmG^mr zh)VYd5cqS#`9GPI_CMB*`9Bo&e`Nj3E+vEC?^4s+Z~)P2S$*{^$)>mHhaFW-#2Us=-Otgz4Tj>p-mp4E+`N1uvDs3+v_qQvzr4BZqHU07=QI zptK60ZkaJU-|k{mpOJpc<0Jrn)nYHeG>1tjRU@O376X9#4rIS5Orzq9t@D=Vapb!Q zNs$i95q7h3S>EOqh>E_g8n-8_u;*RdEOKQ#x+eu0=w_LhM2#}juK1HVZS6P~%N z!AK;SVl1zVS@i{@x8x40TaLTC!J;PzVI+?}=t;6r(G~~@+16$Y6jF9{uW{WqaN!YG z8ST`U1^07*19RwdkbSZw>$Mkv1Q$H330i9Ltf!gUxX|wauE)(CgqPg`J~^!`j1n+K z4nbM@ouNPA?+VtVE(NmK}|LF}EgW2&NOb^ri1(*?G73FF*( z7E|p%nwwV`(HGu{!+iG%p*s@#jfr2fsOnmiFBpSu^5lI=Hp z9p;b@_A;`}admZDEeBc+l;3l-hTB6#gT{vQGlIljQ3gW{#;WyDH!8+>i)VGj-@PV! znSRtkc?Xc%XA-ZzdY)RT_v9j01=7l4R{bIHzSF`}JD*@2r2W%-#iwMiB%d)5HR^p> z8RvD+HjhoB_T-d_rj*3visV4q!2)L!=8U(gEe=$_dWmdt5iN2odMQP7&BJt@)=1Wuju@Q#S{qG++79pHBB8SX!ahBMo(W<^{|PSB z=tZ)ue~;gvAz}M85xFhFT9H%CZmb@^+TtUt4i43gq@CPQ(YrxojwNFi(OFqg>W?+Q zg>OsIQq`o`7CdiE(+zE2Gc=(DQ3hsxqk@pkek|~md6FhgoqD^*ho_p9mCpJt>&1egj`3J#=kHa|M67~x-L_NY_rJ^$I=vFs0jtTNKH)WT|-kd76MYZ@^ zUeM!32`PANrlILt{`8sM5cZ3-V_39eze3Q_*4`|B^sYA3-hR?2FY#=;DBE1?Vn3%6 zOpW4u24mkRr@OejQ`u;8XtKtl)QSbdVuh0_xq`hx3)ptRXz83AU`?=8xfYS?+9FY$ z3cV>@Db}AUI96M2|5;hw`2{^qhFt~haTP?)$C4JeC$C-Xw<2$EnUW90eH~LKU|toPCF#T&7dP^$=tUBx4hPXREBC zs^IiU!EXeHlavNkwjsppYib$^k~LcNG=%wRb?=hr=FDQonN|bB$u_h|yxya31_k%1 zRv)SHGvmvR{n+7NKb9%adgQD=OQ~VKd|tlDmq3TC%Bw9uE#lK%~1jIu`RTDOQbkSg0pW@uVRC&-aAw zlJ}Fmoq7#rBc+SfSi4UJ z$^EczFqN$I2`)y%#1|_!$!7)So+4vQdn7rjSX}%Q9c2!qIjV3;gyB#+c5N{!HG{E8 zb39F+ZAn#gc2{a`^Cpd@?y>xOg?1f5-ZxjGD7yHe*0$KO6J zlrZSk%tJK08%7(ESV-t=46=r~B1HSNz3>AIFRETK3%seK7hGi+o-nC%bM`AM8+Y)l zoLv1$bhM%{Y=a!JW3TfpeQ61MqKWj0RyE3ZZcjd0wGTl~l7dzMJD|X1&bI6yXk8F7 zo&S*tB-fOF+#?8cY0060tSiXfw{KT`gYg)Dy?md$Y{wGE>ypBcMrb{gDJC#IXim<@9x%*jRjP4VYMM zk6ysOQkQ5>loj_sG!|`HzmU67RQ(Y9q=Tb*tGkDD4TlH%ZvSG${b^U{>Tw^IsPeNB z)yEo^S!zM_kd^3zC(r>3ZTBIPub;Lt4|II^@Mc3x%lYt`%vE#2WU+CXu6W_e>x8Fw z0O9ZEmRjHC&6UhVs4HUH!Zy+J2H8MJd`;wKVA(jyM5(BdW%AU@S*IPdB!-HZM%dZQ z2dx%^NA3`9u+(YwMs9IN2Eu^8oSFy^u zVu+ZJ>{G~P6sbq$?v<+J9UyRFmfbTbBNG|3)bW>0x=& zenz_v6&)%B>6hE5ksCD%)*$a{?}x0%&FiT8#c{6YqM#B99RV4)Zs_yjJVkWfXr}Vf zOjaTl8YyPL_Vhu1uwdv}&8j|60**z>!P@t8V(jXXXDZKEF+yI>@O3QrF=1&{KdnxW zlAw&?^b3v66lTIw`qJyR?zFn`Rp@;=wM!y%6$#D1^C|bD`8T$bG*iQvCUrs`?j4 z)U2h)JYzRn+E!Ci4j;(Nq+3MDDu)fj_M}sZhS$B>2x+`!-5?oyqkBuWBdbYBbRB@3 z1q@OH_?^%%(<-Z7(^&jFFyI8wzKHQ`Xo!h3p~z@fN;xEDvNu;-$49tvs(S*12U49o zU$Hqf3|4O~fGQE6Ua>bsWhU~SAXl5)-yPS3zCMg?+O~x{c zzm7`c-1ULxD-1gLOe5-%mII_PfoDdEjSk`CusI ze+f*|e~FA`s~I)X#`;327+B3nrqn#;dGVSUT}}5O8E`9RZW8DFUqg0bt*eQU;B9b=`8nl*obtVKk_|A!)=3^cAdzS*Y+ z@?Kw~?DwS|y+0P}XW_jAxSRH`i^-3kNILR@-iG1@@$4Z}LNZhvh9eK7e}1uH59f>k z={k8Rdz23^`{6X!@2=p|P$=~Y^R*dCPLx=f26WXp$*FR8G%Z_RWlp9_?to})?pL1B z->_>9Tu3b|hh$7zm{wB#cmx!^H9I5L?xiyG34M7ZX?)z07Zz`{~XM_PyDCIy*96F1y*BgTGEvT?>vH(QM1p;Mx3aLZzG z9Dlso_;s}*s&(~cMZrC_Rblt(krKVT&N77p34i`j*fU4Zt#Ow;pXm$FS$L|?VbNuL zkd&vX!Ls<&X@N7pRW`5q`5u3l$gBlf-dY!9&3LxHecbUk55XLQh=tGE%}7RZycncz zVQ$wdmT1hgHQ~=aK1sRRdz*wQxaY2}S;4tEXrD3_(kvee77qQq@H zi?D}{RGt!9bv$GOOTv_ia|Sh+A>ZxW*)PX3{j!kE3~Z|mgL^pqbc3TC`A#Xur>f!; z#m`>_>|!&C(L$+X-$J}<4V8I}-CaG}6w?TQVls{gajOlL4jjDD3MBbp7_vMv1{YJ- z87NkS5$y$iqK$gmska?e!Ll-xt?FSg95RmUzQiJXg%@P;F~tr9S!FqGdzSg5N0H$@ zb4O^mE6`6N5;Kp4?7+fV;xHs^Kyy~Jw7@3G(1dvo$_AgrZO^K$oLCb2Q&?MnVMH?i z>Iqb8gPTRip|{Z10yx=^qSA!sevdP*tKQ5J&;2Wvy|uNU6P#In<%!g$`H?{VTTwU` z->$aW=Tnq%I$oXu;AH+6hWGqL!g9&^ER3crC1N2V$t_r~RWem^aNNlKtLM??rnNT) z#E_5{UKZ2ac!J5x=3Nxe+;cSyzQ(9aeKOeGyf8S;ufti=NO1^Xk?_vCQXSPAKh5T1 z;hXKtyVU;-mlmC!ZJ3|7Zd((a5v>C-PhYE#QVlVO_H^sfnZ7-ul(VJf3laqusktf@D*Md& z)(c10lwmH%i3Obf`P|F@YfdV2l3XqK&f46sipqU9EkZ+68q*Vvl%;-DpY8Bb9y|;t z-W5X=en2obp!tfrB9J-+j242Hl&JIzC^!CxS`7c~$g3sZNY#!|HI)0&MhdCfd@y3)93qr45f1rUAuLg~3^8v^ zx|x$r7FYHhCF-@;WgS=_3D?o;)JT5&kZ0YQpVs}_T}@>{nB)BIAQ@Y9n%OhfB3>0j zWa!)F3=6YEA^9hiP4tv*$}sF7)jx*5S&gI#`HgXoileS7>bbyxDHyQ1}l+m~lakDKUSy@(Hf}_z+{dOT)o(I}o z^_gXF?ocSat>qr61Nr5yc66J6dfay!uE?H;OspXI05J1~ddiKnv$(GfPhXOhL}AYS zpae+nLJF8t>#&6~)w3jKT+!@(m2Y&6Oe@-tNG9g{fn3tZ*e_QOZ*6rTz6BrX4sL5T zJ;F8t6V0Ojk4IL#`18@}Li4|fgV3A$wZ~N!zwJI7ObP3njHD{72zgCik{v zG+(Cdgj)SLq7Mx!@2<3`O7Q76Ia{=g*%s2CzvUF;91}`*mGX&nHEcF(TsXJm6pXbc z@Ve_P*BLw45*O)<^;_ar_QZ^l+>QiPrEoCuvEVH0TEBg_jovL)FD~ z^`$X_OdwWF5zDWa5_+5R8q-~zF`>@)w}Uoo70v=3;G@tjX%kmTqe-8m0-^Z5yR}8t z?D%nyE*?B&oH?V;TU_>d7hNZymSkB~3jN8%%<1r46dw3Gtz&U;WqDs@I6Sl)q3P~Y zj)E2DLe{}u5HSrR3>>0vz1PV;R>uU2X@8zp5i%v_)QRz6t%Vds zz35sf%z&zb#wURy%c~5!X#GfH@zSW$9a5?-S%ZXL%D664-<(!jLeN@d*&{AsfG^zw zBb6Zr(V5k5COWOK-h&)Ha40jcF#W?sDk7NjKO2GUtdC5eCb`V0Pg}BSPV;)@B&07@ zjlQLXKTh{a2p`jWNU!NcO~k0}YpE|i)B7tdl`>2YVIYV4jOsTH7Z zUaBT0el3R!MGZP+qo*&NTXNI5B5PAM(n;)4?m?tB8Aq*tfu8xFWBLBA*$RFy9`&@m z1Cal?15^T|oyX=6x3R8$FV+4mQ(AH4U(ec7V_y!{wB+fVdJa?HGh|@?+V_+UNU>od zs8(qR>6ELZj`WHS&EKY&tcZESK1n*{b=rTBeFxZv&$YXBN;Jq!OV!cCQ-GyCh9~*f z*SBdcw$=IIb&B>Afi>ml@#;8}C;89^Eg=cRLdN<&h0xeujInegHJ##HH#%d{ z#S395r?VY}EeqWNpzU?2O}p_VpU^`{Ib9wT+MwDdNvx{t(la^dNmV>yIFg7-iRB{$Yp<11N>s??5fL7VE{r(^q)*m) zyT_}qBnDzWdmFu;i6#32OgwX-8cPn%=;AUeaacV7f5HU7z=;l1ubvnd^Uish??t6g zZyGP}wC=>oG=Zy832%;7YF{r0%P_9)vMgG}f9|75%+4AX?o}N3t~pxJK>PGwE9_B$ zd)w6ridgZdlG?X#6Jtf73aX`^;y>F3xDIEuECGHo)RRqH4}t3$oT5dhu%Y4?F6tOq zZhC01335za$(sFr%5x=>Zbk8RZg5R3KD5X%mk@NBY4MXMjanUpG^p+&2cQDkw{=jW zQztj?$rT6me4Xi=6moz$`eBS~Iu1WfT#6>KK$<7}ysw_aJQhV3rZBRwaOF1UFV307 zR2ws+J?-kxB^^bCw(1)CdB-!`hXIVNE4IOGbS?f{5rE(^;@WNz|kYW^q>%pp4nCZ{XqS zeNy|a7ZT%17AXV!`&{8V&rHm04AnLyPske3DC^?}JA;upyCcS`XQ{^|EkBJv#p$;d z$d&AKK_vrhpmNU&NBd_M>Bv|`vdfF^pYUy;e8eQ9W%aJGU zRk^KvDu-((K)Y#X&!C1TA~`IG`e8Y1D9|dULO;aD6R@dq<=>EHX3tW4gndE5$m#@f zTRDY3elrd!(CUfhc`CE!)AyQJ=-LF>UBWF%M8nI@v*Oezp%B+waWI=dRy_4BQZSg| zm@#Fq_575~Y|1!=Cz`a&kOtD*L`%64b}Bvxb2oGh4d|T-S*j*vvD~fjOnSI98#;~0 z3TCXHdo}`J*M9A}jcfSxF7l*k_R+l_QC{!K)kY$27Rw7Sm61h@;B0D(&rs|5ABKlF z@R=kX+%hjSBi~p~cm^OfT%AIl(=&QPyoPOwf%BmhX!CwnswcL23VF7n*=Ybbv^qR9 z3~5pWb4mb~sI(knLgIgp5&p-6fB*c}5%Hvd9D!fhnSy_iU8!L%x(pG9q3vU;ME4b6 zV5Gn36c){tCYB+QU_=!sG;G4P`tETsUnwa%$Y6sNulEYc;SzlA8BEsnvGAn<9{ih?vb0LW#}gpbUeowZmX-& z>=0nod~^}5X6n$&l< z9W0gM&UH8`(7clzQG-gVCY)jZl%QG5Wz8|nDw%w#)I()q_O<+2dR-BFwOnY^wAm{F z#pk8(lXKKsyEgOmHk>k5B{3SrZ-`Kd=*u)l_ty@r{XcMR#PF|&^RZE{qVih_9?hs+ z3u|gDni6m+>#j$No^5Itx%h8`hA4|G-e9J~4vyRHTy88No?aSAK$B2IgHb-cCh*ls zho8nz&zV!?3~7qceX)v|u|c8R``;+R^T1oT55eEXA9vXghqE_cIbud(9ccR(ji!&sq< zR%_H2FJ0d*UVZaV>ca%bUT5wM9hpB~O%Tl;0)bx$fnPSH_S-wgf3TXzI~F-L!@7~7 zm?xZ55KN8tE?oSvn4>k>Y-!9f>&A*=9+U3~jP$c5U0nMS$nd4cdzW@C7g=(&pT5@fIwfl(=J55Jb$E%l+ezjojI?NkE8DmdXa5_1)8mf`PH>uW{OR|Lr;25x zsAt+h(=u8-W5)-c@qD)ovE>kC>_D274HapMp(01N$8($2kst%)nU{ndPmZxAjPoyq z=pBylSGkPgo_F(r$bEx?#w8PHiS=n@({X#9&En%l*V+e(a4|j|RpI9+Uuek=7`f(q zOw!p_c;r1o9?(ads3;AaiFq3%+$%pM1T;#QJkq*^&|p}He|&jDc&~h`&3y0|s=tU1 z4uQZQM&Q>~XZ~-f2EIR{dP$#yoKN?FQvMMoKu#&FbaF9li6F=jiv$B-c(YhMG6jUk zk(4|%rkzAEnDN}G1nb8NO2p?IaU1hmH_LPrrl%q+aXIc;n&#wsQ%lEhOc*o5I8XYp zmNmChlo`DNgaD^p9RZc(d){~eQ~>5kX=y3KFGe7MK;Z8p@XNPOe-|M{0R#dF1pW+x zUpZR#hpoRq!wqpR0)bx{fnS>a`juZi;@J@h{8th9wF9XL>j8lP0)hVm0ti|I5tcz9 zfI#36Ab_AX{;M|H9{?F~3j_lHW(Xi?4MhD%Ab>#N4h{6a25D19j{bRJo zl$Nh~*8i_-FuJh9T3bJO>*!9P__uExzQbJT-Ped@~Z!xx%~$=y87 z0hi_h*UzS5u)>RxhACBw<7@apf3Vt(x!x`l*5z&a9iSl>#sxN02lww*+}`RPrd><5 zz`&Lb`&)N_P>Mf013o+(K0g3-+WPvcVOioVI^YI24W1vp0}uoM;~s;z8Smf=g_O^q zpMu*Vh+80lLVQub)jdSLP`g1iAc&S0(O)1sR)l$gu%i$LC&Fw-_!kH-3E@s6{B8t$ zfS_3rWD$Z@L(q@^o2W&QMF_G8LCYh83y3HTB9?>*x*?*Ih!`s(7>x+2BeVwykp@C) zg3#n3ltl;$7DB7`e^h-4vIs#IA;=;GS%e^q5M&X8EJBb)|2||<5`)1vaC4W8FheZ7U~2M^EXv`{ldI>y<^rY#&TS$DcYsepTF%!dlQ-cl zt>XlDh}61=(xUZ+WJU$J+!NY`!M7>)QqiV^h;(Sjh_M!+PZi$Qdr03(E1 zu!ArQ5M}}3zuzpVYQVp+^$=t!W*MwJ_9gu?9bzzZHO{-#fXNnMR4!J{OKxKj8E+fn z4%J+{QCn;~*eUO<5MS=Yuv4;PhuClH;COI{Mrd6t_=|%_z-KCh^eyU>_zh)*h95!liomL#&37*0I0ruNGB>{x0sC|NHQ; zCKwU0MOa7(3khK%AuJ?>g@mw>{-lyZ#N!a$Jc2ku2>lQe#^3a%2$>E-rh_0(;Noe7 zS@3UV78pVw8%WOH0m=w>?=N@$CXSDU>s@G^VIu~mxDXn6=p++#Fvb-+f2|NUAS(p# z465Loco$2LH|LDw;`5zoE;5sSo*fzXFua4Qi@-gVg3yL=yo30Xglo8P|At|)>=1V* zSBC^l$*{<4!hcR4J$MKBbLITT9pHD`c?Sd+gNRlkMpq!ljUbFJ#K59I4c_`EN2{uP zQKg7kqwzm~?V(|Ij-F14bo1gKGZl~@bjZ$o)BcXgptfLUC2IJvphR@YKR83|Jss!6 zpJb~}>$FMGP}8wc(?P^aQ2+B0&~-eNhKV@rpHo~09Tx`a9UfexAS+D(&%BtV_H-@VwmZL#0}zI{V#s{D z_u?hZT)4OR!~5<-*bkck5H|ooAAnj00K5PoPk93f0Z6-WV16-JzI^%Oqv8e-`54Yv z_!4zM@qx+);0n!AAn_N&0nt$W9s$+5hSK3BoVgi^d$`^@MNu2V=4HFLnNaMQvn4+4 zgnPRoP|QkAuIqHiKO3lA5XpG_C(CeOyG+^eIov8b!U-u!Xv=rCYmCV=&JNB z`K>DlwXtpLHdY&20Pp><)+^PusGQfin}d2#P@o7YyhW!d3f2no@SzjL0XXCN}1H!_^9d1MsDqaZN>bP zop)a{5)&#IF?mZ|^>6v;kMC7Z?|(jf(XH6mrxlG#GFkh>D*^Y?N39e{vN=`OtR-1! zA(+6}DXJ>sQ|DEgJ4F$Cva7kslCD;k*oSXOnM?6e$h-s>C9jzE! z1(Iy7SHP52kW}sVOHAaEP746>-3t##qFq3H_40m{H&ghi2$k8GJ(sFY-J;UR#g8dh zyZ|`aXcP7J-IZ@)d4_agzM zk?yLG*F#E~-uk-Hy}Nml;{l_AjQy|{GD^`jBpKwyTJ)1%KP2@k^L1TjK^vx=3;YVk zVAKPCUOvqjsMUH`FjtqKuJ(MJDEyns%SK8k`mxwohnD^|ZfE5Y+G%;luZ&cX0X`E@ zO&#>Ma^l_XsVOnLu%d5|qLnf3p}zODaI4653wmDQu6yX8m?~vC?b+5z#NGjky= zqu-|q=oryg6)S9w2R%b~e8>D;ZcYLVlHHgWtQW`=DMhf=tS%3&4Wpn?@X?9R7yt*y(xXfS6843C& zTBm>Ho#yQth?R_`1qrS?h0)L9Ygx&%_nKG`vm}*~D$FyL+KL=}pSH6Nvk+7t{BWAR z6n3eT7I^NbIvo{auU!#ZD$FT;Ft*L~JQd6@eq&VBe@Jc-b66gbY34;dYh;8cxBh%S zq?{i{(`z@!GjP(&)}ob{ml}ApN^%&}0c>^6hr+$(&J?3NfF>tA8=y@=1L7PHKJ zGjR-de@@oOr*P5-rqZTpos}74vgDxah`*kExrfvY$+W*j+w|a1ncj%|pg?PJ_A$Aa zj^B8{KY3S&Er3hV6IUjPHq>iCnJB%3dG_*zEH~p|Ex>k948pI9523C4Fzx{tIYm#k zma%c1@=b5{PEQS&{-qaS;>rBD$$#Wc|BIJlin<8QsSEQjALN;2ok;1>@!bcDUX0Tp z;wrh5J+@4>EY>M{9ae)n2lG8FkXxe5JQE-U;_u3KXC%uvm} z)%&4TL{xH=ShDOF!$Qs}O`d5o?xa!`PCwUBkso|&)-Hh){L!$jXDwwPE_sVUcc z!AHoPmMv0i2*c|q+py6y9oFr@$)7F0^l5A|A`?Abra#EI@XV=$c~gC>0GZCIf>sJR z8Wt}UZ$w7s?f@^bd#2M%t(j<#)QI#BZWz2M%&o!64~;E{GCKwO-@&th>8>(Na+l=1bD6xCMP$5G< zc5X;gH1&h=qXQ1#wjE!P1EpF@4y^plJ~FGSQnYbC?O$-YWs10PyLVB3stmY~13!vZ&gjfNcjHV@$;nvrK!V{Bh!x9LXa`};?~ULOH9m+HwkpKRgGY(MR$cZ@Q( zUSHqp@w}wSx7EXP_^{CVg7x3>S(sk>$cAKEToKmTeZ>Ib|x2`gJmVr%Pw?4-VnM!~y5 zH@O;*b7^==4lEAzS0+?CV1aJy#{QPnVJsoii=JZYh7YB^wC?~?9fu#@>!EKkwoty( zI$FO{1qK`7T<9sN65Yd@j|C1Di4cl72&SJ@(I2b8^%|`g`j-QHQ9~=xUbP!0+(qB@ z*VS^;kzj7UgMz*qft_E0oj-;KDdM_kbxayK-N1z|Xn$}2gv9WPmKbr!{g-J6_!~T( z)a60kgb#fr7MKPs!^JG|%wIgY2WeDdTgEjzXMfRM`!?$k!3_D1P){Yvo$_%aEvLZ9E+33xt8F%-Lab^jov2I*%F zJnB;a8TO?CYk=O5Wdc)Y7IJ$caZ(Udt+{E@YdNBIqe3N)l-PfJ5&-P09h((vXj8<~ zCz`nf%uCweZ2x76I>z4;!rS`A5XfFrb)i+CwkQtI;%jP?;3#RLZlBiq;>X{P$;cm_ z^oW3GYA2nc?}e>4#SQh`3;4va=P+3r7=9C62* z61%2b_ptcq&HArj2hAMRN93E8^j+1bos~Kwp(gB?uHUA#S-I#*j+IWXd+I4%YYKS# z_Bl`4!v>bBr8b$;3)Gf?H^Hc|nyk~mzL_h64DaJ!{e(?{H`@R9PJw?l@rFgOkewJb z^g}^B)x9<<;9gPxQ6bl17l9XdarK8!=y;#*0M#A9?%LrFT8)db?#*IsW<^}sHt8LJ z@#On&sqOH~5*FHg5sdaBaN!Nswt78Bg6N{w8L-s{_2!|^t;N%JrO!f4UL6}%RQ z!i_2v{oE63?{xhBO=-YCH#c=%52dK-hci1K=zy-U!TGN1ZO=bLW#dWl9qBhjVP)%3 zyi{sWfe$t+BhVDot8wRd0yCRu!#UkQaVl?Ka?ei4RtR{OZrK&L767pyJ3Ha#_dzWM z67q_eAg#!6N3#f~WLDPGZ3Mlf%;HLyra`p{Ct;}8T{4`i30h{nQArVvyO#_FOpAsx zp;GTflvcAv?f~q-p~^zP2p(>R&zEvB$=5fnaEoQBp5V>ING8pyh^FuUG43tXz;Ef- z>E)PuMJV9(&%EayK-&#BVM@6< z2k(qhv{a4hp|^V?Br24n? z>%2uC|D4V4%BtQ5Xa?j2y-A(QBQa_>D)g0tZP~A#Vj$My~(Z>fW&fVft(oXDEk#-w4 zM)D=9N3AC)S>I$Z)Mk$P&>xx)SB)E#`LEI*;~!IJ6B3;mD7l!lS6fA-Xidz#b{J}_ zq2p4lEfSt^8<`_IEQn7xRXyXyp5-dC>I%gxWR`OxFXQ$1u*lpb#;*%)<&|O9DmD;f z`iVOmkneOQo`G4ICFW^W%&cf~?q`A;_0wsE`wWM;RW$TxB*}bI^o9F?r=F4FKk- z)>1F<1J)b#SX0%13ICy1O^TGfR$9K2!c*Ldok6RhB>)YQ^l3@W7u157R zFSc5)d7g8dX_9V3uv%&Zu4FkOk^xDK{#im0)jz?79c`OnVUJCWgbNTi->lN3?B=@Beif%e4xqj?Q# zTerVJ*Q_qWJ{Ye#Nd8>hirSik>UK#0(u#Yr^uxm-ji)SCNIqod&79%u>sCjK&-90F z`WGGImKJYZQiv)8YdZtZKW_oC<(7liJ)SM2TZ*)rRX0VVR6J9B_302={c`2WNr7{- z?u+u_9r5@)B1#K8724-=MAzw&Kqg&P2Xzg~1Y zzk2##oTvZucJM#lt|AhlHha?9X}NEg?)ym&#`yO$D)-U1$Eta1>^<#Cd+~~@vqSDp zGGp=64E^x~b;z&(6we)l$8*zz|0B%%NAt_s@(t>Xlg9K;?Wx2&J_JqFmxWz*yK8_) zd;CbqikaSDjR|fu?7|~lY8-eUoH`464`4aGzTEDKf-0eHwV!ahIejNPvS}{e-`CE! zT&2%pjP5z?s`3&XVM{;n!!jXHhOF)e9fPs9qFgxDs)ozdXS6y+nbY`$6Wl%mW6POR zMr%DYn%=L=Mi*GAzeA1b#=A=J=@_ae)9RSgCSY~g)LdKj$9DaYBA3%i>iM^d=XwYZ z8vD?V7L3M7`%Pn8Wq`V@TbC*1-0~0h#p3SAtJbJ$us;1DS{+UQtN}N~Q+TVQrH;T9 z$5L}w!?04P2xUt-A?8Mw{p7r0uEuS5<5&5Je=*1Y@4Z$=l6Wom%G%s)>Lj6c!)!v6 zag>y*vWlAE?&^;W%Y9B#KK`CI=Ks$^-|qnLOz!|EXmI+e(E(0H=HB|H!94;-_{vVQ7)%;_+g_ND`P3Wuan+IAI75-!R{&R$jfo`GC))G$eYwvU2{xtTLy9!9RCp z|Bbgbs>6IT%pvt>-3+9sUMi5K7Z$t<16=tE=W~-{G)aZsMK|J)jkg3)qQ8q=Pd`t$ zAH7tCN5?aUOIg^YEzeXH-rVvo+k2VN#mWwp*0EGfYKY;U%!l?~CdBCoz8X3ypZ`sN zFzm(4yJlx0UTPu!>r~Z}zJPRg+D%p3BoGh079=?cQw7pX7117nD&!9Gj#D-77DkQ5&peDmFQ zGn`RNhoiag^|Or7Qk*Anu^D56n;}}v_}d>P;az41miIqdvfxQ&k$=0FKU!U(VqLn; zBE?R&TdI`3S!X9PaQ3}o$}uK`FzjrKVZ>psie|%IMQc~!r(%eNyCvS1fVFnSiST#7 zLO%yV{x{*9gVv~&RcyYym2oK+i#nKn3bXm{v? z)1PVuef-w4!J*A21i?yZ^4 znlzee7a^N2$8B5W7nIjVwGp>xp?=;^4N#~F7(+&4*L z@_SzI;P2SDk`+n4Iu6ut*3pmv6>0bk~@YRK* ztD}3mcHL~xaCN`;P}O^s!4B>Xy}yQU^%NY-)F8&Be2+NeSbKLmb~FOOxp2Ue%|DjK z)Ip**MieNJzX$5a{Q)am4lUfqXiZc-r(F-60Bat$kDvO#m(6vLv~S%3=DWrEI*N~a z2;Yz7cU`(2Nl%EBcqQi%LVU`GvK9LM!tD{Q-ErG>4v3#N>{HSuA@`Q<<53cogA*Ld z)%DKJiKFO=G~am7RY?P5(`^H_{DS~2F3 zmp6|!`a^(Q{BYfed7sC-JAgv}O|YsxT#OP1UeQ#)QCJdY)sNEdBeXN9dK)Qx#3@Y? zBqWj1gJbcn1@o;lL!?B#xS7GB#OIT$k23ar@4CWPIV8_Jd!0q_4?P4nuemlT>yQOS zFAd4M?*IYE1?$cAYymnzQ(VQMkLi?d;aBV$L@=UHhWXtHtPp@cC|(Om^(K7 z9irpM1!`?DrudghOfD=2(W?*_X9J%;0nCO(+oOwy*0^+kK{Fx{#n$fo^(*LD<)w$- zl8oYOveJ-r$f;A2{jJ=#wnshNAzcnt*p#@U#8r)Y6)fxYJHgak(b+~?loGrZP{)tX z0_%zr{o|R1X1}Gs$0Q;=cLV|m1pfa=ppf_wdK2mvaJ>v)9v**LmlNJV7RuiNJir^& zwHEE*BknDVeaP*7wOmu+5n#AsdyMTC@V9qVt{T)fQiG1~071}WbofB8hU;gpcYrpz zOTRn7wBBuG?Hyn#>!$e*uv-Z0QWGHrcM@#F>ak!S%j_2eZhWj7win<8Khy7yOsSda zWVkd0H>{YBfVXpCuWI$$=8w3{fu|#P04iO!rTt4{Zi=fdaCojRW05C?w*S`ribe!0 z4W)w>=T!)d+oDaCEp0n|R_i91<5{aAW@tND0otDL^1bcE(~cw;9AW&XGQ?-FvVV*7 zV*I;VY)lrW9pi>!+Q*JVzxg%f=*Sn$(cSz%*Zce6dW?JU-oW!|t z2#+w9?FBjGsAjH8vxn@+nUeSIpVg+_@Yj6%pV ziIR2FSS$N75sASt!dQlx@ys*7+d0?up6WfF>zvo9*yX>T;cuPv^5F{K2I7{T63p8u*a#Yl{;TWd4TW1a z6-xN|AKf~)uo5@DA%->>2?yEQ<5x)Ig^S4>9$x))N|AT3#+mibe|J_6N$eI$sMF?N zb&1pFCCFR5ZQNe+>LajbQ*mwDyW>M+*6U4qEIP+-OiG1fz5ffh@TGX?x`xwGCmxD~ zet+XYGZ;;W^)K;JlGq+oh|3_>km*O~5ok)K2c&1Ai1P76@gP<}I zbp0WMBio}44y=_h5{QXm+ko@sP|N)cngIP;m2gZmbUoEP%f~XMq*5D=*05_b7PH;^ zqUAiO)lt19UQtoKoF%1a*5K&&Q2IrF$jX4M(QyMmeb4usaf#>B*1*|q)gPvgR&q^4 zC~LU}PoJy2(@pOnxakm*{(VbcdH0@XY642%C+5Q1P-}11Ii~+w(uy_uMZzX|Bo0hb z7wGmT9<99f;=rv}vnF(mF-3~F;#Ye|l-I=lZ2k0<^=pYo-Ko|CT7IlII4}F9V{3+v zt{F!8Nl2ahDIsXKzHE8GB5`k~zdZ=eDVo;>V8wDZvo1>)VAYqr90u^H?B=H&XU?K$m z(3xaLo|yH=Q;^?g^Y|vF7EyxUs~~Nlt!@OJkCMXnNrT#NXmZ~-8iCcX;)uj$W4R*D zwb#&NbMwKg1c0}Y@y#_V5d7{D-N+`Ol_VVIqcW2@BBPKrR9T+_B->Fq=xeQ@09t8J zKF>^--jf$uZ_IUhc6kLC{pPQ%K4ROYdbRnL#|(|{{B-I#ALW*hfK-ItDgMhDcz2?6 zvbJyC)+iZl#|82oATOSxuX*2(O>VcZ>C_PGGmx0FB=tZON<(QcGusM-58Z)}(l*~w zQ>#`Doj)Wqj71&v5`Uz?PA)&a=A>=(30cWDwRV40^qAak!%jBY}6%_=l4jUCJG_P>P`KSY71jQ`)G3-gbAFC`vwlrKq1baA5qU<-wL^ zhd&I|na97?moYN5635Z-MJGq7V)GyzQW#Ft|M`)YD~pu;iF#+&VdFPnKs>u z@XZlE%K9jWsxrC+(FgiZW-2^*pM3*#^aI?e#}Ky~Ull$o>;;{75zSrnhsr_2AQ%Jb zB}=mxEfu}%J?r3^`AH8h$DazX6LC=0-&i_VXl>kO(;&vtjJ`AvXPI?pVml19m#u}g zm__^C3Y#wsE6ho6EJ&$1SxTVerBjV^qcb{6k4Hp%xXFAmy8{olJ0_^MQI<2VFGXe- zmA9#jrK`oIT)}XL zm1^zw_H)wmx38X0X_6`-;cv?fYvf!t?pPTXc!DAFnMyJg^^mpLr8Ind&u&%L+=~ux zC$=XtsUapCK8oCZuuk#vu^797da}q^k+{8%CQa(5`B4 zd6u1weU`f6>JiVmd&~V}S1?JtM{g(b?st8RlFHhcP(_Fv>SQj*26;<|xT$<0i?>&1 z+&C?Nz;_B%DPOLFF?EHusnBV*d!PRK{XdP_(hEG`dAKes5=D9I`~1A7SbwwN2%sjN z9fMXArWtuQ*DRc(++0$%;4BFxb)>!r(L+x)Um}a`j zfR?l+BVl>YTI(f;&7u_d`0i@wmSdjGjr8CKEq?D&EVV}Bc za5_qYL7v_^fcF{28d2KL?a@N|^tdFXnrm{O;A z*FyvRAMCW@L;33BGw)quCNxVtZA#Hhf^{{v)xW;4BHJk{dnH|0>>^%zR2WvEy5J^l za=fy>eC#`4vAtH$S_R(N$qj^hYC%EHk0)krQ=u#|d`Ct?sH1ZGiDq&0;73ll_;g2~ za_%+Kg_ipAmyyG&jNpxwy4>VClPyIwKRwf2hqE-x{T?!IdXXpkEgSolDz5sfIQ#gC zCwMI}v|Awl@S14)cA3c$B4xm~WQX*tqT=Agg*Oti1A`Az=9#Pv@$snbn#lT0T-c89 z)JysFbA9U@y(e7(7;yvL0y>lH9Xd0~F!Z`{L25rK&>iBImD?C2WOAaGc>X2JpAUAE9nu+TWH z?=@9suwtTYcn)}qounP?6pH_YqSyCk^784OQZi1B(yta1d9z8rs>0La{gYeQLbNo~ zVi_S`u&d9CyV%j@v`*1k2qj-Pv^28D?L#^rMXQnE z_L1LyybO}fnzjatRS0f+TD7o=^&us6Kz7}yEf zang-wyYQ5fl*e%H!mLVkX^}b$%@sT4Nhrl-nA<;F+?pFamB>dejaBEMpw9-+bqu=| zThenrEI9v>0(!vTG>U>RW0?x(@(QC%22Cu^&w1(C#T&quDIP#}qS=z91I*%wyoh*< zkDI%#3wl`XUfBD%H%01ixK5KMR~*j2yWu_(PIc}YEpUY4v`L*ph=u2`x z)r%V=>%Fz=LP5I_?1%`VV-GPaZYeNXG0#G^Yww!2H+X&YEJu|$HWF)6JZ;NBf2|rb zgIIbMX4qb5sL&Q=Yd=s+Kc=JBT?y;jZB#HC?}?bwh;#p>c3CZFfVw;m$^ZBSM zB2xa6+;xVJYEs~%)z=;&}kdHbDuj!Eq}9$X@~XUZ;c(G?VTHPz zG*~#%jKK|hZxJYeX|3fwr_&{s6Ke_x?A^ZF;L9~rEUyT>Xx0_*>K$^xI|d;J;UKh@ zNkbM9#erbP0wE0qb(!%yocP(61F~XTsCSiTtV{hBH7_+%qBXLQJTj~u6*k~RQuNd0gaiN)`g_pZ=2&@#z#T+ zW1Hg&T*in`w!xT`%*Xj4N>O;fqAk(;cJUG64ymVJI|EA2_g+VAg1l4+FZQtHd=n+| zF~`QP2w!b|CkX>P-v)i|4nvEx2kmN-+B;ifZg!*s$6uQ`=UIgO*4CRr ztmjzS+R+^ZUS7ifGeOI&f^QHXKjWhYPx)&SyZzA9E(Vd+A{+B0mSFWkM82gGb_i=I zmhZG=xHsZ!&rXh3$~7;)ao#0FN^bD4U#NGO83A7_3=+R746m?%Sz#C)R=nNz$-LJw zt2F2X7q!`e^eRrkG~2Agf0g3GX=F z1Jh#Ti9Nz&q-DFX*L{DURU74n-9|sF$e9JFm4)HD(UK^aK0K~wd)oqJC!{pd$=YC3S;Q_03OoIp;t9&aIftnPkNFh%Ed$zIx8a+m4 z2C_YnD|{4378IXPQa&?TNz^B$-ykir~Q$+Q7bxt+igUA1#8~(9c!S< zOb~Q$#>SrV{TU#z2FK_rq*eJp2v>m2obRL}h62dm0gwj!f&T8+)$m)S|Eq9(NclL1%~7DX`Df3)5&`3lK82@k~tj znOM4g*tz*T96UboQNy`2S%|UDz&$gfeIVe_u5rua3XpgKQWaqaks`N$-rxZj>Qp^) zg*<7~k_xLeJ-d)op$qw;`&R%zKJVPW?Rn|{o8&wXe7R@wB-i%j+%CCYc0R-> zz{(U}kE{j;eK4|%z(?K4#2`zcMZlof18JJz_DiW!eAEK+b*LX~3bO^ukArCS>R0NK zyaB)Itj_J#C!!L!&b!oEq(XMrrf*MI*SPk%#&RlYYPiCeu9^4!V_m*`_7?hgFJJrG zq8iC^@@xx4oI~%e{*V?D0FoE%J}U!vj9G4><8EkM`P)TbjLUb`?!;s>mt{lgmY=pf z!p7L1p6{+iTSbqTYN1+MI|@_;0RaJlzdM0gd#whyr 60) { + print(paste(all_T[i], round(time.i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(time.i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 3), 'mins' )) +``` + diff --git a/vignettes/seurat5_large_dataset_analysis.nb.html b/vignettes/seurat5_large_dataset_analysis.nb.html new file mode 100644 index 000000000..dbd453ec2 --- /dev/null +++ b/vignettes/seurat5_large_dataset_analysis.nb.html @@ -0,0 +1,423 @@ + + + + + + + + + + + + + +Seurat 5: Large dataset analysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + + + + + + + + + + + + + + + + diff --git a/vignettes/seurat5_sketch_integration.Rmd b/vignettes/seurat5_sketch_integration.Rmd new file mode 100644 index 000000000..c9f8c390c --- /dev/null +++ b/vignettes/seurat5_sketch_integration.Rmd @@ -0,0 +1,131 @@ +--- +title: "Seurat 5: Sketch integration" +output: html_notebook +--- + +```{r install_seurat, warning=FALSE, message=FALSE, eval=FALSE} +remotes::install_github("mojaveazure/seurat-object", ref = "feat/CalN_generic") +remotes::install_github("satijalab/seurat-private", ref = "feat/S5_transferAnchors") +``` + +## load library +```{r, warning=FALSE, message=FALSE} +library(Seurat) +library(BPCells) +library(Azimuth) +``` + +## load data from h5ad +```{r, warning=FALSE, message=FALSE} +t0_CreateObject <- system.time({ + mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") + options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) + obj <- CreateSeuratObject(counts = mat ) +}) +``` + +## create sketch assay +```{r, warning=FALSE, message=FALSE} +t1_CreateSketchAssay <- system.time({ + obj <- NormalizeData(obj) + obj <- FindVariableFeatures(obj, layer = 'counts') + obj <- LeverageScore(obj) + obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix') +}) +``` +## Sketch assay clustering +```{r, warning=FALSE, message=FALSE} +t2_SketchClustering <- system.time({ + obj <- SCTransform(object = obj) + obj <- RunPCA(obj) + obj <- FindNeighbors(obj, dims = 1:30) + obj <- FindClusters(obj, v) +}) + +obj <- RunUMAP(obj, dims = 1:30, return.model = T, verbose = F) +``` + +```{r} +DimPlot(obj, label = T, reduction = 'umap') + NoLegend() +``` + +```{r} +options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = T) +obj.v3 <- CreateSeuratObject(counts = as.sparse(obj[['sketch']]$counts) ) + +obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) + +obj[['refAssay']] <- obj.v3[['refAssay']] +obj$predicted.subclass <- obj.v3$predicted.subclass +obj$predicted.cluster <- obj.v3$predicted.cluster +obj$predicted.subclass_smooth <- Seurat:::SmoothLabels(labels = obj$predicted.subclass , clusters = obj$SCT_snn_res.0.8 ) +``` + +## Project full cells to PCA from sketch assay +```{r, warning=FALSE, message=FALSE} +t3_ProjectEmbedding <- system.time({ + ref.emb <- ProjectCellEmbeddings(query = obj, + reference = obj, + query.assay = 'RNA', + reference.assay = 'SCT', + normalization.method = 'SCT', + reduction = 'pca') +obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA') +DefaultAssay(obj) <- 'RNA' +}) +``` + +## Transfer labels and umap from sketch to full data +```{r, warning=FALSE, message=FALSE} +t4_transferLabel <- system.time({ + obj <- TransferSketchLabels(object = obj, + atoms = 'sketch', + reduction = 'pca.orig', + dims = 1:30, + refdata = list(cluster_full = 'SCT_snn_res.0.8', + subclass_full ='predicted.subclass'), + reduction.model = 'umap' + ) +}) +``` + + +```{r} +library(ggplot2) +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() +``` + +```{r} +all_T <- ls(pattern = '^t') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 + + +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 3), 'mins' )) +``` + + +```{r} +obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:30], + k = 30, + method = "hnsw", + metric = "cosine", + n_threads = 10) +obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') + +``` + +```{r} +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() + DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() + +#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat.rds") +``` diff --git a/vignettes/seurat5_sketch_integration.nb.html b/vignettes/seurat5_sketch_integration.nb.html new file mode 100644 index 000000000..04479b6c9 --- /dev/null +++ b/vignettes/seurat5_sketch_integration.nb.html @@ -0,0 +1,483 @@ + + + + + + + + + + + + + +Seurat 5: Sketch integration + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + + + + + +
    remotes::install_github("mojaveazure/seurat-object", ref = "feat/CalN_generic")
    +remotes::install_github("satijalab/seurat-private", ref = "feat/S5_transferAnchors")
    + + + +
    +

    load library

    + + + +
    library(Seurat)
    +library(BPCells)
    +library(Azimuth)
    + + +
    Registered S3 method overwritten by 'SeuratDisk':
    +  method            from  
    +  as.sparse.H5Group Seurat
    +Attaching shinyBS
    + + + +
    +
    +

    load data from h5ad

    + + + +
    t0_CreateObject <- system.time({
    +  mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") 
    +  options(Seurat.object.assay.version = "v5",  Seurat.object.assay.calcn = T)
    +  obj <- CreateSeuratObject(counts = mat )
    +})
    + + + +
    +
    +

    create sketch assay

    + + + +
    t1_CreateSketchAssay <- system.time({
    +  obj <- NormalizeData(obj)
    +  obj <- FindVariableFeatures(obj, layer = 'counts')
    +  obj <- LeverageScore(obj)
    +  obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix')
    +})
    + + +
    Normalizing layer: counts
    +Finding variable features for layer counts
    + + +
    debugging in: VariableFeatures(object = object, nfeatures = nselect, simplify = TRUE)
    +debug: {
    +    UseMethod(generic = "VariableFeatures", object = object)
    +}
    + + +
    Q
    + + +
    Timing stopped at: 195.3 12.01 2058
    + + + +
    +
    +

    Sketch assay clustering

    + + + +
    t2_SketchClustering <- system.time({
    +  obj <- SCTransform(object = obj)
    +  obj <- RunPCA(obj)
    +  obj <- FindNeighbors(obj, dims = 1:30)
    +  obj <- FindClusters(obj, v)
    +})
    + + +
    Running SCTransform on assay: RNA
    +Running SCTransform on layer: counts
    +Using block 79 from counts to learn model.
    +Error in dim(x) <- length(x) : 
    +  invalid first argument, must be vector (list or atomic)
    + + +
    Timing stopped at: 2.016 0.048 2.061
    + + + + + + + + + + +
    options(Seurat.object.assay.version = "v3",  Seurat.object.assay.calcn = T)
    +obj.v3 <- CreateSeuratObject(counts =  as.sparse(obj[['sketch']]$counts) )
    +
    +obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) 
    +
    +obj[['refAssay']] <- obj.v3[['refAssay']]
    +obj$predicted.subclass <- obj.v3$predicted.subclass
    +obj$predicted.cluster <- obj.v3$predicted.cluster
    +obj$predicted.subclass_smooth <- Seurat:::SmoothLabels(labels = obj$predicted.subclass , clusters = obj$SCT_snn_res.0.8  )
    + + + +
    +
    +

    Project full cells to PCA from sketch assay

    + + + +
    t3_ProjectEmbedding <- system.time({
    +  ref.emb <- ProjectCellEmbeddings(query = obj,
    +                                   reference = obj,
    +                                   query.assay = 'RNA', 
    +                                   reference.assay = 'SCT',
    +                                   normalization.method = 'SCT',
    +                                                reduction = 'pca')
    +obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA')
    +DefaultAssay(obj) <- 'RNA'
    +})
    + + + +
    +
    +

    Transfer labels and umap from sketch to full data

    + + + +
    t4_transferLabel <- system.time({
    +  obj <- TransferSketchLabels(object = obj,
    +                            atoms = 'sketch',
    +                            reduction = 'pca.orig',
    +                            dims = 1:30,
    +                            refdata = list(cluster_full = 'SCT_snn_res.0.8',
    +                                           subclass_full ='predicted.subclass'),
    +                            reduction.model = 'umap'
    +                            )
    +})
    + + + + + + +
    library(ggplot2)
    +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend()  
    +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend()  
    + + + + + + +
    all_T  <- ls(pattern = '^t')
    +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60
    +
    +
    +for (i in 1:length(all_T)) {
    +  T_i <- get(all_T[i])['elapsed']
    +  if (T_i > 60) {
    +     print(paste(all_T[i], round(T_i/60, digits = 1), 'mins'))
    +  } else {
    +     print(paste(all_T[i], round(T_i, digits = 1), 'secs'))
    +  }
    +}
    +print(paste('Total time ', round(overall, digits = 3), 'mins' ))
    + + + + + + +
    obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:30], 
    +                            k = 30, 
    +                            method = "hnsw", 
    +                            metric = "cosine", 
    +                            n_threads = 10)
    +obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_')
    +
    + + + + + + +
    DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend()  
    +  DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend()  
    + 
    +#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat.rds")
    + + +
    + +
    LS0tCnRpdGxlOiAiU2V1cmF0IDU6IFNrZXRjaCBpbnRlZ3JhdGlvbiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3IgaW5zdGFsbF9zZXVyYXQsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIGV2YWw9RkFMU0V9CnJlbW90ZXM6Omluc3RhbGxfZ2l0aHViKCJtb2phdmVhenVyZS9zZXVyYXQtb2JqZWN0IiwgcmVmID0gImZlYXQvQ2FsTl9nZW5lcmljIikKcmVtb3Rlczo6aW5zdGFsbF9naXRodWIoInNhdGlqYWxhYi9zZXVyYXQtcHJpdmF0ZSIsIHJlZiA9ICJmZWF0L1M1X3RyYW5zZmVyQW5jaG9ycyIpCmBgYAogCiMjIGxvYWQgbGlicmFyeQpgYGB7ciwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoQlBDZWxscykKbGlicmFyeShBemltdXRoKQpgYGAKIAojIyBsb2FkIGRhdGEgZnJvbSBoNWFkIApgYGB7ciwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KdDBfQ3JlYXRlT2JqZWN0IDwtIHN5c3RlbS50aW1lKHsKICBtYXQgPC0gb3Blbl9tYXRyaXhfZGlyKCIvYnJhaG1zL2hhb3kvdGVzdC9tb3VzZV8xTV9uZXVyb25zX2NvdW50cyIpIAogIG9wdGlvbnMoU2V1cmF0Lm9iamVjdC5hc3NheS52ZXJzaW9uID0gInY1IiwgIFNldXJhdC5vYmplY3QuYXNzYXkuY2FsY24gPSBUKQogIG9iaiA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gbWF0ICkKfSkKYGBgCgojIyBjcmVhdGUgc2tldGNoIGFzc2F5CmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQp0MV9DcmVhdGVTa2V0Y2hBc3NheSA8LSBzeXN0ZW0udGltZSh7CiAgb2JqIDwtIE5vcm1hbGl6ZURhdGEob2JqKQogIG9iaiA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhvYmosIGxheWVyID0gJ2NvdW50cycpCiAgb2JqIDwtIExldmVyYWdlU2NvcmUob2JqKQogIG9iaiA8LSBMZXZlcmFnZVNjb3JlU2FtcGxpbmcob2JqZWN0ID0gb2JqLCBuY2VsbHMgPSA1MDAwMCwgY2FzdCA9ICdkZ0NNYXRyaXgnKQp9KQpgYGAKIyMgU2tldGNoIGFzc2F5IGNsdXN0ZXJpbmcKYGBge3IsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0V9CnQyX1NrZXRjaENsdXN0ZXJpbmcgPC0gc3lzdGVtLnRpbWUoewogIG9iaiA8LSBTQ1RyYW5zZm9ybShvYmplY3QgPSBvYmopCiAgb2JqIDwtIFJ1blBDQShvYmopCiAgb2JqIDwtIEZpbmROZWlnaGJvcnMob2JqLCBkaW1zID0gMTozMCkKICBvYmogPC0gRmluZENsdXN0ZXJzKG9iaiwgdikKfSkKCm9iaiA8LSBSdW5VTUFQKG9iaiwgZGltcyA9IDE6MzAsIHJldHVybi5tb2RlbCA9IFQsIHZlcmJvc2UgPSBGKQpgYGAKCmBgYHtyfQpEaW1QbG90KG9iaiwgbGFiZWwgPSBULCByZWR1Y3Rpb24gPSAndW1hcCcpICsgTm9MZWdlbmQoKSAKYGBgCgpgYGB7cn0Kb3B0aW9ucyhTZXVyYXQub2JqZWN0LmFzc2F5LnZlcnNpb24gPSAidjMiLCAgU2V1cmF0Lm9iamVjdC5hc3NheS5jYWxjbiA9IFQpCm9iai52MyA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gIGFzLnNwYXJzZShvYmpbWydza2V0Y2gnXV0kY291bnRzKSApCgpvYmoudjMgPC0gUnVuQXppbXV0aChxdWVyeSA9IG9iai52MywgYXNzYXkgPSAnUk5BJywgcmVmZXJlbmNlID0gJ21vdXNlY29ydGV4cmVmJywgZG8uYWR0ID0gRikgCgpvYmpbWydyZWZBc3NheSddXSA8LSBvYmoudjNbWydyZWZBc3NheSddXQpvYmokcHJlZGljdGVkLnN1YmNsYXNzIDwtIG9iai52MyRwcmVkaWN0ZWQuc3ViY2xhc3MKb2JqJHByZWRpY3RlZC5jbHVzdGVyIDwtIG9iai52MyRwcmVkaWN0ZWQuY2x1c3RlcgpvYmokcHJlZGljdGVkLnN1YmNsYXNzX3Ntb290aCA8LSBTZXVyYXQ6OjpTbW9vdGhMYWJlbHMobGFiZWxzID0gb2JqJHByZWRpY3RlZC5zdWJjbGFzcyAsIGNsdXN0ZXJzID0gb2JqJFNDVF9zbm5fcmVzLjAuOCAgKQpgYGAKCiMjIFByb2plY3QgZnVsbCBjZWxscyB0byBQQ0EgZnJvbSBza2V0Y2ggYXNzYXkgCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQp0M19Qcm9qZWN0RW1iZWRkaW5nIDwtIHN5c3RlbS50aW1lKHsKICByZWYuZW1iIDwtIFByb2plY3RDZWxsRW1iZWRkaW5ncyhxdWVyeSA9IG9iaiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZWZlcmVuY2UgPSBvYmosCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcXVlcnkuYXNzYXkgPSAnUk5BJywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVmZXJlbmNlLmFzc2F5ID0gJ1NDVCcsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAnU0NUJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVkdWN0aW9uID0gJ3BjYScpCm9ialtbJ3BjYS5vcmlnJ11dIDwtIENyZWF0ZURpbVJlZHVjT2JqZWN0KGVtYmVkZGluZ3MgPSByZWYuZW1iLCBhc3NheSA9ICdSTkEnKQpEZWZhdWx0QXNzYXkob2JqKSA8LSAnUk5BJwp9KQpgYGAgCgojIyBUcmFuc2ZlciBsYWJlbHMgYW5kIHVtYXAgZnJvbSBza2V0Y2ggdG8gZnVsbCBkYXRhCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQp0NF90cmFuc2ZlckxhYmVsIDwtIHN5c3RlbS50aW1lKHsKICBvYmogPC0gVHJhbnNmZXJTa2V0Y2hMYWJlbHMob2JqZWN0ID0gb2JqLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgYXRvbXMgPSAnc2tldGNoJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlZHVjdGlvbiA9ICdwY2Eub3JpZycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBkaW1zID0gMTozMCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlZmRhdGEgPSBsaXN0KGNsdXN0ZXJfZnVsbCA9ICdTQ1Rfc25uX3Jlcy4wLjgnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3ViY2xhc3NfZnVsbCA9J3ByZWRpY3RlZC5zdWJjbGFzcycpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVkdWN0aW9uLm1vZGVsID0gJ3VtYXAnCiAgICAgICAgICAgICAgICAgICAgICAgICAgICApCn0pCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCkRpbVBsb3Qob2JqLCBsYWJlbCA9IFQsIHJlZHVjdGlvbiA9ICdyZWYudW1hcCcsIGdyb3VwLmJ5ID0gJ3ByZWRpY3RlZC5jbHVzdGVyX2Z1bGwnLCBhbHBoYSA9IDAuMSkgKyBOb0xlZ2VuZCgpICAKRGltUGxvdChvYmosIGxhYmVsID0gVCwgcmVkdWN0aW9uID0gJ3JlZi51bWFwJywgZ3JvdXAuYnkgPSAncHJlZGljdGVkLnN1YmNsYXNzX2Z1bGwnLCBhbHBoYSA9IDAuMSkgKyBOb0xlZ2VuZCgpICAKYGBgCgpgYGB7cn0KYWxsX1QgIDwtIGxzKHBhdHRlcm4gPSAnXnQnKQpvdmVyYWxsIDwtIHN1bShzYXBwbHkoYWxsX1QsIGZ1bmN0aW9uKHgpIHJvdW5kKGdldCh4KVsnZWxhcHNlZCddLCBkaWdpdHMgPSAzKSkpLzYwCgoKZm9yIChpIGluIDE6bGVuZ3RoKGFsbF9UKSkgewogIFRfaSA8LSBnZXQoYWxsX1RbaV0pWydlbGFwc2VkJ10KICBpZiAoVF9pID4gNjApIHsKICAgICBwcmludChwYXN0ZShhbGxfVFtpXSwgcm91bmQoVF9pLzYwLCBkaWdpdHMgPSAxKSwgJ21pbnMnKSkKICB9IGVsc2UgewogICAgIHByaW50KHBhc3RlKGFsbF9UW2ldLCByb3VuZChUX2ksIGRpZ2l0cyA9IDEpLCAnc2VjcycpKQogIH0KfQpwcmludChwYXN0ZSgnVG90YWwgdGltZSAnLCByb3VuZChvdmVyYWxsLCBkaWdpdHMgPSAzKSwgJ21pbnMnICkpCmBgYAoKCmBgYHtyfQpvYmpbWydwY2Eubm4nXV0gPC0gU2V1cmF0Ojo6Tk5IZWxwZXIoZGF0YSA9IG9ialtbJ3BjYS5vcmlnJ11dQGNlbGwuZW1iZWRkaW5nc1ssMTozMF0sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgayA9IDMwLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJobnN3IiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiY29zaW5lIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBuX3RocmVhZHMgPSAxMCkKb2JqIDwtIFJ1blVNQVAob2JqLCBubi5uYW1lID0gInBjYS5ubiIsIHJlZHVjdGlvbi5uYW1lID0gJ3VtYXAub3JpZycsIHJlZHVjdGlvbi5rZXkgPSAnVW9fJykKCmBgYAoKYGBge3J9CkRpbVBsb3Qob2JqLCBsYWJlbCA9IFQsIHJlZHVjdGlvbiA9ICd1bWFwLm9yaWcnLCBncm91cC5ieSA9ICdwcmVkaWN0ZWQuY2x1c3Rlcl9mdWxsJywgYWxwaGEgPSAwLjEpICsgTm9MZWdlbmQoKSAgCiAgRGltUGxvdChvYmosIGxhYmVsID0gVCwgcmVkdWN0aW9uID0gJ3VtYXAub3JpZycsIGdyb3VwLmJ5ID0gJ3ByZWRpY3RlZC5zdWJjbGFzc19mdWxsJywgYWxwaGEgPSAwLjEpICsgTm9MZWdlbmQoKSAgCiAKI3NhdmVSRFMob2JqLCBmaWxlID0gIi9icmFobXMvaGFveS90ZXN0L21vdXNlXzFNX25ldXJvbnNfc2V1cmF0LnJkcyIpCmBgYAo=
    + + + +
    + + + + + + + + + + + + + + + + diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 9bbabf874..4328ffe84 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -237,12 +237,12 @@ An alternative approach, implemented in `FindSpatiallyVariables()`, is to search We note that there are multiple methods in the literature to accomplish this task, including [SpatialDE](https://www.nature.com/articles/nmeth.4636), and [Splotch](https://www.biorxiv.org/content/10.1101/757096v1.article-metrics). We encourage interested users to explore these methods, and hope to add support for them in the near future. ```{r spatial.vf} -brain <- FindSpatiallyVariableFeatures(brain, assay = 'SCT', features = VariableFeatures(brain)[1:1000], selection.method = 'markvariogram') +brain <- FindSpatiallyVariableFeatures(brain, assay = 'SCT', features = VariableFeatures(brain)[1:1000], selection.method = 'moransi') ``` Now we visualize the expression of the top 6 features identified by this measure. ```{r spatial.vf.plot, fig.height=8} -top.features <- head(SpatiallyVariableFeatures(brain, selection.method = 'markvariogram'),6) +top.features <- head(SpatiallyVariableFeatures(brain, selection.method = 'moransi'),6) SpatialFeaturePlot(brain, features = top.features, ncol = 3, alpha = c(0.1, 1)) ``` @@ -307,7 +307,7 @@ SpatialFeaturePlot(cortex, features = c("L2/3 IT", "L4"), pt.size.factor = 1.6, Based on these prediction scores, we can also predict *cell types* whose location is spatially restricted. We use the same methods based on marked point processes to define spatially variable features, but use the cell type prediction scores as the "marks" rather than gene expression. ```{r sc.data8, fig.height = 10} -cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "markvariogram", features = rownames(cortex), r.metric = 5, slot = "data") +cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "moransi", features = rownames(cortex), r.metric = 5, slot = "data") top.clusters <- head(SpatiallyVariableFeatures(cortex), 4) SpatialPlot(object = cortex, features = top.clusters, ncol = 2) ``` @@ -462,6 +462,47 @@ Now we visualize the expression of the top 6 features identified by Moran's I. SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") ``` +Install RCTD + +```{r, eval=FALSE} +devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) +``` + +Annotation using RCTD + +```{r rctd} +library(spacexr) + +# allen cortex reference +allen.cortex <- readRDS("/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/allen_cortex.rds") +Idents(allen.cortex) <- "subclass" + +# need to drop the cell types which there are <25 of +drop.cells <- WhichCells(allen.cortex, idents = "CR") +allen.cortex <- subset(allen.cortex, cells = drop.cells, invert = TRUE) + +counts <- LayerData(allen.cortex, layer = "counts") +cluster <- as.factor(allen.cortex$subclass) +levels(cluster)[3] <- "L2 3 IT" +names(cluster) <- colnames(allen.cortex) +nUMI <- allen.cortex$nCount_RNA +names(nUMI) <- colnames(allen.cortex) +reference <- Reference(counts, cluster, nUMI) + +counts <- LayerData(cortex, layer = "counts") +coords <- GetTissueCoordinates(cortex) +colnames(coords) <- c("x", "y") +puck <- SpatialRNA(coords, counts, colSums(counts)) + +RCTD <- create.RCTD(puck, reference, max_cores = 1) +RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') + +cortex <- AddMetaData(cortex, metadata = myRCTD@results$results_df) +p1 <- SpatialDimPlot(cortex, group.by = "first_type") +p2 <- SpatialDimPlot(cortex, group.by = "second_type") +p1 + p2 +``` + ```{r save.times, include=TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_spatial_vignette_times.csv") ``` diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index cad2ef62e..a47c24c73 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -27,7 +27,8 @@ knitr::opts_chunk$set( message = FALSE, warning = FALSE, fig.width = 10, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` @@ -233,12 +234,12 @@ An alternative approach, implemented in `FindSpatiallyVariables()`, is to search We note that there are multiple methods in the literature to accomplish this task, including [SpatialDE](https://www.nature.com/articles/nmeth.4636), and [Splotch](https://www.biorxiv.org/content/10.1101/757096v1.article-metrics). We encourage interested users to explore these methods, and hope to add support for them in the near future. ```{r spatial.vf} -brain <- FindSpatiallyVariableFeatures(brain, assay = 'SCT', features = VariableFeatures(brain)[1:1000], selection.method = 'markvariogram') +brain <- FindSpatiallyVariableFeatures(brain, assay = 'SCT', features = VariableFeatures(brain)[1:1000], selection.method = 'moransi') ``` Now we visualize the expression of the top 6 features identified by this measure. ```{r spatial.vf.plot, fig.height=8} -top.features <- head(SpatiallyVariableFeatures(brain, selection.method = 'markvariogram'),6) +top.features <- head(SpatiallyVariableFeatures(brain, selection.method = 'moransi'),6) SpatialFeaturePlot(brain, features = top.features, ncol = 3, alpha = c(0.1, 1)) ``` @@ -302,7 +303,7 @@ SpatialFeaturePlot(cortex, features = c("L2/3 IT", "L4"), pt.size.factor = 1.6, Based on these prediction scores, we can also predict *cell types* whose location is spatially restricted. We use the same methods based on marked point processes to define spatially variable features, but use the cell type prediction scores as the "marks" rather than gene expression. ```{r sc.data8, fig.height = 10} -cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "markvariogram", features = rownames(cortex), r.metric = 5, slot = "data") +cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "moransi", features = rownames(cortex), r.metric = 5, slot = "data") top.clusters <- head(SpatiallyVariableFeatures(cortex), 4) SpatialPlot(object = cortex, features = top.clusters, ncol = 2) ``` @@ -452,6 +453,47 @@ Now we visualize the expression of the top 6 features identified by Moran's I. SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") ``` +Install RCTD + +```{r, eval=FALSE} +devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) +``` + +Annotation using RCTD + +```{r rctd} +library(spacexr) + +# allen cortex reference +allen.cortex <- readRDS("/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/allen_cortex.rds") +Idents(allen.cortex) <- "subclass" + +# need to drop the cell types which there are <25 of +drop.cells <- WhichCells(allen.cortex, idents = "CR") +allen.cortex <- subset(allen.cortex, cells = drop.cells, invert = TRUE) + +counts <- LayerData(allen.cortex, layer = "counts") +cluster <- as.factor(allen.cortex$subclass) +levels(cluster)[3] <- "L2 3 IT" +names(cluster) <- colnames(allen.cortex) +nUMI <- allen.cortex$nCount_RNA +names(nUMI) <- colnames(allen.cortex) +reference <- Reference(counts, cluster, nUMI) + +counts <- LayerData(cortex, layer = "counts") +coords <- GetTissueCoordinates(cortex) +colnames(coords) <- c("x", "y") +puck <- SpatialRNA(coords, counts, colSums(counts)) + +RCTD <- create.RCTD(puck, reference, max_cores = 1) +RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') + +cortex <- AddMetaData(cortex, metadata = myRCTD@results$results_df) +p1 <- SpatialDimPlot(cortex, group.by = "first_type") +p2 <- SpatialDimPlot(cortex, group.by = "second_type") +p1 + p2 +``` + ```{r save.times, include = FALSE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/spatial_vignette_times.csv") ``` diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 86779b7d3..90bbee2a3 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -26,7 +26,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml new file mode 100644 index 000000000..8b46acf4f --- /dev/null +++ b/vignettes/vignettes_v5.yaml @@ -0,0 +1,79 @@ +- category: Introduction to v5 + vignettes: + - title: Guided tutorial --- 2,700 PBMCs + name: seurat5_pbmc3k_tutorial + summary: | + A basic overview of Seurat that includes an introduction to common analytical workflows. + image: pbmc3k_umap.jpg + + - title: Multimodal analysis + name: seurat5_multimodal_vignette + summary: | + An introduction to working with multi-modal datasets in Seurat. + image: citeseq_plot.jpg + + - title: Analysis of spatial datasets (Sequencing-based) + name: seurat5_spatial_vignette + summary: | + Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. + image: spatial_vignette_ttr.jpg + + - title: Analysis of spatial datasets (Imaging-based) + name: seurat5_spatial_vignette_2 + summary: | + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Nanostring SMI, and CODEX. + image: spatial_vignette_2.png + + - title: SCTransform + name: seurat5_sctransform_vignette + summary: | + Examples of how to use the SCTransform wrapper in Seurat. + image: assets/sctransform.png + + - title: Visualization + name: seurat5_visualization_vignette + summary: | + An overview of the major visualization functionality within Seurat. + image: visualization_vignette.jpg + + - title: Cell Cycle Regression + name: seurat5_cell_cycle_vignette + summary: | + Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes. + image: cell_cycle_vignette.jpg + + - title: BPCells Sketch Clustering (Log) + name: BPCells_sketch_clustering_mouse_brain.nb + summary: | + Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5. + image: BPCells_sketch_clustering_mouse_brain.png + + - title: BPCells Sketch Clustering (SCTransform) + name: BPCells_sketch_clustering_mouse_brain_SCT.nb + summary: | + Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5. + image: BPCells_sketch_clustering_mouse_brain.png + + - title: BPCells Sketch integration (Log) + name: BPCells_sketch_integration_1M.nb + summary: | + Perform sketch integration on a large dataset from Parse Biosciences. + image: BPCells_sketch_inte_1M.png + + - title: BPCells Sketch integration (SCTransform) + name: BPCells_sketch_integration_1M_SCT.nb + summary: | + Perform sketch integration on a large dataset from Parse Biosciences. + image: BPCells_sketch_inte_1M.png + + - title: Chunked mapping (Log) + name: BPCells_COVID_logMapping.nb + summary: | + Iteratively map large COVID datasets onto a reference for cell type annotation. + image: BPCells_COVID.png + + - title: Chunked mapping (SCTransform) + name: BPCells_COVID_SCTMapping.nb + summary: | + Iteratively map large COVID datasets onto a reference for cell type annotation. + image: BPCells_COVID.png diff --git a/vignettes/weighted_nearest_neighbor_analysis.Rmd b/vignettes/weighted_nearest_neighbor_analysis.Rmd index 5566c05ab..68e155d1f 100644 --- a/vignettes/weighted_nearest_neighbor_analysis.Rmd +++ b/vignettes/weighted_nearest_neighbor_analysis.Rmd @@ -23,7 +23,8 @@ knitr::knit_hooks$set(time_it = local({ knitr::opts_chunk$set( message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` The simultaneous measurement of multiple modalities, known as multimodal analysis, represents an exciting frontier for single-cell genomics and necessitates new computational methods that can define cellular states based on multiple data types. The varying information content of each modality, even across cells in the same dataset, represents a pressing challenge for the analysis and integration of multimodal datasets. In ([Hao\*, Hao\* et al, Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)), we introduce 'weighted-nearest neighbor' (WNN) analysis, an unsupervised framework to learn the relative utility of each data type in each cell, enabling an integrative analysis of multiple modalities. From 481ed6266b7caedb17e66d0f7a85d10f7ddeb343 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 23 Jan 2023 17:00:53 -0500 Subject: [PATCH 396/979] changes to query and reference assays --- R/integration.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index b58d98f7f..91f22568b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -838,8 +838,12 @@ FindTransferAnchors <- function( } } # make new query assay w same name as reference assay - suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) - DefaultAssay(query) <- reference.assay + if (reference.assay %in% Assays(query)) { + DefaultAssay(query) <- reference.assay + } else { + suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) + DefaultAssay(query) <- reference.assay + } # only keep necessary info from objects query <- DietSeurat( object = query, @@ -2163,6 +2167,12 @@ MapQuery <- function( verbose = TRUE ) { transfer.reduction <- slot(object = anchorset, name = "command")$reduction + if (DefaultAssay(anchorset@object.list[[1]]) %in% Assays(reference)) { + DefaultAssay(reference) <- DefaultAssay(anchorset@object.list[[1]]) + } else { + stop('The assay used to create the anchorset does not match any', + 'of the assays in the reference object.') + } # determine anchor type if (grepl(pattern = "pca", x = transfer.reduction)) { anchor.reduction <- "pcaproject" @@ -6001,7 +6011,7 @@ ValidateParams_FindTransferAnchors <- function( query.assay.check <- query.assay reference.assay.check <- reference.assay ref.features <- rownames(x = GetAssayData(object = reference[[reference.assay.check]], slot = feature.slot)) - query.features <- rownames(x = GetAssayData(object = query[[query.assay.check]], slot = feature.slot)) + query.features <- rownames(x = query) if (normalization.method == "SCT") { query.model.features <- rownames(x = Misc(object = query[[query.assay]])$vst.out$gene_attr) query.features <- unique(c(query.features, query.model.features)) From a6c8444a23304a2f59e5961b0886bf06dfdbd210 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 24 Jan 2023 10:58:41 -0500 Subject: [PATCH 397/979] edit rownames --- R/integration.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/integration.R b/R/integration.R index fd428d5ef..681c5e72e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -840,9 +840,7 @@ FindTransferAnchors <- function( } } # make new query assay w same name as reference assay - if (reference.assay %in% Assays(query)) { - DefaultAssay(query) <- reference.assay - } else { + if (query.assay != reference.assay) { suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) DefaultAssay(query) <- reference.assay } @@ -6031,11 +6029,10 @@ ValidateParams_FindTransferAnchors <- function( call. = FALSE) } # features must be in both reference and query - feature.slot <- 'data' query.assay.check <- query.assay reference.assay.check <- reference.assay - ref.features <- rownames(x = GetAssayData(object = reference[[reference.assay.check]], slot = feature.slot)) - query.features <- rownames(x = query) + ref.features <- rownames(x = reference[[reference.assay.check]]) + query.features <- rownames(x = query[[query.assay.check]]) if (normalization.method == "SCT") { query.model.features <- rownames(x = Misc(object = query[[query.assay]])$vst.out$gene_attr) query.features <- unique(c(query.features, query.model.features)) From 67bbf164f9284a8b0f3c647b162d74abcd6cd43d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 24 Jan 2023 11:06:45 -0500 Subject: [PATCH 398/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 779cc830e..3fe2d0af8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9021 -Date: 2023-01-23 +Version: 4.9.9.9022 +Date: 2023-01-24 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 65ada9ee4842b9e9e8d1386a1d36c72478f45a3a Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 24 Jan 2023 15:48:26 -0500 Subject: [PATCH 399/979] update assay arg to len 2 vector --- DESCRIPTION | 4 ++-- R/integration.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a68af7c8f..37875d065 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9020 -Date: 2023-01-12 +Version: 4.9.9.9023 +Date: 2023-01-24 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/R/integration.R b/R/integration.R index 9b231ce62..14e981b12 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1140,7 +1140,7 @@ FindTransferAnchors <- function( } anchors <- FindAnchors( object.pair = combined.ob, - assay = reference.assay, + assay = c(reference.assay, reference.assay), slot = "data", cells1 = colnames(x = reference), cells2 = colnames(x = query), @@ -6434,7 +6434,7 @@ FindAssayAnchor <- function( message("Finding ", anchor.type," anchors from assay ", assay) } anchors <- FindAnchors(object.pair = object.both, - assay = DefaultAssay(object.both), + assay = c(DefaultAssay(object.both), DefaultAssay(object.both)), slot = 'data', cells1 = colnames(object.list[[1]]), cells2 = colnames(object.list[[2]]), From 5e8865b9bf00095176f6c3a5385dc0b1b738d718 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 25 Jan 2023 11:06:34 -0500 Subject: [PATCH 400/979] update seurat object before conversion --- vignettes/conversion_vignette.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/conversion_vignette.Rmd b/vignettes/conversion_vignette.Rmd index 7b9af30c9..f45965cbd 100644 --- a/vignettes/conversion_vignette.Rmd +++ b/vignettes/conversion_vignette.Rmd @@ -57,6 +57,7 @@ library(patchwork) # Use PBMC3K from SeuratData InstallData("pbmc3k") pbmc <- LoadData(ds = "pbmc3k", type = "pbmc3k.final") +pbmc <- UpdateSeuratObject(pbmc) pbmc.sce <- as.SingleCellExperiment(pbmc) p1 <- plotExpression(pbmc.sce, features = 'MS4A1', x = 'ident') + theme(axis.text.x = element_text(angle = 45, hjust = 1)) p2 <- plotPCA(pbmc.sce, colour_by = 'ident') From adb98c586c044a67e438bac89eaf2b6a672e02ee Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 25 Jan 2023 11:15:02 -0500 Subject: [PATCH 401/979] fix bpcell vignette names --- vignettes/vignettes_v5.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 8b46acf4f..67e1bbccc 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -43,37 +43,37 @@ image: cell_cycle_vignette.jpg - title: BPCells Sketch Clustering (Log) - name: BPCells_sketch_clustering_mouse_brain.nb + name: BPCells_sketch_clustering_mouse_brain summary: | Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5. image: BPCells_sketch_clustering_mouse_brain.png - title: BPCells Sketch Clustering (SCTransform) - name: BPCells_sketch_clustering_mouse_brain_SCT.nb + name: BPCells_sketch_clustering_mouse_brain_SCT summary: | Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5. image: BPCells_sketch_clustering_mouse_brain.png - title: BPCells Sketch integration (Log) - name: BPCells_sketch_integration_1M.nb + name: BPCells_sketch_integration_1M summary: | Perform sketch integration on a large dataset from Parse Biosciences. image: BPCells_sketch_inte_1M.png - title: BPCells Sketch integration (SCTransform) - name: BPCells_sketch_integration_1M_SCT.nb + name: BPCells_sketch_integration_1M_SCT summary: | Perform sketch integration on a large dataset from Parse Biosciences. image: BPCells_sketch_inte_1M.png - title: Chunked mapping (Log) - name: BPCells_COVID_logMapping.nb + name: BPCells_COVID_logMapping summary: | Iteratively map large COVID datasets onto a reference for cell type annotation. image: BPCells_COVID.png - title: Chunked mapping (SCTransform) - name: BPCells_COVID_SCTMapping.nb + name: BPCells_COVID_SCTMapping summary: | Iteratively map large COVID datasets onto a reference for cell type annotation. image: BPCells_COVID.png From b48ba721e6740a962cd71872838b7b120eb819fa Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 25 Jan 2023 11:33:22 -0500 Subject: [PATCH 402/979] sync spatial vignettes --- vignettes/spatial_vignette.Rmd | 5 +++-- vignettes/spatial_vignette_2.Rmd | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index a47c24c73..6d082871c 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -399,8 +399,8 @@ slide.seq <- FindClusters(slide.seq, resolution = 0.3, verbose = FALSE) We can then visualize the results of the clustering either in UMAP space (with `DimPlot()`) or in the bead coordinate space with `SpatialDimPlot()`. ```{r dim.plots.ss,fig.height=5} -plot1 <- DimPlot(slide.seq, reduction = "umap", label = TRUE) -plot2 <- SpatialDimPlot(slide.seq, stroke = 0) +plot1 <- DimPlot(slide.seq, reduction = "umap", label = TRUE) +plot2 <- SpatialDimPlot(slide.seq, stroke = 0) plot1 + plot2 SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq, idents = c(1, 6, 13)), facet.highlight = TRUE) ``` @@ -411,6 +411,7 @@ To facilitate cell-type annotation of the Slide-seq dataset, we are leveraging a ```{r ref.saunders} ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- UpdateSeuratObject(ref) ``` The original annotations from the paper are provided in the cell metadata of the Seurat object. These annotations are provided at several "resolutions", from broad classes (`ref$class`) to subclusters within celltypes (`ref$subcluster`). For the purposes of this vignette, we'll work off of a modification of the celltype annotations (`ref$celltype`) which we felt struck a good balance. diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 90bbee2a3..40c36a074 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -214,6 +214,7 @@ nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/na # add in precomputed Azimuth annotations azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) +azimuth.data$umap@assay.used <- "Nanostring" nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 From 0a10ea1c2276107410382827a58789a6f9401076 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 25 Jan 2023 11:53:46 -0500 Subject: [PATCH 403/979] fix cell scatter --- vignettes/interaction_vignette.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/interaction_vignette.Rmd b/vignettes/interaction_vignette.Rmd index 13bf2d8d6..6ca7079c6 100644 --- a/vignettes/interaction_vignette.Rmd +++ b/vignettes/interaction_vignette.Rmd @@ -118,21 +118,21 @@ cluster.averages <- AverageExpression(pbmc) head(cluster.averages[['RNA']][, 1:5]) # Return this information as a Seurat object (enables downstream plotting and analysis) -# First, replace spaces with underscores '_' so ggplot2 doesn't fail +# First, replace spaces with underscores '-' so ggplot2 doesn't fail orig.levels <- levels(pbmc) -Idents(pbmc) <- gsub(pattern = ' ', replacement = '_', x = Idents(pbmc)) -orig.levels <- gsub(pattern = ' ', replacement = '_', x = orig.levels) +Idents(pbmc) <- gsub(pattern = ' ', replacement = '-', x = Idents(pbmc)) +orig.levels <- gsub(pattern = ' ', replacement = '-', x = orig.levels) levels(pbmc) <- orig.levels cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE) cluster.averages # How can I plot the average expression of NK cells vs. CD8 T cells? # Pass do.hover = T for an interactive plot to identify gene outliers -CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8_T") +CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8-T") # How can I calculate expression averages separately for each replicate? cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE, add.ident = "replicate") -CellScatter(cluster.averages, cell1 = "CD8_T_rep1", cell2 = "CD8_T_rep2") +CellScatter(cluster.averages, cell1 = "CD8-T_rep1", cell2 = "CD8-T_rep2") # You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between replicates DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) From 4d7c062b365f52d621174752a3050ca6b9c057a7 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 25 Jan 2023 12:01:44 -0500 Subject: [PATCH 404/979] assign load data object to var --- vignettes/sctransform_v2_vignette.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/sctransform_v2_vignette.Rmd b/vignettes/sctransform_v2_vignette.Rmd index ccc23ae4c..0f7fa2423 100644 --- a/vignettes/sctransform_v2_vignette.Rmd +++ b/vignettes/sctransform_v2_vignette.Rmd @@ -86,8 +86,7 @@ InstallData("ifnb") ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -LoadData("ifnb") - +ifnb <- LoadData("ifnb") ifnb <- UpdateSeuratObject(ifnb) # split the dataset into a list of two seurat objects (stim and CTRL) From e2690a62ec0e6719dcd4ef26f1174776f7faac3a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 25 Jan 2023 13:32:33 -0500 Subject: [PATCH 405/979] fix v5 findanchors --- R/integration.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/integration.R b/R/integration.R index 8a0442f54..5673938c2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4111,12 +4111,14 @@ FindAnchors_v5 <- function( projected = FALSE, verbose = TRUE ) { - reference.layers <- Layers(object.pair[[assay]], search = 'data')[1] - query.layers <- setdiff(Layers(object.pair[[assay]], search = 'data'), reference.layers) + ref.assay <- assay[1] + query.assay <- assay[2] + reference.layers <- Layers(object.pair[[ref.assay]], search = 'data')[1] + query.layers <- setdiff(Layers(object.pair[[query.assay]], search = 'data'), reference.layers) anchor.list <- list() for (i in seq_along(query.layers)) { cells2.i <- Cells( - x = object.pair[[assay]], + x = object.pair[[query.assay]], layer = query.layers[i] ) object.pair.i <- subset( @@ -4185,11 +4187,11 @@ FindAnchors <- function( verbose = TRUE ) { if (inherits(x = object.pair[[assay[1]]], what = 'Assay')) { - FindAnchor.function <- FindAnchors_v3 + FindAnchors.function <- FindAnchors_v3 } else if (inherits(x = object.pair[[assay[1]]], what = 'Assay5')) { - FindAnchor.function <- FindAnchors_v5 + FindAnchors.function <- FindAnchors_v5 } - anchors <- FindAnchor.function( + anchors <- FindAnchors.function( object.pair = object.pair, assay = assay, slot = slot, From 8dc09ef7b511026a57af43297bcdb9b8c09b839e Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 25 Jan 2023 14:21:38 -0500 Subject: [PATCH 406/979] rename assays for transfer anchors --- R/integration.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/integration.R b/R/integration.R index 5673938c2..c12f95339 100644 --- a/R/integration.R +++ b/R/integration.R @@ -839,9 +839,13 @@ FindTransferAnchors <- function( )) } } + # make global variables to allow access for RenameAssays + query.assay <<- query.assay + reference.assay <<- reference.assay # make new query assay w same name as reference assay if (query.assay != reference.assay) { suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) + suppressWarnings(expr = query <- RenameAssays(query, query.assay = reference.assay)) DefaultAssay(query) <- reference.assay } # only keep necessary info from objects @@ -1203,6 +1207,7 @@ FindTransferAnchors <- function( slot(object = anchor.set, name = "neighbors") <- list( query.neighbors = query.neighbors) } + rm(query.assay, reference.assay) return(anchor.set) } From 74b5cac32a02a4827838d9c7ba3468e3b50ac0a8 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 25 Jan 2023 15:49:37 -0500 Subject: [PATCH 407/979] drop assay.used line since nanostring_data.Rds was updated --- vignettes/seurat5_spatial_vignette_2.Rmd | 1 - vignettes/spatial_vignette_2.Rmd | 1 - 2 files changed, 2 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index fdb11f529..21a0876d3 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -215,7 +215,6 @@ nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/na # add in precomputed Azimuth annotations azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) -azimuth.data$umap@assay.used <- "Nanostring" nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 40c36a074..90bbee2a3 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -214,7 +214,6 @@ nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/na # add in precomputed Azimuth annotations azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) -azimuth.data$umap@assay.used <- "Nanostring" nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 From b8478c1b4263de3e3e9766f16bab4842f5b04c0f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 25 Jan 2023 17:41:40 -0500 Subject: [PATCH 408/979] row mean for dense matrix --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 5673938c2..65ea2b6c7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5258,7 +5258,7 @@ if (normalization.method == 'SCT') { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) } else { - feature.mean <- rowMeans(mat = reference.data) + feature.mean <- rowMeans2(x = reference.data) } if (scale) { feature.sd <- sqrt( From 02d68d87180188def0d4ad6fe404daa8f14fabf2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 25 Jan 2023 17:43:42 -0500 Subject: [PATCH 409/979] merge From c66ccbc97ba88595d1ae4cb6b9725ab2544d2b0d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 25 Jan 2023 23:41:59 -0500 Subject: [PATCH 410/979] fix integration warning --- R/integration.R | 7 +++---- R/integration5.R | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/integration.R b/R/integration.R index 65ea2b6c7..89bbde23d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1677,8 +1677,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( reference.integrated[[active.assay]] <- CreateAssayObject( data = GetAssayData( object = reference.integrated[[new.reduction.name.safe]], - slot = 'data', - check.matrix = FALSE + slot = 'data' ) ) DefaultAssay(object = reference.integrated) <- active.assay @@ -1699,12 +1698,12 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( preserve.order = preserve.order, verbose = verbose ) - unintegrated[[new.reduction.name]] <- CreateDimReducObject( + suppressWarnings(expr = unintegrated[[new.reduction.name]] <- CreateDimReducObject( embeddings = as.matrix(x = t(x = integrated.data)), assay = intdr.assay, loadings = Loadings(object = reductions), key = paste0(new.reduction.name.safe, "_") - ) + )) unintegrated <- SetIntegrationData( object = unintegrated, integration.name = "Integration", diff --git a/R/integration5.R b/R/integration5.R index 5a3c7859a..5bcba5024 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -206,15 +206,13 @@ RPCAIntegration <- function( normalization.method <- match.arg(arg = normalization.method) features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' - layers <- layers %||% Layers(object, search = 'data') - + layers <- layers %||% Layers(object = object, search = 'data') if (normalization.method == 'SCT') { object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] - - object.list <- SplitObject(object = object.sct,split.by = 'split') - object.list <- PrepSCTIntegration(object.list, anchor.features = features) - object.list <- lapply(object.list, function(x) { + object.list <- SplitObject(object = object.sct, split.by = 'split') + object.list <- PrepSCTIntegration(object.list = object.list, anchor.features = features) + object.list <- lapply(X = object.list, FUN = function(x) { x <- RunPCA(object = x, features = features, verbose = FALSE) return(x) } @@ -229,7 +227,6 @@ RPCAIntegration <- function( object.list[[i]][['RNA']]$counts <- NULL } } - anchor <- FindIntegrationAnchors(object.list = object.list, anchor.features = features, scale = FALSE, @@ -241,8 +238,12 @@ RPCAIntegration <- function( verbose = verbose, ... ) - anchor@object.list <- lapply(anchor@object.list, function(x) { - x <- DietSeurat(x, features = features[1:2]) + slot(object = anchor, name = "object.list") <- lapply( + X = slot( + object = anchor, + name = "object.list"), + FUN = function(x) { + suppressWarnings(expr = x <- DietSeurat(x, features = features[1:2])) return(x) }) object_merged <- IntegrateEmbeddings(anchorset = anchor, From 21a27c805a0477becec7146a531fc18d366526a9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 11:08:48 -0500 Subject: [PATCH 411/979] fix project cell embeddings BPCells --- R/integration.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 89bbde23d..0981f7147 100644 --- a/R/integration.R +++ b/R/integration.R @@ -845,6 +845,7 @@ FindTransferAnchors <- function( DefaultAssay(query) <- reference.assay } # only keep necessary info from objects + suppressWarnings( query <- DietSeurat( object = query, assays = reference.assay, @@ -852,6 +853,7 @@ FindTransferAnchors <- function( features = features, scale.data = TRUE ) + ) # check assay in the reference.reduction if (!is.null(reference.reduction) && slot(object = reference[[reference.reduction]], name = "assay.used") != reference.assay) { @@ -927,7 +929,7 @@ FindTransferAnchors <- function( ) } query_nCount_UMI <- query[[]][, paste0("nCount_", query.assay)] - names(query_nCount_UMI) <- colnames(query) + names(x = query_nCount_UMI) <- colnames(x = query) projected.pca <- ProjectCellEmbeddings( reference = reference, reduction = reference.reduction, @@ -5321,7 +5323,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( object = as.sparse(query[,cells.grid[[i]]]), reference.SCT.model = reference.SCT.model, features = features, - nCount_UMI = nCount_UMI) + nCount_UMI = nCount_UMI[colnames(query)[cells.grid[[i]]]]) proj.list[[i]] <- t(Loadings(object = reference[[reduction]])[features,dims]) %*% query.i } proj.pca <- t(matrix( From 7632c882b2e4fb2936ceaff153651f9b70380d7b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 11:21:26 -0500 Subject: [PATCH 412/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 37875d065..3eabf3055 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9023 -Date: 2023-01-24 +Version: 4.9.9.9024 +Date: 2023-01-26 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 1a22672e527bd379f1e346042e14678cf65d5627 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 12:45:46 -0500 Subject: [PATCH 413/979] update doc --- NAMESPACE | 1 + man/FindSpatiallyVariableFeatures.Rd | 20 +++++++++++++++++++- man/FindTransferAnchors.Rd | 1 - man/ImageDimPlot.Rd | 3 +-- man/ImageFeaturePlot.Rd | 3 +-- man/RidgePlot.Rd | 3 ++- man/Seurat-package.Rd | 2 ++ man/VlnPlot.Rd | 3 ++- 8 files changed, 28 insertions(+), 8 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 95d222cfa..5d3d90860 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ S3method(FindNeighbors,default) S3method(FindNeighbors,dist) S3method(FindSpatiallyVariableFeatures,Assay) S3method(FindSpatiallyVariableFeatures,Seurat) +S3method(FindSpatiallyVariableFeatures,StdAssay) S3method(FindSpatiallyVariableFeatures,default) S3method(FindVariableFeatures,Assay) S3method(FindVariableFeatures,SCTAssay) diff --git a/man/FindSpatiallyVariableFeatures.Rd b/man/FindSpatiallyVariableFeatures.Rd index a94625155..1dbcbccd2 100644 --- a/man/FindSpatiallyVariableFeatures.Rd +++ b/man/FindSpatiallyVariableFeatures.Rd @@ -1,10 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generics.R, R/preprocessing.R +% Please edit documentation in R/generics.R, R/preprocessing.R, +% R/preprocessing5.R \name{FindSpatiallyVariableFeatures} \alias{FindSpatiallyVariableFeatures} \alias{FindSpatiallyVariableFeatures.default} \alias{FindSpatiallyVariableFeatures.Assay} \alias{FindSpatiallyVariableFeatures.Seurat} +\alias{FindSpatiallyVariableFeatures.StdAssay} \title{Find spatially variable features} \usage{ FindSpatiallyVariableFeatures(object, ...) @@ -48,6 +50,20 @@ FindSpatiallyVariableFeatures(object, ...) verbose = TRUE, ... ) + +\method{FindSpatiallyVariableFeatures}{StdAssay}( + object, + layer = "scale.data", + spatial.location, + selection.method = c("markvariogram", "moransi"), + features = NULL, + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + nfeatures = nfeatures, + verbose = TRUE, + ... +) } \arguments{ \item{object}{A Seurat object, assay, or expression matrix} @@ -83,6 +99,8 @@ compute for all features.} \item{assay}{Assay to pull the features (marks) from} \item{image}{Name of image to pull the coordinates from} + +\item{layer}{Layer in the Assay5 to pull data from} } \description{ Identify features whose variability in expression can be explained to some diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 49bbc4cf2..398a55a03 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -12,7 +12,6 @@ FindTransferAnchors( reference.assay = NULL, reference.neighbors = NULL, query.assay = NULL, - query.layers = NULL, reduction = "pcaproject", reference.reduction = NULL, project.query = FALSE, diff --git a/man/ImageDimPlot.Rd b/man/ImageDimPlot.Rd index ce3d3fd5d..a24182594 100644 --- a/man/ImageDimPlot.Rd +++ b/man/ImageDimPlot.Rd @@ -68,8 +68,7 @@ RColorBrewer is used by default.} \item{nmols}{Max number of each molecule specified in `molecules` to plot} -\item{alpha}{Alpha value, should be between 0 and 1; when plotting multiple -boundaries, \code{alpha} is equivalent to max alpha} +\item{alpha}{Alpha value for plotting (default is 1)} \item{border.color}{Color of cell segmentation border; pass \code{NA} to suppress borders for segmentation-based plots} diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd index 9a91337eb..0e375c441 100644 --- a/man/ImageFeaturePlot.Rd +++ b/man/ImageFeaturePlot.Rd @@ -83,8 +83,7 @@ RColorBrewer is used by default.} \item{nmols}{Max number of each molecule specified in `molecules` to plot} -\item{alpha}{Alpha value, should be between 0 and 1; when plotting multiple -boundaries, \code{alpha} is equivalent to max alpha} +\item{alpha}{Alpha value for plotting (default is 1)} \item{border.color}{Color of cell segmentation border; pass \code{NA} to suppress borders for segmentation-based plots} diff --git a/man/RidgePlot.Rd b/man/RidgePlot.Rd index 7825a99a5..fda292262 100644 --- a/man/RidgePlot.Rd +++ b/man/RidgePlot.Rd @@ -16,7 +16,8 @@ RidgePlot( same.y.lims = FALSE, log = FALSE, ncol = NULL, - slot = "data", + slot = deprecated(), + layer = "data", stack = FALSE, combine = TRUE, fill.by = "feature" diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 351af75c9..69a18bf33 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -53,7 +53,9 @@ Other contributors: \item Jeff Farrell \email{jfarrell@g.harvard.edu} [contributor] \item Christoph Hafemeister \email{chafemeister@nygenome.org} (\href{https://orcid.org/0000-0001-6365-8254}{ORCID}) [contributor] \item Yuhan Hao \email{yhao@nygenome.org} (\href{https://orcid.org/0000-0002-1810-0822}{ORCID}) [contributor] + \item Austin Hartman \email{ahartman@nygenome.org} (\href{https://orcid.org/0000-0001-7278-1852}{ORCID}) [contributor] \item Jaison Jain \email{jjain@nygenome.org} (\href{https://orcid.org/0000-0002-9478-5018}{ORCID}) [contributor] + \item Madeline Kowalski \email{mkowalski@nygenome.org} (\href{https://orcid.org/0000-0002-5655-7620}{ORCID}) [contributor] \item Efthymia Papalexi \email{epapalexi@nygenome.org} (\href{https://orcid.org/0000-0001-5898-694X}{ORCID}) [contributor] \item Patrick Roelli \email{proelli@nygenome.org} [contributor] \item Rahul Satija \email{rsatija@nygenome.org} (\href{https://orcid.org/0000-0001-9448-8833}{ORCID}) [contributor] diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 8b1ca448b..749010d2c 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -20,7 +20,8 @@ VlnPlot( same.y.lims = FALSE, log = FALSE, ncol = NULL, - slot = "data", + slot = deprecated(), + layer = "data", split.plot = FALSE, stack = FALSE, combine = TRUE, From 834b510d9aeeddd06055548fa348bcd7ebbf0b70 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 12:55:49 -0500 Subject: [PATCH 414/979] update imports --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 3eabf3055..67a6daec7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,6 +53,7 @@ Imports: jsonlite, KernSmooth, leiden (>= 0.3.1), + lifecycle, lmtest, MASS, Matrix (>= 1.5-0), From 04b3102bbc0038f34463f6e709949d8611f2680b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 14:23:26 -0500 Subject: [PATCH 415/979] remove seurat5 --- R/preprocessing5.R | 117 --------------------------------------------- 1 file changed, 117 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c0b963c64..fe56c8e92 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -222,38 +222,6 @@ FindSpatiallyVariableFeatures.StdAssay <- function( return(object) } -#' @importFrom rlang enquo -#' @method FindVariableFeatures Seurat5 -#' @export -#' -FindVariableFeatures.Seurat5 <- function( - object, - assay = NULL, - method = VST, - nselect = 2000L, - layer = NULL, - span = 0.3, - clip = NULL, - key = NULL, - verbose = TRUE, - ... -) { - assay <- assay[1L] %||% DefaultAssay(object = object) - assay <- match.arg(arg = assay, choices = Assays(object = object)) - method <- enquo(arg = method) - object[[assay]] <- FindVariableFeatures( - object = object[[assay]], - method = method, - nselect = nselect, - layer = layer, - span = span, - clip = clip, - key = key, - verbose = verbose, - ... - ) - return(object) -} #' @rdname LogNormalize #' @method LogNormalize default @@ -661,38 +629,6 @@ NormalizeData.StdAssay <- function( return(object) } -#' @importFrom SeuratObject DefaultAssay -#' -#' @method NormalizeData Seurat5 -#' @export -#' -NormalizeData.Seurat5 <- function( - object, - assay = NULL, - method = 'LogNormalize', - scale.factor = 1e4, - margin = 1L, - layer = NULL, - save = 'data', - default = TRUE, - verbose = TRUE, - ... -) { - assay <- assay[1L] %||% DefaultAssay(object = object) - assay <- match.arg(arg = assay, choices = Assays(object = object)) - object[[assay]] <- NormalizeData( - object = object[[assay]], - method = method, - scale.factor = scale.factor, - margin = margin, - layer = layer, - save = save, - default = default, - verbose = verbose, - ... - ) - return(object) -} #' @importFrom SeuratObject StitchMatrix #' @@ -801,59 +737,6 @@ ScaleData.StdAssay <- function( return(object) } -#' @importFrom rlang is_scalar_character -#' -#' @method ScaleData Seurat5 -#' @export -#' -ScaleData.Seurat5 <- function( - object, - features = NULL, - assay = NULL, - layer = NULL, - vars.to.regress = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... -) { - assay <- assay %||% DefaultAssay(object = object) - if (!is.null(x = vars.to.regress)) { - vars.to.regress <- intersect(x = vars.to.regress, y = names(x = object[[]])) - } - latent.data <- if (length(x = vars.to.regress)) { - object[[vars.to.regress]] - } else { - NULL - } - if (is_scalar_character(x = split.by)) { - split.by <- object[[split.by]] - } - object[[assay]] <- ScaleData( - object = object[[assay]], - features = features, - layer = layer, - vars.to.regress = vars.to.regress, - latent.data = latent.data, - split.by = split.by, - model.use = model.use, - use.umi = use.umi, - do.scale = do.scale, - do.center = do.center, - scale.max = scale.max, - min.cells.to.block = min.cells.to.block, - verbose = verbose, - ... - ) - return(object) -} - #' @rdname VST #' @method VST default #' @export From f581ed4795bf8ae983b264dc4f9520fb470a838e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 14:47:01 -0500 Subject: [PATCH 416/979] add RC and CLR normalization --- R/preprocessing5.R | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index fe56c8e92..8dd98c388 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -496,18 +496,18 @@ LogNormalize.TENxMatrix <- function( #' NormalizeData.default <- function( object, - method = c('LogNormalize'), + normalization.method = c('LogNormalize', 'CLR', 'RC'), scale.factor = 1e4, cmargin = 2L, margin = 1L, verbose = TRUE, ... ) { - method <- method[1L] - method <- match.arg(arg = method) + normalization.method <- normalization.method[1L] + normalization.method <- match.arg(arg = normalization.method) # TODO: enable parallelization via future normalized <- switch( - EXPR = method, + EXPR = normalization.method, 'LogNormalize' = { if (IsSparse(x = object) && .MARGIN(object = object) == cmargin) { .SparseNormalize( @@ -524,6 +524,29 @@ NormalizeData.default <- function( ... ) } + }, + 'CLR' = { + if (!inherits(x = object, what = 'dgCMatrix') && + !inherits(x = object, what = 'matrix')) { + stop('CLR normalization only supports for dense and dgCMatrix') + } + CustomNormalize( + data = object, + custom_function = function(x) { + return(log1p(x = x/(exp(x = sum(log1p(x = x[x > 0]), na.rm = TRUE)/length(x = x))))) + }, + margin = margin, + verbose = verbose + ) + }, + 'RC' = { + if (!inherits(x = object, what = 'dgCMatrix') && + !inherits(x = object, what = 'matrix')) { + stop('RC normalization only supports for dense and dgCMatrix') + } + RelativeCounts(data = object, + scale.factor = scale.factor, + verbose = verbose) } ) return(normalized) @@ -581,7 +604,7 @@ NormalizeData.default <- function( #' NormalizeData.StdAssay <- function( object, - method = 'LogNormalize', + normalization.method = 'LogNormalize', scale.factor = 1e4, margin = 1L, layer = 'counts', @@ -614,7 +637,7 @@ NormalizeData.StdAssay <- function( cells = Cells(x = object, layer = l) ) <- NormalizeData( object = LayerData(object = object, layer = l, fast = NA), - method = method, + normalization.method = normalization.method, scale.factor = scale.factor, margin = margin, verbose = verbose, From 6acadc994dca9ef8a3beb216692d9be9a63b4dfa Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 14:56:13 -0500 Subject: [PATCH 417/979] update doc --- NAMESPACE | 4 ---- 1 file changed, 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5d3d90860..c230cc587 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,7 +36,6 @@ S3method(FindSpatiallyVariableFeatures,default) S3method(FindVariableFeatures,Assay) S3method(FindVariableFeatures,SCTAssay) S3method(FindVariableFeatures,Seurat) -S3method(FindVariableFeatures,Seurat5) S3method(FindVariableFeatures,StdAssay) S3method(FindVariableFeatures,V3Matrix) S3method(FindVariableFeatures,default) @@ -73,7 +72,6 @@ S3method(MappingScore,AnchorSet) S3method(MappingScore,default) S3method(NormalizeData,Assay) S3method(NormalizeData,Seurat) -S3method(NormalizeData,Seurat5) S3method(NormalizeData,StdAssay) S3method(NormalizeData,V3Matrix) S3method(NormalizeData,default) @@ -135,7 +133,6 @@ S3method(SCTransform,default) S3method(ScaleData,Assay) S3method(ScaleData,IterableMatrix) S3method(ScaleData,Seurat) -S3method(ScaleData,Seurat5) S3method(ScaleData,StdAssay) S3method(ScaleData,default) S3method(ScaleFactors,VisiumV1) @@ -802,7 +799,6 @@ importFrom(rlang,invoke) importFrom(rlang,is_integerish) importFrom(rlang,is_na) importFrom(rlang,is_quosure) -importFrom(rlang,is_scalar_character) importFrom(rlang,is_scalar_integerish) importFrom(rlang,quo_get_env) importFrom(rlang,quo_get_expr) From 5377e3be09b30aed1b0d9868bf96caac884876ea Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 15:59:03 -0500 Subject: [PATCH 418/979] var in RunPCA --- R/dimensional_reduction.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 1ab86d15a..72272ddaf 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2407,7 +2407,7 @@ PrepDR <- function( PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRUE) { layer <- layer[1L] layer <- match.arg(arg = layer, choices = Layers(object = object)) - features <- features %||% VariableFeatures(object = object, layer = layer) + features <- features %||% VariableFeatures(object = object) if (!length(x = features)) { stop("No variable features, run FindVariableFeatures() or provide a vector of features", call. = FALSE) } From d7c1ba0c38dd1e4fafde30c00860091fe1abb708 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 17:14:55 -0500 Subject: [PATCH 419/979] add warning for FindVariableFeatures --- R/preprocessing5.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 8dd98c388..5d189f318 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -56,10 +56,6 @@ FindVariableFeatures.default <- function( return(var.gene.ouput) } -g <- function(x, method = VST) { - method <- enquo(arg = method) - FindVariableFeatures(object = x, method = method, layer = 'counts') -} #' @importFrom SeuratObject DefaultLayer Features Key Layers #' @@ -75,9 +71,13 @@ FindVariableFeatures.StdAssay <- function( clip = NULL, key = NULL, verbose = TRUE, + selection.method = 'vst', ... ) { - layer <- unique(x = layer) + if (selection.method != 'vst') { + warning('Only VST is supported for Assay5. ', + 'The method is changed to VST') + } layer <- Layers(object = object, search = layer) if (is.null(x = key)) { false <- function(...) { From 0356b565d6297ec418f5f0e3d99a1e5a1acd5bc2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 18:12:31 -0500 Subject: [PATCH 420/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 67a6daec7..f83f655f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9024 +Version: 4.9.9.9025 Date: 2023-01-26 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 4e4871951cafd06a9aedb8d649e0982dc0a75027 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 21:44:58 -0500 Subject: [PATCH 421/979] add mvp and disp default function --- R/preprocessing5.R | 93 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 5d189f318..61762e35f 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -102,12 +102,12 @@ FindVariableFeatures.StdAssay <- function( message("Finding variable features for layer ", layer[i]) } data <- LayerData(object = object, layer = layer[i], fast = TRUE) - f <- if (inherits(x = data, what = 'V3Matrix')) { + hvf.function <- if (inherits(x = data, what = 'V3Matrix')) { FindVariableFeatures.default } else { FindVariableFeatures } - hvf.info <- f( + hvf.info <- hvf.function( object = data, method = method, nselect = nselect, @@ -984,6 +984,55 @@ VST.matrix <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Calculate dispersion of features +#' +#' +CalcDispersion <- function( + object, + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + verbose = TRUE +) { + if (!inherits(x = object, what = c('dgCMatrix', 'matrix'))) { + stop('mean.var.plot and dispersion methods only support dense and sparse matrix input') + } + if (inherits(x = object, what = 'matrix')) { + object <- as.sparse(x = object) + } + feature.mean <- mean.function(object, verbose) + feature.dispersion <- dispersion.function(object, verbose) + + names(x = feature.mean) <- names(x = feature.dispersion) <- rownames(x = object) + feature.dispersion[is.na(x = feature.dispersion)] <- 0 + feature.mean[is.na(x = feature.mean)] <- 0 + data.x.breaks <- switch( + EXPR = binning.method, + 'equal_width' = num.bin, + 'equal_frequency' = c( + quantile( + x = feature.mean[feature.mean > 0], + probs = seq.int(from = 0, to = 1, length.out = num.bin) + ) + ), + stop("Unknown binning method: ", binning.method) + ) + data.x.bin <- cut(x = feature.mean, breaks = data.x.breaks, + include.lowest = TRUE) + names(x = data.x.bin) <- names(x = feature.mean) + mean.y <- tapply(X = feature.dispersion, INDEX = data.x.bin, FUN = mean) + sd.y <- tapply(X = feature.dispersion, INDEX = data.x.bin, FUN = sd) + feature.dispersion.scaled <- (feature.dispersion - mean.y[as.numeric(x = data.x.bin)]) / + sd.y[as.numeric(x = data.x.bin)] + names(x = feature.dispersion.scaled) <- names(x = feature.mean) + hvf.info <- data.frame(feature.mean, feature.dispersion, feature.dispersion.scaled) + rownames(x = hvf.info) <- rownames(x = object) + colnames(x = hvf.info) <- paste0('mvp.', c('mean', 'dispersion', 'dispersion.scaled')) + return(hvf.info) +} + + #' @importFrom SeuratObject .CalcN #' CalcN <- function(object) { @@ -1001,6 +1050,26 @@ CalcN <- function(object) { )) } +#' Find variable features based on dispersion +#' +DISP <- function( + data, + nselect = 2000L, + verbose = TRUE, + ... +) { + hvf.info <- CalcDispersion(object = data, verbose = verbose, ...) + hvf.info$variable <- FALSE + hvf.info$rank <- NA + vf <- head( + x = order(hvf.info$mvp.dispersion, decreasing = TRUE), + n = nselect + ) + hvf.info$variable[vf] <- TRUE + hvf.info$rank[vf] <- seq_along(along.with = vf) + return(hvf.info) +} + .FeatureVar <- function( data, mu, @@ -2140,3 +2209,23 @@ FetchResiduals_reference <- function(object, new_residual <- MinMax(data = new_residual, min = clip.min, max = clip.max) return(new_residual) } + +#' Find variable features based on mean.var.plot +#' +MVP <- function( + data, + verbose = TRUE, + nselect = 2000L, + mean.cutoff = c(0.1, 8), + dispersion.cutoff = c(1, Inf), + ... +) { + hvf.info <- DISP(data = data, nselect = nselect, verbose = verbose) + hvf.info$variable <- FALSE + means.use <- (hvf.info[, 1] > mean.cutoff[1]) & (hvf.info[, 1] < mean.cutoff[2]) + dispersions.use <- (hvf.info[, 3] > dispersion.cutoff[1]) & (hvf.info[, 3] < dispersion.cutoff[2]) + hvf.info[which(x = means.use & dispersions.use), 'variable'] <- TRUE + hvf.info[hvf.info$variable,'rank'] <- rank(x = hvf.info[hvf.info$variable,'rank']) + hvf.info[!hvf.info$variable,'rank'] <- NA + return(hvf.info) +} \ No newline at end of file From d7c43f6c2707d660b2b8bf777827e3560cafd095 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 22:38:44 -0500 Subject: [PATCH 422/979] add mvp and disp for v5 assay --- R/preprocessing5.R | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 61762e35f..829493116 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -64,9 +64,9 @@ FindVariableFeatures.default <- function( #' FindVariableFeatures.StdAssay <- function( object, - method = VST, + method = NULL, nselect = 2000L, - layer = 'counts', + layer = NULL, span = 0.3, clip = NULL, key = NULL, @@ -74,9 +74,22 @@ FindVariableFeatures.StdAssay <- function( selection.method = 'vst', ... ) { - if (selection.method != 'vst') { - warning('Only VST is supported for Assay5. ', - 'The method is changed to VST') + if (selection.method == 'vst') { + layer <- layer%||%'counts' + method <- VST + key <- 'vst' + } else if (selection.method %in% c('mean.var.plot', 'mvp')) { + layer <- layer%||%'data' + method <- MVP + key <- 'mvp' + } else if (selection.method %in% c('dispersion', 'disp')) { + layer <- layer%||%'data' + method <- DISP + key <- 'disp' + } else if (is.null(x = method) || is.null(x = layer)){ + stop('Custome functions and layers are both required') + } else { + key <- NULL } layer <- Layers(object = object, search = layer) if (is.null(x = key)) { @@ -147,11 +160,14 @@ FindVariableFeatures.StdAssay <- function( rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) object[colnames(x = hvf.info)] <- hvf.info } - VariableFeatures(object = object) <- VariableFeatures( - object = object, - nfeatures = nselect, - simplify = TRUE - ) + var.name <- paste( + 'vf', + key, + layer[i], + 'variable', + sep = '_' + ) + VariableFeatures(object = object) <- rownames(hvf.info)[hvf.info[,var.name]] return(object) } @@ -993,7 +1009,8 @@ CalcDispersion <- function( dispersion.function = FastLogVMR, num.bin = 20, binning.method = "equal_width", - verbose = TRUE + verbose = TRUE, + ... ) { if (!inherits(x = object, what = c('dgCMatrix', 'matrix'))) { stop('mean.var.plot and dispersion methods only support dense and sparse matrix input') From c75b315d444e8da6a84d622d8d9a0eb6e048162b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 26 Jan 2023 22:41:14 -0500 Subject: [PATCH 423/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f83f655f5..1c52d298b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9025 +Version: 4.9.9.9026 Date: 2023-01-26 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 7ca0e3ad692d7d2aa689d05cb3bca8511bc31d38 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 27 Jan 2023 11:34:21 -0500 Subject: [PATCH 424/979] run on satijalab05 --- .github/workflows/R_CMD_check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml index e81e333b5..190c2f5d7 100644 --- a/.github/workflows/R_CMD_check.yaml +++ b/.github/workflows/R_CMD_check.yaml @@ -13,7 +13,7 @@ jobs: name: R CMD check container: image: satijalab/seurat:develop - runs-on: self-hosted + runs-on: [self-hosted, satijalab05] steps: - uses: actions/checkout@v2 From df4c58b7b25f88cc63d981b4aebd1614eabde05b Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 27 Jan 2023 12:34:41 -0500 Subject: [PATCH 425/979] install SeuratObject, BPCells, Signac --- .github/workflows/R_CMD_check.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml index 190c2f5d7..c85d7177b 100644 --- a/.github/workflows/R_CMD_check.yaml +++ b/.github/workflows/R_CMD_check.yaml @@ -5,6 +5,9 @@ on: - develop pull_request: +env: + GITHUB_PAT: ${{ secrets.PAT }} + jobs: r-cmd-check: @@ -21,6 +24,12 @@ jobs: run: rm -rf 'vignettes/' shell: bash + - name: Install additional dependencies + run: | + Rscript -e "remotes::install_github('mojaveazure/seurat-object', ref = 'feat/CalN_generic')" + Rscript -e "remotes::install_github('stuart-lab/signac', ref = 'seurat5')" + Rscript -e "remotes::install_github('bnprks/BPCells')" + - name: Check run: devtools::check(args = "--no-manual", error_on = "warning", check_dir = "check", force_suggests = FALSE) shell: Rscript {0} From eb7d9093e82911da2fd53b1914cef222e6fcc33f Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 27 Jan 2023 12:40:04 -0500 Subject: [PATCH 426/979] drop signac install --- .github/workflows/R_CMD_check.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml index c85d7177b..1f8d2bd7c 100644 --- a/.github/workflows/R_CMD_check.yaml +++ b/.github/workflows/R_CMD_check.yaml @@ -27,7 +27,6 @@ jobs: - name: Install additional dependencies run: | Rscript -e "remotes::install_github('mojaveazure/seurat-object', ref = 'feat/CalN_generic')" - Rscript -e "remotes::install_github('stuart-lab/signac', ref = 'seurat5')" Rscript -e "remotes::install_github('bnprks/BPCells')" - name: Check From 5d621475ef23ff8a8750f5f240478216d8305cfb Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 27 Jan 2023 19:54:21 -0500 Subject: [PATCH 427/979] fix s5 atac vignette typo --- vignettes/seurat5_atacseq_integration_vignette.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_atacseq_integration_vignette.Rmd b/vignettes/seurat5_atacseq_integration_vignette.Rmd index 0d4d2b1c7..7745629f2 100644 --- a/vignettes/seurat5_atacseq_integration_vignette.Rmd +++ b/vignettes/seurat5_atacseq_integration_vignette.Rmd @@ -69,7 +69,7 @@ pbmc.rna <- LoadData("pbmcMultiome", "pbmc.rna") pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac") pbmc.rna <- UpdateSeuratObject(pbmc.rna) -pbma.atac <- UpdateSeuratObject(pbmc.atac) +pbmc.atac <- UpdateSeuratObject(pbmc.atac) # repeat QC steps performed in the WNN vignette pbmc.rna <- subset(pbmc.rna, seurat_annotations != 'filtered') From 79df76705e68ff22a7efa486c777ade2e94d6715 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 27 Jan 2023 21:28:16 -0500 Subject: [PATCH 428/979] update obj mapping vig --- vignettes/multimodal_reference_mapping.Rmd | 6 +++++- vignettes/seurat5_multimodal_reference_mapping.Rmd | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/vignettes/multimodal_reference_mapping.Rmd b/vignettes/multimodal_reference_mapping.Rmd index 0affbb33c..15f2eb277 100644 --- a/vignettes/multimodal_reference_mapping.Rmd +++ b/vignettes/multimodal_reference_mapping.Rmd @@ -65,7 +65,7 @@ options(SeuratData.repo.use = "http://satijalab04.nygenome.org") We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048), and visualize the pre-computed UMAP. This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -reference <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") +reference <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") ``` ```{r ref.dimplot} @@ -236,6 +236,10 @@ bm <- LoadData(ds = "bmcite") #load query data InstallData('hcabm40k') hcabm40k <- LoadData(ds = "hcabm40k") + +bm <- UpdateSeuratObject(bm) +hcabm40k <- UpdateSeuratObject(hcabm40k) + ``` The reference dataset contains a [WNN graph](weighted_nearest_neighbor_analysis.html), reflecting a weighted combination of the RNA and protein data in this CITE-seq experiment. diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd index cac375e58..216a42adb 100644 --- a/vignettes/seurat5_multimodal_reference_mapping.Rmd +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -65,8 +65,7 @@ options(SeuratData.repo.use = "http://satijalab04.nygenome.org") We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048), and visualize the pre-computed UMAP. This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -reference <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") -reference <- UpdateSeuratObject(reference) +reference <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") ``` ```{r ref.dimplot} From 3eee88068484505a9df1c85839a5094761b1dab5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 27 Jan 2023 21:52:37 -0500 Subject: [PATCH 429/979] query v5 assay mapping vig --- vignettes/seurat5_multimodal_reference_mapping.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd index 216a42adb..1f5f3989a 100644 --- a/vignettes/seurat5_multimodal_reference_mapping.Rmd +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -80,6 +80,7 @@ To demonstrate mapping to this multimodal reference, we will use a dataset of 2, library(SeuratData) InstallData('pbmc3k') pbmc3k <- UpdateSeuratObject(pbmc3k) +pbmc3k[['RNA']] <- as(pbmc3k[['RNA']], Class = 'Assay5') ``` The reference was normalized using `SCTransform()`, so we use the same approach to normalize the query here. From 9aa6aac41bb6405233a38a5d62148f19debe4b5a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 27 Jan 2023 23:06:23 -0500 Subject: [PATCH 430/979] fix sct residuals --- R/preprocessing.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 933f61366..2671a168d 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -418,7 +418,11 @@ GetResidual <- function( "This SCTAssay contains multiple SCT models. Computing residuals for cells using different models" ) } - if ((!umi.assay %in% Assays(object = object)) || class(x = object[[umi.assay]])[1] == "Assay"){ + if (!umi.assay %in% Assays(object = object) || + length(x = Layers(object = object[[umi.assay]], search = 'counts')) == 0) { + return(object) + } + if (inherits(x = object[[umi.assay]], what = 'Assay')) { new.residuals <- lapply( X = sct.models, FUN = function(x) { @@ -433,7 +437,7 @@ GetResidual <- function( ) } ) - } else if (class(x = object[[umi.assay]])[1] == "Assay5"){ + } else if (inherits(x = object[[umi.assay]], what = 'Assay5')) { new.residuals <- lapply( X = sct.models, FUN = function(x) { @@ -447,7 +451,6 @@ GetResidual <- function( } ) } - existing.data <- GetAssayData(object = object, slot = 'scale.data', assay = assay) all.features <- union(x = rownames(x = existing.data), y = features) new.scale <- matrix( From 72666ee09b170f5769d40475e6be97cf93c2e34c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 27 Jan 2023 23:12:20 -0500 Subject: [PATCH 431/979] fix v5 vignette --- vignettes/seurat5_atacseq_integration_vignette.Rmd | 1 + vignettes/seurat5_hashing_vignette.Rmd | 2 +- vignettes/seurat5_integration_mapping.Rmd | 2 +- vignettes/seurat5_multimodal_reference_mapping.Rmd | 1 + vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd | 6 ++---- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vignettes/seurat5_atacseq_integration_vignette.Rmd b/vignettes/seurat5_atacseq_integration_vignette.Rmd index 7745629f2..eadcd9b43 100644 --- a/vignettes/seurat5_atacseq_integration_vignette.Rmd +++ b/vignettes/seurat5_atacseq_integration_vignette.Rmd @@ -71,6 +71,7 @@ pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac") pbmc.rna <- UpdateSeuratObject(pbmc.rna) pbmc.atac <- UpdateSeuratObject(pbmc.atac) +pbmc.rna[['RNA']] <- as(pbmc.rna[['RNA']], Class = 'Assay5') # repeat QC steps performed in the WNN vignette pbmc.rna <- subset(pbmc.rna, seurat_annotations != 'filtered') pbmc.atac <- subset(pbmc.atac, seurat_annotations != 'filtered') diff --git a/vignettes/seurat5_hashing_vignette.Rmd b/vignettes/seurat5_hashing_vignette.Rmd index 5321d8062..d389067c8 100644 --- a/vignettes/seurat5_hashing_vignette.Rmd +++ b/vignettes/seurat5_hashing_vignette.Rmd @@ -146,7 +146,7 @@ RidgePlot(pbmc.hashtag, assay = 'HTO', features = rownames(pbmc.hashtag[['HTO']] Visualize pairs of HTO signals to confirm mutual exclusivity in singlets ```{r hashtag_scatter1, fig.height=8, fig.width=9} -FeatureScatter(pbmc.hashtag, feature1 = 'hto_HTO_A', feature2 = 'hto_HTO_B') +FeatureScatter(pbmc.hashtag, feature1 = 'hto_HTO-A', feature2 = 'hto_HTO-B') ``` Compare number of UMIs for singlets, doublets and negative cells diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index 0d3a009a4..0e4661fc4 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -130,7 +130,7 @@ After finding anchors, we use the `TransferData()` function to classify the quer # do we want a different query and reference object or just have different layers?? pancreas.query <- DietSeurat(panc8, layers = "fluidigmc1", assays = "RNA", ) pancreas.query <- as(object = pancreas.query[['RNA']], Class = 'Assay5') -pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8@meta.data) +pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8[[]]) pancreas.anchors <- FindTransferAnchors(reference = pancreas.ref, query = pancreas.query, dims = 1:30, reference.reduction = "integrated.dr", k.filter = NA) diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd index 1f5f3989a..87490972b 100644 --- a/vignettes/seurat5_multimodal_reference_mapping.Rmd +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -239,6 +239,7 @@ bm <- UpdateSeuratObject(bm) InstallData('hcabm40k') hcabm40k <- LoadData(ds = "hcabm40k") hcabm40k <- UpdateSeuratObject(hcabm40k) +hcabm40k[['RNA']] <- as(hcabm40k[['RNA']], Class = 'Assay5') ``` The reference dataset contains a [WNN graph](weighted_nearest_neighbor_analysis.html), reflecting a weighted combination of the RNA and protein data in this CITE-seq experiment. diff --git a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd index 9d4b17055..d5b38df08 100644 --- a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd +++ b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd @@ -63,8 +63,8 @@ library(dplyr) InstallData("bmcite") bm <- LoadData(ds = "bmcite") bm <- UpdateSeuratObject(bm) -bm[["ADT"]] <- CreateAssay5Object(bm[["ADT"]]@counts) -bm[["RNA"]] <- CreateAssay5Object(bm[["RNA"]]@counts) +bm[["ADT"]] <- CreateAssay5Object(bm[["ADT"]]$counts) +bm[["RNA"]] <- CreateAssay5Object(bm[["RNA"]]$counts) ``` We first perform pre-processing and dimensional reduction on both assays independently. We use standard normalization, but you can also use SCTransform or any alternative method. @@ -72,9 +72,7 @@ We first perform pre-processing and dimensional reduction on both assays indepen ```{r pp.rna} DefaultAssay(bm) <- 'RNA' bm <- NormalizeData(bm) -DefaultLayer(bm[["RNA"]]) <- "counts" bm <- FindVariableFeatures(bm) -DefaultLayer(bm[["RNA"]]) <- "data" bm <- ScaleData(bm) bm <- RunPCA(bm) From 0823d18d50b30f63424d85868c427c6daa17bd95 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 27 Jan 2023 23:13:12 -0500 Subject: [PATCH 432/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1c52d298b..1ab6a1258 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9026 -Date: 2023-01-26 +Version: 4.9.9.9027 +Date: 2023-01-27 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From f57699c94976df2b625c8ffd458ad60cd335f618 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 30 Jan 2023 11:03:59 -0500 Subject: [PATCH 433/979] new rename assays --- R/integration.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/integration.R b/R/integration.R index 2fa18ddbe..8e4402a6d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -839,13 +839,9 @@ FindTransferAnchors <- function( )) } } - # make global variables to allow access for RenameAssays - query.assay <<- query.assay - reference.assay <<- reference.assay - # make new query assay w same name as reference assay + # Rename query assay w same name as reference assay if (query.assay != reference.assay) { - suppressWarnings(expr = query[[reference.assay]] <- query[[query.assay]]) - suppressWarnings(expr = query <- RenameAssays(query, query.assay = reference.assay)) + suppressWarnings(expr = query <- RenameAssays(query, old.assay.name = query.assay, new.assay.name = reference.assay)) DefaultAssay(query) <- reference.assay } # only keep necessary info from objects @@ -1209,7 +1205,6 @@ FindTransferAnchors <- function( slot(object = anchor.set, name = "neighbors") <- list( query.neighbors = query.neighbors) } - rm(query.assay, reference.assay) return(anchor.set) } From 887526d31748b8167a19ee2ca258ac44ea477c7b Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 30 Jan 2023 11:42:03 -0500 Subject: [PATCH 434/979] parameter names --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 8e4402a6d..bfcea9ec5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -841,7 +841,7 @@ FindTransferAnchors <- function( } # Rename query assay w same name as reference assay if (query.assay != reference.assay) { - suppressWarnings(expr = query <- RenameAssays(query, old.assay.name = query.assay, new.assay.name = reference.assay)) + suppressWarnings(expr = query <- RenameAssays(query, assay.name = query.assay, new.assay.name = reference.assay)) DefaultAssay(query) <- reference.assay } # only keep necessary info from objects From e635452d761bb2b607e46b25a99a11254571f564 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 12:37:19 -0500 Subject: [PATCH 435/979] do not stop on error for BPCells vignettes --- vignettes/BPCells_COVID_SCTMapping.Rmd | 3 ++- vignettes/BPCells_COVID_logMapping.Rmd | 3 ++- vignettes/BPCells_sketch_clustering_mouse_brain.Rmd | 3 ++- vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd | 3 ++- vignettes/BPCells_sketch_inte_1M.Rmd | 3 ++- vignettes/BPCells_sketch_inte_1M_SCT.Rmd | 3 ++- vignettes/future_vignette.Rmd | 1 + vignettes/seurat5_future_vignette.Rmd | 5 +++-- vignettes/seurat5_integration_large_datasets.Rmd | 1 + 9 files changed, 17 insertions(+), 8 deletions(-) diff --git a/vignettes/BPCells_COVID_SCTMapping.Rmd b/vignettes/BPCells_COVID_SCTMapping.Rmd index 11f964465..65bb7c067 100755 --- a/vignettes/BPCells_COVID_SCTMapping.Rmd +++ b/vignettes/BPCells_COVID_SCTMapping.Rmd @@ -21,7 +21,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/BPCells_COVID_logMapping.Rmd b/vignettes/BPCells_COVID_logMapping.Rmd index 8a328f8cd..00df41c52 100755 --- a/vignettes/BPCells_COVID_logMapping.Rmd +++ b/vignettes/BPCells_COVID_logMapping.Rmd @@ -21,7 +21,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd index 6ca1ab3a5..ebfca4f83 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd @@ -21,7 +21,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd index d2d5e02fb..5b276c129 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd @@ -21,7 +21,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/BPCells_sketch_inte_1M.Rmd b/vignettes/BPCells_sketch_inte_1M.Rmd index 4639c14bf..f4cd57249 100755 --- a/vignettes/BPCells_sketch_inte_1M.Rmd +++ b/vignettes/BPCells_sketch_inte_1M.Rmd @@ -21,7 +21,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd index a52110e0e..b4c7f5ec7 100755 --- a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd +++ b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd @@ -21,7 +21,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/future_vignette.Rmd b/vignettes/future_vignette.Rmd index 1f10104e2..cb7e747f6 100644 --- a/vignettes/future_vignette.Rmd +++ b/vignettes/future_vignette.Rmd @@ -60,6 +60,7 @@ For example, to run the parallel version of `FindMarkers()`, you simply need to ```{r demo} library(Seurat) pbmc <- readRDS("../data/pbmc3k_final.rds") +pbmc <- UpdateSeuratObject(pbmc) # Enable parallelization plan('multiprocess', workers = 4) diff --git a/vignettes/seurat5_future_vignette.Rmd b/vignettes/seurat5_future_vignette.Rmd index 98b57ea8e..cd9e473f9 100644 --- a/vignettes/seurat5_future_vignette.Rmd +++ b/vignettes/seurat5_future_vignette.Rmd @@ -59,8 +59,9 @@ For example, to run the parallel version of `FindMarkers()`, you simply need to ```{r demo} library(Seurat) -data("pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc3k.final) +pbmc <- readRDS("../data/pbmc3k_final.rds") +pbmc <- UpdateSeuratObject(pbmc) +pbmc[["RNA"]] <- CreateAssay5Object(pbmc[["RNA"]]@counts) # Enable parallelization plan('multiprocess', workers = 4) diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd index 0b56a405a..409761cc2 100644 --- a/vignettes/seurat5_integration_large_datasets.Rmd +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -34,6 +34,7 @@ knitr::opts_chunk$set( ```{r, include=TRUE} options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +options(future.globals.maxSize = 8e9) ``` For very large datasets, the standard integration workflow can sometimes be prohibitively computationally expensive. In this workflow, we employ two options that can improve efficiency and runtimes: From d5fb172fb4e896b844d1b5fac4e0151a6aed7f70 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 14:26:11 -0500 Subject: [PATCH 436/979] fix layer arg in Ridge and Vln plot --- R/visualization.R | 4 ++-- vignettes/spatial_vignette_2.Rmd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index d0b28c609..dc4c0d94c 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -549,7 +549,7 @@ RidgePlot <- function( what = 'RidgePlot(slot = )', with = 'RidgePlot(layer = )' ) - layer <- layer %||% slot + layer <- slot %||% layer } return(ExIPlot( object = object, @@ -634,7 +634,7 @@ VlnPlot <- function( what = 'VlnPlot(slot = )', with = 'VlnPlot(layer = )' ) - layer <- layer %||% slot + layer <- slot %||% layer } if ( !is.null(x = split.by) & diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 90bbee2a3..4d8fe5e8b 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -248,7 +248,7 @@ ImageDimPlot(nano.obj, fov = "lung5.rep1", cells = WhichCells(nano.obj, idents=c We can also visualize gene expression markers a few different ways: ```{r, fig.width=10, fig.height=5} -VlnPlot(nano.obj, features = "KRT17", slot = "counts", pt.size = 0.1, y.max = 30) + NoLegend() +VlnPlot(nano.obj, features = "KRT17", layer = "counts", pt.size = 0.1, y.max = 30) + NoLegend() ``` ```{r, fig.width=5, fig.height=4} From 0c299a1dbc65af7d81e84d8e9b2e5a396d43dc88 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 14:39:42 -0500 Subject: [PATCH 437/979] remove DefaultLayer switching in StdAssay NormalizeData method --- R/preprocessing5.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 829493116..5eeee2505 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -625,15 +625,11 @@ NormalizeData.StdAssay <- function( margin = 1L, layer = 'counts', save = 'data', - default = TRUE, verbose = TRUE, ... ) { olayer <- layer <- unique(x = layer) layer <- Layers(object = object, search = layer) - if (save %in% olayer) { - default <- FALSE - } if (length(x = save) != length(x = layer)) { save <- make.unique(names = gsub( pattern = olayer, @@ -661,9 +657,6 @@ NormalizeData.StdAssay <- function( ... ) } - if (isTRUE(x = default)) { - DefaultLayer(object = object) <- save - } gc(verbose = FALSE) return(object) } From 15a25627b1b4bd80cc8d797a12f797ce73cfcddb Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 16:12:39 -0500 Subject: [PATCH 438/979] update vignette formatting --- vignettes/seurat5_future_vignette.Rmd | 2 +- .../seurat5_integration_introduction.Rmd | 12 +- vignettes/seurat5_integration_mapping.Rmd | 12 +- vignettes/seurat5_integration_rpca.Rmd | 29 +- vignettes/seurat5_mixscape_vignette.Rmd | 248 +++++++++--------- vignettes/seurat5_sctransform_v2_vignette.Rmd | 3 +- vignettes/seurat5_spatial_vignette.Rmd | 11 +- vignettes/seurat5_visualization_vignette.Rmd | 19 +- vignettes/spatial_vignette_2.Rmd | 6 + 9 files changed, 168 insertions(+), 174 deletions(-) diff --git a/vignettes/seurat5_future_vignette.Rmd b/vignettes/seurat5_future_vignette.Rmd index cd9e473f9..694227bb8 100644 --- a/vignettes/seurat5_future_vignette.Rmd +++ b/vignettes/seurat5_future_vignette.Rmd @@ -61,7 +61,7 @@ For example, to run the parallel version of `FindMarkers()`, you simply need to library(Seurat) pbmc <- readRDS("../data/pbmc3k_final.rds") pbmc <- UpdateSeuratObject(pbmc) -pbmc[["RNA"]] <- CreateAssay5Object(pbmc[["RNA"]]@counts) +pbmc[["RNA"]] <- as(pbmc[["RNA"]], Class = "Assay5") # Enable parallelization plan('multiprocess', workers = 4) diff --git a/vignettes/seurat5_integration_introduction.Rmd b/vignettes/seurat5_integration_introduction.Rmd index 67588f46e..41ebc021a 100644 --- a/vignettes/seurat5_integration_introduction.Rmd +++ b/vignettes/seurat5_integration_introduction.Rmd @@ -68,9 +68,8 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -data('ifnb') -ifnb <- UpdateSeuratObject(ifnb) -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) +ifnb <- LoadData("ifnb") +ifnb[["RNA"]] <- as(ifnb[["RNA"]], Class = "Assay5") # split the dataset into layers (stim and CTRL) ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) @@ -233,10 +232,9 @@ Below, we demonstrate how to modify the Seurat integration workflow for datasets ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} -LoadData('ifnb') -ifnb <- UpdateSeuratObject(ifnb) -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) -ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) +ifnb <- LoadData("ifnb") +ifnb[["RNA"]] <- as(ifnb[["RNA"]], Class = "Assay5") +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) ifnb <- SCTransform(ifnb) ifnb <- RunPCA(ifnb) diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index 0e4661fc4..25ab4caf3 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -47,15 +47,15 @@ library(SeuratData) ``` ```{r install.data, eval=FALSE} -InstallData('panc8') +InstallData("panc8") ``` To construct a reference, we will identify 'anchors' between the individual datasets. First, we split the combined object into a list, with each dataset as an element (this is only necessary because the data was bundled together for easy distribution). ```{r preprocessing1} -data('panc8') -panc8 <- UpdateSeuratObject(panc8) -panc8[["RNA"]] <- CreateAssay5Object(panc8[["RNA"]]@counts) +panc8 <- LoadData("panc8") +panc8[["RNA"]] <- as(panc8[["RNA"]], Class = "Assay5") + # split the dataset into layers by technology panc8[["RNA"]] <- split(panc8[["RNA"]], f = panc8$tech) ``` @@ -76,7 +76,7 @@ Next, we identify anchors using the `FindIntegrationAnchors()` function, which t ```{r integration.anchors, warning = FALSE, message = FALSE} pancreas.ref <- DietSeurat(panc8, layers = c("celseq", "celseq2", "smartseq2")) -pancreas.ref <- as(object = pancreas.ref[['RNA']], Class = 'Assay5') +pancreas.ref <- as(object = pancreas.ref[["RNA"]], Class = "Assay5") pancreas.ref <- CreateSeuratObject(pancreas.ref, meta.data = panc8@meta.data) pancreas.ref <- ScaleData(pancreas.ref) pancreas.ref <- RunPCA(pancreas.ref) @@ -129,7 +129,7 @@ After finding anchors, we use the `TransferData()` function to classify the quer ```{r label.transfer, warning = FALSE, message = FALSE} # do we want a different query and reference object or just have different layers?? pancreas.query <- DietSeurat(panc8, layers = "fluidigmc1", assays = "RNA", ) -pancreas.query <- as(object = pancreas.query[['RNA']], Class = 'Assay5') +pancreas.query <- as(object = pancreas.query[["RNA"]], Class = "Assay5") pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8[[]]) pancreas.anchors <- FindTransferAnchors(reference = pancreas.ref, query = pancreas.query, dims = 1:30, reference.reduction = "integrated.dr", k.filter = NA) diff --git a/vignettes/seurat5_integration_rpca.Rmd b/vignettes/seurat5_integration_rpca.Rmd index 72c7223a9..20ea5f554 100644 --- a/vignettes/seurat5_integration_rpca.Rmd +++ b/vignettes/seurat5_integration_rpca.Rmd @@ -50,14 +50,13 @@ options(SeuratData.repo.use = "http://satijalab04.nygenome.org") library(Seurat) library(SeuratData) # install dataset -InstallData('ifnb') +InstallData("ifnb") ``` ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset -data('ifnb') # what if I do data here instead -ifnb <- UpdateSeuratObject(ifnb) -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) +ifnb <- LoadData("ifnb") +ifnb[["RNA"]] <- as(ifnb[["RNA"]], Class = "Assay5") # split the dataset into layers (stim and CTRL) ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) @@ -75,9 +74,6 @@ features <- VariableFeatures(ifnb) ifnb <- ScaleData(ifnb, features = features, verbose = FALSE) ifnb <- RunPCA(ifnb, features = features, verbose = FALSE) ifnb - - - ``` # Perform integration @@ -85,7 +81,7 @@ ifnb We then identify anchors using the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`. ```{r integrate.data} -ifnb <- IntegrateLayers(object = ifnb, +ifnb <- IntegrateLayers(object = ifnb, method = RPCAIntegration, features = features, verbose = F) @@ -118,7 +114,7 @@ The results show that rpca-based integration is more conservative, and in this c #immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features,reduction = 'rpca', k.anchor = 20) #ifnb <- IntegrateData(anchorset = immune.anchors) -ifnb <- IntegrateLayers(object = ifnb, +ifnb <- IntegrateLayers(object = ifnb, k.anchor = 20, method = RPCAIntegration, features = features, @@ -139,8 +135,8 @@ p1 + p2 ```{r save.img, include=TRUE} library(ggplot2) plot <- DimPlot(ifnb, group.by = "stim") + - xlab("UMAP 1") + ylab("UMAP 2") + - theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + + xlab("UMAP 1") + ylab("UMAP 2") + + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10))) ggsave(filename = "../output/images/rpca_integration.jpg", height = 7, width = 12, plot = plot, quality = 50) ``` @@ -152,10 +148,9 @@ Now that the datasets have been integrated, you can follow the previous steps in As an additional example, we repeat the analyses performed above, but normalize the datasets using [SCTransform](sctransform_vignette.html). We may choose to set the `method` parameter to `glmGamPoi` (install [here](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html)) in order to enable faster estimation of regression parameters in `SCTransform()`. ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} -data('ifnb') -ifnb <- UpdateSeuratObject(ifnb) -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) -ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) +ifnb <- LoadData("ifnb") +ifnb[["RNA"]] <- as(ifnb[["RNA"]], Class = "Assay5") +ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim) ifnb <- SCTransform(ifnb, method = "glmGamPoi") features <- VariableFeatures(ifnb) @@ -163,7 +158,7 @@ ifnb <- RunPCA(ifnb, features = features) ``` ```{r ifnb.cca.sct.anchors} -ifnb <- IntegrateLayers(object = ifnb, +ifnb <- IntegrateLayers(object = ifnb, method = RPCAIntegration, normalization.method = "SCT", features = features, @@ -183,7 +178,7 @@ p1 + p2 ``` ```{r save.times, include=TRUE} -#write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_rpca.csv") +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_integration_rpca.csv") ```
    diff --git a/vignettes/seurat5_mixscape_vignette.Rmd b/vignettes/seurat5_mixscape_vignette.Rmd index 0ee06edd5..5d229c54d 100644 --- a/vignettes/seurat5_mixscape_vignette.Rmd +++ b/vignettes/seurat5_mixscape_vignette.Rmd @@ -55,8 +55,8 @@ InstallData(ds = "thp1.eccite") # Setup custom theme for plotting. custom_theme <- theme( - plot.title = element_text(size=16, hjust = 0.5), - legend.key.size = unit(0.7, "cm"), + plot.title = element_text(size = 16, hjust = 0.5), + legend.key.size = unit(0.7, "cm"), legend.text = element_text(size = 14)) ``` @@ -67,17 +67,17 @@ We use a 111 gRNA ECCITE-seq dataset generated from stimulated THP-1 cells that ```{r eccite.load} # Load object. eccite <- LoadData(ds = "thp1.eccite") -eccite <- UpdateSeuratObject(eccite) -eccite[["RNA"]] <- CreateAssay5Object(eccite[["RNA"]]@counts) -eccite[["ADT"]] <- CreateAssay5Object(eccite[["ADT"]]@counts) -eccite[["HTO"]] <- CreateAssay5Object(eccite[["HTO"]]@counts) -eccite[["GDO"]] <- CreateAssay5Object(eccite[["GDO"]]@counts) + +eccite[["RNA"]] <- as(eccite[["RNA"]], Class = "Assay5") +eccite[["ADT"]] <- as(eccite[["ADT"]], Class = "Assay5") +eccite[["HTO"]] <- as(eccite[["HTO"]], Class = "Assay5") +eccite[["GDO"]] <- as(eccite[["GDO"]], Class = "Assay5") # Normalize protein. eccite <- NormalizeData( - object = eccite, - assay = "ADT", - normalization.method = "CLR", + object = eccite, + assay = "ADT", + normalization.method = "CLR", margin = 2) ``` @@ -100,10 +100,10 @@ eccite <- RunUMAP(object = eccite, dims = 1:40) # Generate plots to check if clustering is driven by biological replicate ID, # cell cycle phase or target gene class. p1 <- DimPlot( - object = eccite, - group.by = 'replicate', - label = F, - pt.size = 0.2, + object = eccite, + group.by = "replicate", + label = F, + pt.size = 0.2, reduction = "umap", cols = "Dark2", repel = T) + scale_color_brewer(palette = "Dark2") + ggtitle("Biological Replicate") + @@ -111,28 +111,28 @@ p1 <- DimPlot( ylab("UMAP 2") p2 <- DimPlot( - object = eccite, - group.by = 'Phase', - label = F, pt.size = 0.2, - reduction = "umap", repel = T) + + object = eccite, + group.by = "Phase", + label = F, pt.size = 0.2, + reduction = "umap", repel = T) + ggtitle("Cell Cycle Phase") + ylab("UMAP 2") + xlab("UMAP 1") p3 <- DimPlot( - object = eccite, - group.by = 'crispr', - pt.size = 0.2, - reduction = "umap", - split.by = "crispr", - ncol = 1, - cols = c("grey39","goldenrod3")) + + object = eccite, + group.by = "crispr", + pt.size = 0.2, + reduction = "umap", + split.by = "crispr", + ncol = 1, + cols = c("grey39", "goldenrod3")) + ggtitle("Perturbation Status") + ylab("UMAP 2") + xlab("UMAP 1") # Visualize plots. -((p1 / p2 + plot_layout(guides = 'auto')) | p3 ) +((p1 / p2 + plot_layout(guides = "auto")) | p3 ) ``` # Calculating local perturbation signatures mitigates confounding effects @@ -141,53 +141,53 @@ To calculate local perturbation signatures we set the number of non-targeting Ne ```{r eccite.cps, fig.height = 10, fig.width = 15} # Calculate perturbation signature (PRTB). -eccite<- CalcPerturbSig( - object = eccite, - assay = "RNA", - slot = "data", - gd.class ="gene", - nt.cell.class = "NT", - reduction = "pca", - ndims = 40, - num.neighbors = 20, - split.by = "replicate", +eccite <- CalcPerturbSig( + object = eccite, + assay = "RNA", + slot = "data", + gd.class ="gene", + nt.cell.class = "NT", + reduction = "pca", + ndims = 40, + num.neighbors = 20, + split.by = "replicate", new.assay.name = "PRTB") -# Prepare PRTB assay for dimensionality reduction: +# Prepare PRTB assay for dimensionality reduction: # Normalize data, find variable features and center data. -DefaultAssay(object = eccite) <- 'PRTB' +DefaultAssay(object = eccite) <- "PRTB" # Use variable features from RNA assay. VariableFeatures(object = eccite) <- VariableFeatures(object = eccite[["RNA"]]) -eccite <- ScaleData(object = eccite, do.scale = F, do.center = T) +eccite <- ScaleData(object = eccite, do.scale = FALSE, do.center = TRUE) # Run PCA to reduce the dimensionality of the data. -eccite <- RunPCA(object = eccite, reduction.key = 'prtbpca', reduction.name = 'prtbpca') +eccite <- RunPCA(object = eccite, reduction.key = "prtbpca", reduction.name = "prtbpca") # Run UMAP to visualize clustering in 2-D. eccite <- RunUMAP( - object = eccite, - dims = 1:40, - reduction = 'prtbpca', - reduction.key = 'prtbumap', - reduction.name = 'prtbumap') + object = eccite, + dims = 1:40, + reduction = "prtbpca", + reduction.key = "prtbumap", + reduction.name = "prtbumap") -# Generate plots to check if clustering is driven by biological replicate ID, +# Generate plots to check if clustering is driven by biological replicate ID, # cell cycle phase or target gene class. q1 <- DimPlot( - object = eccite, - group.by = 'replicate', - reduction = 'prtbumap', - pt.size = 0.2, cols = "Dark2", label = F, repel = T) + + object = eccite, + group.by = "replicate", + reduction = "prtbumap", + pt.size = 0.2, cols = "Dark2", label = FALSE, repel = TRUE) + scale_color_brewer(palette = "Dark2") + ggtitle("Biological Replicate") + ylab("UMAP 2") + xlab("UMAP 1") q2 <- DimPlot( - object = eccite, - group.by = 'Phase', - reduction = 'prtbumap', + object = eccite, + group.by = "Phase", + reduction = "prtbumap", pt.size = 0.2, label = F, repel = T) + ggtitle("Cell Cycle Phase") + ylab("UMAP 2") + @@ -195,18 +195,18 @@ q2 <- DimPlot( q3 <- DimPlot( object = eccite, - group.by = 'crispr', - reduction = 'prtbumap', - split.by = "crispr", - ncol = 1, - pt.size = 0.2, - cols = c("grey39","goldenrod3")) + + group.by = "crispr", + reduction = "prtbumap", + split.by = "crispr", + ncol = 1, + pt.size = 0.2, + cols = c("grey39", "goldenrod3")) + ggtitle("Perturbation Status") + ylab("UMAP 2") + xlab("UMAP 1") # Visualize plots. -(q1 / q2 + plot_layout(guides = 'auto') | q3) +(q1 / q2 + plot_layout(guides = "auto") | q3) ``` # Mixscape identifies cells with no detectable perturbation @@ -216,15 +216,15 @@ Here, we are assuming each target gene class is a mixture of two Gaussian distri ```{r eccite.mixscape, fig.height = 20, fig.width = 20, results="hide"} # Run mixscape. eccite <- RunMixscape( - object = eccite, - assay = "PRTB", - slot = "scale.data", - labels = "gene", - nt.class.name = "NT", - min.de.genes = 5, - iter.num = 10, - de.assay = "RNA", - verbose = F, + object = eccite, + assay = "PRTB", + slot = "scale.data", + labels = "gene", + nt.class.name = "NT", + min.de.genes = 5, + iter.num = 10, + de.assay = "RNA", + verbose = FALSE, prtb.type = "KO") # Calculate percentage of KO cells for all target gene classes. @@ -233,28 +233,28 @@ df <- prop.table(table(eccite$mixscape_class.global, eccite$NT),2) df2 <- reshape2::melt(df) df2$Var2 <- as.character(df2$Var2) test <- df2[which(df2$Var1 == "KO"),] -test <- test[order(test$value, decreasing = T),] +test <- test[order(test$value, decreasing = TRUE),] new.levels <- test$Var2 -df2$Var2 <- factor(df2$Var2, levels = new.levels ) +df2$Var2 <- factor(df2$Var2, levels = new.levels) df2$Var1 <- factor(df2$Var1, levels = c("NT", "NP", "KO")) df2$gene <- sapply(as.character(df2$Var2), function(x) strsplit(x, split = "g")[[1]][1]) -df2$guide_number <- sapply(as.character(df2$Var2), +df2$guide_number <- sapply(as.character(df2$Var2), function(x) strsplit(x, split = "g")[[1]][2]) df3 <- df2[-c(which(df2$gene == "NT")),] -p1 <- ggplot(df3, aes(x = guide_number, y = value*100, fill= Var1)) + - geom_bar(stat= "identity") + - theme_classic()+ - scale_fill_manual(values = c("grey49", "grey79","coral1")) + +p1 <- ggplot(df3, aes(x = guide_number, y = value * 100, fill = Var1)) + + geom_bar(stat = "identity") + + theme_classic() + + scale_fill_manual(values = c("grey49", "grey79", "coral1")) + ylab("% of cells") + xlab("sgRNA") -p1 + theme(axis.text.x = element_text(size = 18, hjust = 1), - axis.text.y = element_text(size = 18), - axis.title = element_text(size = 16), - strip.text = element_text(size=16, face = "bold")) + - facet_wrap(vars(gene),ncol = 5, scales = "free") + - labs(fill = "mixscape class") +theme(legend.title = element_text(size = 14), +p1 + theme(axis.text.x = element_text(size = 18, hjust = 1), + axis.text.y = element_text(size = 18), + axis.title = element_text(size = 16), + strip.text = element_text(size = 16, face = "bold")) + + facet_wrap(vars(gene), ncol = 5, scales = "free") + + labs(fill = "mixscape class") + theme(legend.title = element_text(size = 14), legend.text = element_text(size = 12)) ``` @@ -264,40 +264,40 @@ To ensure mixscape is assigning the correct perturbation status to cells we can ```{r eccite.plots, fig.height = 10, fig.width = 15, results="hide"} # Explore the perturbation scores of cells. -PlotPerturbScore(object = eccite, - target.gene.ident = "IFNGR2", - mixscape.class = "mixscape_class", - col = "coral2") +labs(fill = "mixscape class") +PlotPerturbScore(object = eccite, + target.gene.ident = "IFNGR2", + mixscape.class = "mixscape_class", + col = "coral2") + labs(fill = "mixscape class") # Inspect the posterior probability values in NP and KO cells. VlnPlot(eccite, "mixscape_class_p_ko", idents = c("NT", "IFNGR2 KO", "IFNGR2 NP")) + - theme(axis.text.x = element_text(angle = 0, hjust = 0.5),axis.text = element_text(size = 16) ,plot.title = element_text(size = 20)) + + theme(axis.text.x = element_text(angle = 0, hjust = 0.5),axis.text = element_text(size = 16) ,plot.title = element_text(size = 20)) + NoLegend() + ggtitle("mixscape posterior probabilities") # Run DE analysis and visualize results on a heatmap ordering cells by their posterior # probability values. Idents(object = eccite) <- "gene" -MixscapeHeatmap(object = eccite, - ident.1 = "NT", - ident.2 = "IFNGR2", - balanced = F, - assay = "RNA", - max.genes = 20, angle = 0, - group.by = "mixscape_class", - max.cells.group = 300, +MixscapeHeatmap(object = eccite, + ident.1 = "NT", + ident.2 = "IFNGR2", + balanced = FALSE, + assay = "RNA", + max.genes = 20, angle = 0, + group.by = "mixscape_class", + max.cells.group = 300, size=6.5) + NoLegend() +theme(axis.text.y = element_text(size = 16)) # Show that only IFNG pathway KO cells have a reduction in PD-L1 protein expression. VlnPlot( - object = eccite, - features = "adt_PDL1", - idents = c("NT","JAK2","STAT1","IFNGR1","IFNGR2", "IRF1"), - group.by = "gene", - pt.size = 0.2, - sort = T, - split.by = "mixscape_class.global", - cols = c("coral3","grey79","grey39")) + + object = eccite, + features = "adt_PDL1", + idents = c("NT","JAK2", "STAT1", "IFNGR1", "IFNGR2", "IRF1"), + group.by = "gene", + pt.size = 0.2, + sort = TRUE, + split.by = "mixscape_class.global", + cols = c("coral3", "grey79", "grey39")) + ggtitle("PD-L1 protein") + theme(axis.text.x = element_text(angle = 0, hjust = 0.5), plot.title = element_text(size = 20), axis.text = element_text(size = 16)) ``` @@ -318,43 +318,43 @@ sub <- subset(eccite, idents = c("KO", "NT")) # Run LDA. sub <- MixscapeLDA( - object = sub, - assay = "RNA", - pc.assay = "PRTB", - labels = "gene", - nt.label = "NT", - npcs = 10, - logfc.threshold = 0.25, - verbose = F) - -# Use LDA results to run UMAP and visualize cells on 2-D. + object = sub, + assay = "RNA", + pc.assay = "PRTB", + labels = "gene", + nt.label = "NT", + npcs = 10, + logfc.threshold = 0.25, + verbose = FALSE) + +# Use LDA results to run UMAP and visualize cells on 2-D. # Here, we note that the number of the dimensions to be used is equal to the number of # labels minus one (to account for NT cells). sub <- RunUMAP( object = sub, dims = 1:11, - reduction = 'lda', - reduction.key = 'ldaumap', - reduction.name = 'ldaumap') + reduction = "lda", + reduction.key = "ldaumap", + reduction.name = "ldaumap") # Visualize UMAP clustering results. Idents(sub) <- "mixscape_class" sub$mixscape_class <- as.factor(sub$mixscape_class) # Set colors for each perturbation. -col = setNames(object = hue_pal()(12),nm = levels(sub$mixscape_class)) +col = setNames(object = hue_pal()(12), nm = levels(sub$mixscape_class)) names(col) <- c(names(col)[1:7], "NT", names(col)[9:12]) col[8] <- "grey39" -p <- DimPlot(object = sub, - reduction = "ldaumap", - repel = T, - label.size = 5, - label = T, +p <- DimPlot(object = sub, + reduction = "ldaumap", + repel = TRUE, + label.size = 5, + label = TRUE, cols = col) + NoLegend() -p2 <- p+ - scale_color_manual(values=col, drop=FALSE) + +p2 <- p + + scale_color_manual(values = col, drop = FALSE) + ylab("UMAP 2") + xlab("UMAP 1") p2 diff --git a/vignettes/seurat5_sctransform_v2_vignette.Rmd b/vignettes/seurat5_sctransform_v2_vignette.Rmd index 69a98b42a..9f309799c 100644 --- a/vignettes/seurat5_sctransform_v2_vignette.Rmd +++ b/vignettes/seurat5_sctransform_v2_vignette.Rmd @@ -88,8 +88,7 @@ InstallData("ifnb") ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset ifnb <- LoadData("ifnb") -ifnb <- UpdateSeuratObject(ifnb) -ifnb[["RNA"]] <- CreateAssay5Object(ifnb[["RNA"]]@counts) +ifnb[["RNA"]] <- as(ifnb[["RNA"]], Class = "Assay5") # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 4328ffe84..d51333879 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -76,8 +76,7 @@ InstallData("stxBrain") ```{r data} brain <- LoadData('stxBrain', type = 'anterior1') -brain <- UpdateSeuratObject(brain) -brain[["Spatial"]] <- CreateAssay5Object(brain[["Spatial"]]@counts) +brain[['Spatial']] <- as(brain[['Spatial']], class = 'Assay5') ```
    @@ -325,9 +324,8 @@ This dataset of the mouse brain contains another slice corresponding to the othe ```{r brain2data} brain2 <- LoadData('stxBrain', type = 'posterior1') -brain2 <- UpdateSeuratObject(brain2) -brain2[["Spatial"]] <- CreateAssay5Object(counts = brain2[["Spatial"]]@counts) -brain2 <- SCTransform(brain2, assay = "Spatial", verbose = FALSE) +brain2[['Spatial']] <- as(brain2[['Spatial']], class = 'Assay5') +brain2 <- SCTransform(brain2, assay = 'Spatial', verbose = FALSE) ``` In order to work with multiple slices in the same Seurat object, we provide the `merge` function. @@ -379,8 +377,7 @@ InstallData("ssHippo") ```{r data.ss} slide.seq <- LoadData('ssHippo') -slide.seq <- UpdateSeuratObject(slide.seq) -slide.seq[["Spatial"]] <- CreateAssay5Object(counts = slide.seq[["Spatial"]]@counts) +slide.seq[['Spatial']] <- as(slide.seq[['Spatial']], class = 'Assay5') ``` ## Data preprocessing diff --git a/vignettes/seurat5_visualization_vignette.Rmd b/vignettes/seurat5_visualization_vignette.Rmd index 98fc2eb09..b49f4539e 100644 --- a/vignettes/seurat5_visualization_vignette.Rmd +++ b/vignettes/seurat5_visualization_vignette.Rmd @@ -38,7 +38,7 @@ options(SeuratData.repo.use = 'satijalab04.nygenome.org') We'll demonstrate visualization techniques in Seurat using our previously computed Seurat object from the 2,700 PBMC tutorial. You can download this dataset from [SeuratData](https://github.com/satijalab/seurat-data) ```{r data, eval = FALSE} -SeuratData::InstallData('pbmc3k') +SeuratData::InstallData("pbmc3k") ``` ```{r seed, include=TRUE} @@ -51,15 +51,14 @@ options(Seurat.object.assay.version = "v5") library(SeuratData) library(ggplot2) library(patchwork) + data("pbmc3k.final") pbmc3k.final <- UpdateSeuratObject(pbmc3k.final) -pbmc3k.final[["RNA"]] <- CreateAssay5Object(counts=pbmc3k.final[["RNA"]]@counts) +pbmc3k.final[["RNA"]] <- as(pbmc3k.final, Class = "Assay5") pbmc3k.final <- NormalizeData(pbmc3k.final) -DefaultLayer(pbmc3k.final[["RNA"]]) <- "counts" pbmc3k.final <- FindVariableFeatures(pbmc3k.final) -DefaultLayer(pbmc3k.final[["RNA"]]) <- "data" pbmc3k.final <- ScaleData(pbmc3k.final) -pbmc3k.final$groups <- sample(c('group1', 'group2'), size = ncol(pbmc3k.final), replace = TRUE) +pbmc3k.final$groups <- sample(c("group1", "group2"), size = ncol(pbmc3k.final), replace = TRUE) features <- c("LYZ", "CCL5", "IL32", "PTPRCAP", "FCGR3A", "PF4") pbmc3k.final ``` @@ -89,23 +88,23 @@ DoHeatmap(subset(pbmc3k.final, downsample = 100), features = features, size = 3) ```{r featureplot} # Plot a legend to map colors to expression levels -FeaturePlot(pbmc3k.final, features = 'MS4A1') +FeaturePlot(pbmc3k.final, features = "MS4A1") # Adjust the contrast in the plot -FeaturePlot(pbmc3k.final, features = 'MS4A1', min.cutoff = 1, max.cutoff = 3) +FeaturePlot(pbmc3k.final, features = "MS4A1", min.cutoff = 1, max.cutoff = 3) ``` ```{r featureplot2, fig.height = 4} # Calculate feature-specific contrast levels based on quantiles of non-zero expression. Particularly useful when plotting multiple markers -FeaturePlot(pbmc3k.final, features = c('MS4A1', "PTPRCAP"), min.cutoff = "q10", max.cutoff = "q90") +FeaturePlot(pbmc3k.final, features = c("MS4A1", "PTPRCAP"), min.cutoff = "q10", max.cutoff = "q90") # Visualize co-expression of two features simultaneously -FeaturePlot(pbmc3k.final, features = c('MS4A1', 'CD79A'), blend = TRUE) +FeaturePlot(pbmc3k.final, features = c("MS4A1", "CD79A"), blend = TRUE) ``` ```{r featureplot.split} # Split visualization to view expression by groups (replaces FeatureHeatmap) -FeaturePlot(pbmc3k.final, features = c('MS4A1', 'CD79A'), split.by = 'groups') +FeaturePlot(pbmc3k.final, features = c("MS4A1", "CD79A"), split.by = "groups") ``` # Updated and expanded visualization functions diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 4d8fe5e8b..940814771 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -338,3 +338,9 @@ Each of these datasets represents an opportunity to learn organizing principles sessionInfo() ```
    + +```{r save.times, include=TRUE} +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/spatial_vignette_2.csv") +``` + + From e40aad2b7629b4287c1a44d4d622960843d27cc7 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 16:24:35 -0500 Subject: [PATCH 439/979] specify assay in VlnPlot --- vignettes/seurat5_spatial_vignette_2.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 21a0876d3..35feef372 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -249,11 +249,11 @@ ImageDimPlot(nano.obj, fov = "lung5.rep1", cells = WhichCells(nano.obj, idents=c We can also visualize gene expression markers a few different ways: ```{r, fig.width=10, fig.height=5} -VlnPlot(nano.obj, features = "KRT17", layer = "counts", pt.size = 0.1, y.max = 30) + NoLegend() +VlnPlot(nano.obj, features = "KRT17", assay = "Nanostring", layer = "counts", pt.size = 0.1, y.max = 30) + NoLegend() ``` ```{r, fig.width=5, fig.height=4} -FeaturePlot(nano.obj, features = "KRT17") +FeaturePlot(nano.obj, features = "KRT17", max.cutoff = "q95") ``` ```{r, fig.height=4, fig.width=8} From 5d466b9307edd7cfa26880685a6f7a8f21da3a8c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 30 Jan 2023 17:04:57 -0500 Subject: [PATCH 440/979] modify integrate layer --- R/integration5.R | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 5bcba5024..5324b08f5 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -404,31 +404,30 @@ IntegrateLayers <- function( features <- features %||% VariableFeatures( object = object, assay = assay, - layer = layers, nfeatures = 2000L ) - # features <- features %||% SelectIntegrationFeatures5( - # object = object, - # assay = assay - # ) } else { abort(message = "'assay' must be a v5 or SCT assay") } - features <- intersect( - x = features, - y = Features(x = object, assay = assay, layer = scale.layer) - ) + if (!is.null(scale.layer)) { + features <- intersect( + x = features, + y = Features(x = object, assay = assay, layer = scale.layer) + ) + } if (!length(x = features)) { abort(message = "None of the features provided are found in this assay") } - # Check our dimensional reduction - orig <- orig %||% DefaultDimReduc(object = object, assay = assay) - if (!orig %in% Reductions(object = object)) { - abort(message = paste(sQuote(x = orig), 'is not a dimensional reduction')) - } - obj.orig <- object[[orig]] - if (is.null(x = DefaultAssay(object = obj.orig))) { - DefaultAssay(object = obj.orig) <- assay + if (!is.null(orig)) { + # Check our dimensional reduction + orig <- orig %||% DefaultDimReduc(object = object, assay = assay) + if (!orig %in% Reductions(object = object)) { + abort(message = paste(sQuote(x = orig), 'is not a dimensional reduction')) + } + obj.orig <- object[[orig]] + if (is.null(x = DefaultAssay(object = obj.orig))) { + DefaultAssay(object = obj.orig) <- assay + } } # Check our groups groups <- if (inherits(x = object[[assay]], what = 'SCTAssay')) { @@ -450,7 +449,7 @@ IntegrateLayers <- function( object = cmap, values = Cells(x = object[[assay]], layer = scale.layer) )) - } else if (is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { + } else if (rlang::is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { FetchData( object = object, vars = group.by, From fb1fc9cbcc05aee20a4de5db6f71f9f80f3e5df7 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 19:09:47 -0500 Subject: [PATCH 441/979] tmp skip saving artifacts on failure --- .github/workflows/R_CMD_check.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml index 1f8d2bd7c..b93d6fef4 100644 --- a/.github/workflows/R_CMD_check.yaml +++ b/.github/workflows/R_CMD_check.yaml @@ -33,9 +33,9 @@ jobs: run: devtools::check(args = "--no-manual", error_on = "warning", check_dir = "check", force_suggests = FALSE) shell: Rscript {0} - - name: Upload check results - if: failure() - uses: actions/upload-artifact@master - with: - name: results - path: check +# - name: Upload check results +# if: failure() +# uses: actions/upload-artifact@master +# with: +# name: results +# path: check From 824aa596b34690637327d1c9acc6a8f949a96413 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 19:20:57 -0500 Subject: [PATCH 442/979] dont fail on error --- vignettes/visualization_vignette.Rmd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vignettes/visualization_vignette.Rmd b/vignettes/visualization_vignette.Rmd index 08f5118f5..c4476d861 100644 --- a/vignettes/visualization_vignette.Rmd +++ b/vignettes/visualization_vignette.Rmd @@ -29,7 +29,8 @@ knitr::opts_chunk$set( tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) options(SeuratData.repo.use = 'satijalab04.nygenome.org') ``` @@ -37,7 +38,7 @@ options(SeuratData.repo.use = 'satijalab04.nygenome.org') We'll demonstrate visualization techniques in Seurat using our previously computed Seurat object from the 2,700 PBMC tutorial. You can download this dataset from [SeuratData](https://github.com/satijalab/seurat-data) ```{r data, eval = FALSE} -SeuratData::InstallData('pbmc3k') +SeuratData::InstallData("pbmc3k") ``` ```{r seed, include=FALSE} @@ -50,7 +51,8 @@ library(SeuratData) library(ggplot2) library(patchwork) data("pbmc3k.final") -pbmc3k.final$groups <- sample(c('group1', 'group2'), size = ncol(pbmc3k.final), replace = TRUE) +pbmc3k.final <- UpdateSeuratObject(pbmc3k.final) +pbmc3k.final$groups <- sample(c("group1", "group2"), size = ncol(pbmc3k.final), replace = TRUE) features <- c("LYZ", "CCL5", "IL32", "PTPRCAP", "FCGR3A", "PF4") pbmc3k.final ``` From 9bc0b5bebceb94d6baba92a412ab039851910676 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 19:35:25 -0500 Subject: [PATCH 443/979] update GHA checkout version to handle node version deprecation --- .github/workflows/R_CMD_check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml index b93d6fef4..8c005ab38 100644 --- a/.github/workflows/R_CMD_check.yaml +++ b/.github/workflows/R_CMD_check.yaml @@ -19,7 +19,7 @@ jobs: runs-on: [self-hosted, satijalab05] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Remove vignettes dir run: rm -rf 'vignettes/' shell: bash From ffbdeff75e560c1479e75f8e8e99f1a92f8d10bd Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 30 Jan 2023 19:52:30 -0500 Subject: [PATCH 444/979] keep dimplot generation --- vignettes/spatial_vignette.Rmd | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index 6d082871c..e4116ba36 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -141,7 +141,7 @@ SpatialFeaturePlot(brain, features = c("Hpca", "Ttr")) ``` -```{r save.img, include=FALSE} +```{r save.img, include=TRUE} library(ggplot2) plot <- SpatialFeaturePlot(brain, features = c("Ttr")) + theme(legend.text = element_text(size = 0), legend.title = element_text(size = 20), legend.key.size = unit(1, "cm")) @@ -481,17 +481,18 @@ nUMI <- allen.cortex$nCount_RNA names(nUMI) <- colnames(allen.cortex) reference <- Reference(counts, cluster, nUMI) -counts <- LayerData(cortex, layer = "counts") -coords <- GetTissueCoordinates(cortex) +counts <- LayerData(slide.seq, layer = "counts") +coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") +coords[is.na(colnames(coords))] <- NULL puck <- SpatialRNA(coords, counts, colSums(counts)) RCTD <- create.RCTD(puck, reference, max_cores = 1) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') -cortex <- AddMetaData(cortex, metadata = myRCTD@results$results_df) -p1 <- SpatialDimPlot(cortex, group.by = "first_type") -p2 <- SpatialDimPlot(cortex, group.by = "second_type") +slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) +p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") +p2 <- SpatialDimPlot(slide.seq, group.by = "second_type") p1 + p2 ``` From 9969cfb8e60bdc1e37798309ddd6c09731ab2321 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 09:47:54 -0500 Subject: [PATCH 445/979] dont fail on error --- vignettes/seurat5_atomic_integration.Rmd | 3 ++- vignettes/seurat5_bridge_integration_vignette.Rmd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_atomic_integration.Rmd b/vignettes/seurat5_atomic_integration.Rmd index fedce4754..7e7fe7725 100644 --- a/vignettes/seurat5_atomic_integration.Rmd +++ b/vignettes/seurat5_atomic_integration.Rmd @@ -24,7 +24,8 @@ knitr::opts_chunk$set( fig.width = 10, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` diff --git a/vignettes/seurat5_bridge_integration_vignette.Rmd b/vignettes/seurat5_bridge_integration_vignette.Rmd index 9561ec0a5..827d168a0 100644 --- a/vignettes/seurat5_bridge_integration_vignette.Rmd +++ b/vignettes/seurat5_bridge_integration_vignette.Rmd @@ -24,7 +24,8 @@ knitr::knit_hooks$set(time_it = local({ knitr::opts_chunk$set( message = FALSE, warning = FALSE, - time_it = TRUE + time_it = TRUE, + error = TRUE ) ``` From 6a8ad921a57798ba8390bbccb668b488b5c4a7b2 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 10:47:09 -0500 Subject: [PATCH 446/979] bump version --- DESCRIPTION | 4 ++-- .../seurat5_multimodal_reference_mapping.Rmd | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ab6a1258..e9bcb2d72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9027 -Date: 2023-01-27 +Version: 4.9.9.9028 +Date: 2023-01-31 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd index 87490972b..19c666f67 100644 --- a/vignettes/seurat5_multimodal_reference_mapping.Rmd +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -78,8 +78,7 @@ To demonstrate mapping to this multimodal reference, we will use a dataset of 2, ```{r 3k.load} library(SeuratData) -InstallData('pbmc3k') -pbmc3k <- UpdateSeuratObject(pbmc3k) +pbmc3k <- LoadData('pbmc3k') pbmc3k[['RNA']] <- as(pbmc3k[['RNA']], Class = 'Assay5') ``` @@ -231,14 +230,15 @@ This vignette exhibits the same reference-mapping functionality as the PBMC exam ```{r bmref.seuratdata} # Both datasets are available through SeuratData library(SeuratData) + #load reference data InstallData("bmcite") -bm <- LoadData(ds = "bmcite") -bm <- UpdateSeuratObject(bm) +bm <- LoadData("bmcite") +bm[['RNA']] <- as(bm[['RNA']], Class = 'Assay5') + #load query data InstallData('hcabm40k') -hcabm40k <- LoadData(ds = "hcabm40k") -hcabm40k <- UpdateSeuratObject(hcabm40k) +hcabm40k <- LoadData("hcabm40k") hcabm40k[['RNA']] <- as(hcabm40k[['RNA']], Class = 'Assay5') ``` @@ -301,7 +301,8 @@ Here we will demonstrate mapping multiple donor bone marrow samples to the multi ```{r bm40k.load} library(dplyr) library(SeuratData) -InstallData('hcabm40k') +hcabm40k <- LoadData('hcabm40k') +hcabm40k[['RNA']] <- as(hcabm40k[['RNA']], Class = 'Assay5') hcabm40k.batches <- SplitObject(hcabm40k, split.by = "orig.ident") ``` From 344022b47be2539c4227dce39e5a4cc73eb0a1c6 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 11:07:11 -0500 Subject: [PATCH 447/979] update separating character in v5 interaction vignette --- vignettes/seurat5_interaction_vignette.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/seurat5_interaction_vignette.Rmd b/vignettes/seurat5_interaction_vignette.Rmd index 7e812f461..5cc03dfd1 100644 --- a/vignettes/seurat5_interaction_vignette.Rmd +++ b/vignettes/seurat5_interaction_vignette.Rmd @@ -120,19 +120,19 @@ head(cluster.averages[['RNA']][, 1:5]) # Return this information as a Seurat object (enables downstream plotting and analysis) # First, replace spaces with underscores '_' so ggplot2 doesn't fail orig.levels <- levels(pbmc) -Idents(pbmc) <- gsub(pattern = ' ', replacement = '_', x = Idents(pbmc)) -orig.levels <- gsub(pattern = ' ', replacement = '_', x = orig.levels) +Idents(pbmc) <- gsub(pattern = ' ', replacement = '-', x = Idents(pbmc)) +orig.levels <- gsub(pattern = ' ', replacement = '-', x = orig.levels) levels(pbmc) <- orig.levels cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE) cluster.averages # How can I plot the average expression of NK cells vs. CD8 T cells? # Pass do.hover = T for an interactive plot to identify gene outliers -CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8_T") +CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8-T") # How can I calculate expression averages separately for each replicate? cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE, add.ident = "replicate") -CellScatter(cluster.averages, cell1 = "CD8_T_rep1", cell2 = "CD8_T_rep2") +CellScatter(cluster.averages, cell1 = "CD8-T_rep1", cell2 = "CD8-T_rep2") # You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between replicates DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) From 0551768ccea7c5bd30f8c0bfa96a3c2526277c36 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 31 Jan 2023 11:33:09 -0500 Subject: [PATCH 448/979] adding support for mean and variance calculations of bp cells --- R/integration.R | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index bfcea9ec5..d5557d98e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5258,15 +5258,20 @@ if (normalization.method == 'SCT') { if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) + } else if (inherits(x = reference.data, what = "MatrixSubset")) { + bp.stats <- BPCells::matrix_stats(matrix = reference.data, + row_stats = "variance") + bp.stats <- t(bp.stats$row_stats) + feature.mean <- bp.stats[,"mean"] } else { feature.mean <- rowMeans2(x = reference.data) } if (scale) { - feature.sd <- sqrt( - x = RowVarSparse( - mat = as.sparse(reference.data) - ) - ) + if (inherits(x = reference.data, what = "MatrixSubset")) { + feature.sd <- sqrt(bp.stats[,"variance"]) + } else { + feature.sd <- sqrt(x = RowVarSparse(mat = as.sparse(reference.data))) + } feature.sd[is.na(x = feature.sd)] <- 1 } else { feature.sd <- rep(x = 1, nrow(x = reference.data)) @@ -5340,15 +5345,24 @@ ProjectCellEmbeddings.IterableMatrix <- function( if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) + } else if (inherits(x = reference.data, what = "MatrixSubset")) { + bp.stats <- BPCells::matrix_stats(matrix = reference.data, + row_stats = "variance") + bp.stats <- t(bp.stats$row_stats) + feature.mean <- bp.stats[,"mean"] } else { feature.mean <- rowMeans(mat = reference.data) } if (scale) { - feature.sd <- sqrt( - x = RowVarSparse( - mat = as.sparse(reference.data) + if (inherits(x = reference.data, what = "MatrixSubset")) { + feature.sd <- sqrt(bp.stats[,"variance"]) + } else { + feature.sd <- sqrt( + x = RowVarSparse( + mat = as.sparse(reference.data) + ) ) - ) + } feature.sd[is.na(x = feature.sd)] <- 1 } else { feature.sd <- rep(x = 1, nrow(x = reference.data)) From 12155f699104d8dd31848bfcf097040e946a3491 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 12:14:57 -0500 Subject: [PATCH 449/979] fix typo: c->C --- vignettes/seurat5_spatial_vignette.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index d51333879..c0af8b9f1 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -76,7 +76,7 @@ InstallData("stxBrain") ```{r data} brain <- LoadData('stxBrain', type = 'anterior1') -brain[['Spatial']] <- as(brain[['Spatial']], class = 'Assay5') +brain[['Spatial']] <- as(brain[['Spatial']], Class = 'Assay5') ```
    @@ -324,7 +324,7 @@ This dataset of the mouse brain contains another slice corresponding to the othe ```{r brain2data} brain2 <- LoadData('stxBrain', type = 'posterior1') -brain2[['Spatial']] <- as(brain2[['Spatial']], class = 'Assay5') +brain2[['Spatial']] <- as(brain2[['Spatial']], Class = 'Assay5') brain2 <- SCTransform(brain2, assay = 'Spatial', verbose = FALSE) ``` @@ -377,7 +377,7 @@ InstallData("ssHippo") ```{r data.ss} slide.seq <- LoadData('ssHippo') -slide.seq[['Spatial']] <- as(slide.seq[['Spatial']], class = 'Assay5') +slide.seq[['Spatial']] <- as(slide.seq[['Spatial']], Class = 'Assay5') ``` ## Data preprocessing From 31fce2be3617f12f6a117eaba40849a93b0acd9a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 31 Jan 2023 12:16:18 -0500 Subject: [PATCH 450/979] fix FetchResidual model features --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 5eeee2505..58ade27b1 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1933,7 +1933,7 @@ FetchResidualSCTModel <- function(object, replace.value = FALSE, verbose = FALSE) { model.cells <- character() - model.features <- Features(x = object, layer = layer) + model.features <- Features(x = object, assay = assay) if (is.null(x = reference.SCT.model)){ clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) From 98b31632533b28dff7c2577e5a17a36427b9f548 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 31 Jan 2023 12:37:05 -0500 Subject: [PATCH 451/979] IterableMatrix check --- R/integration.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index d5557d98e..ef758ff9b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5258,7 +5258,7 @@ if (normalization.method == 'SCT') { if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) - } else if (inherits(x = reference.data, what = "MatrixSubset")) { + } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") bp.stats <- t(bp.stats$row_stats) @@ -5267,7 +5267,7 @@ if (normalization.method == 'SCT') { feature.mean <- rowMeans2(x = reference.data) } if (scale) { - if (inherits(x = reference.data, what = "MatrixSubset")) { + if (inherits(x = reference.data, what = "IterableMatrix")) { feature.sd <- sqrt(bp.stats[,"variance"]) } else { feature.sd <- sqrt(x = RowVarSparse(mat = as.sparse(reference.data))) @@ -5345,7 +5345,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) - } else if (inherits(x = reference.data, what = "MatrixSubset")) { + } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") bp.stats <- t(bp.stats$row_stats) @@ -5354,7 +5354,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( feature.mean <- rowMeans(mat = reference.data) } if (scale) { - if (inherits(x = reference.data, what = "MatrixSubset")) { + if (inherits(x = reference.data, what = "IterableMatrix")) { feature.sd <- sqrt(bp.stats[,"variance"]) } else { feature.sd <- sqrt( From 4ce8717e5c336d5ffe553eb185fc524c12a02a42 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 31 Jan 2023 12:51:19 -0500 Subject: [PATCH 452/979] stats and returning scaling clip --- R/integration.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/integration.R b/R/integration.R index ef758ff9b..ceb53751e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5261,14 +5261,14 @@ if (normalization.method == 'SCT') { } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") - bp.stats <- t(bp.stats$row_stats) - feature.mean <- bp.stats[,"mean"] + bp.stats <- bp.stats$row_stats + feature.mean <- bp.stats["mean",] } else { feature.mean <- rowMeans2(x = reference.data) } if (scale) { if (inherits(x = reference.data, what = "IterableMatrix")) { - feature.sd <- sqrt(bp.stats[,"variance"]) + feature.sd <- sqrt(bp.stats["variance",]) } else { feature.sd <- sqrt(x = RowVarSparse(mat = as.sparse(reference.data))) } @@ -5348,14 +5348,14 @@ ProjectCellEmbeddings.IterableMatrix <- function( } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") - bp.stats <- t(bp.stats$row_stats) - feature.mean <- bp.stats[,"mean"] + bp.stats <- bp.stats$row_stats + feature.mean <- bp.stats["mean",] } else { feature.mean <- rowMeans(mat = reference.data) } if (scale) { if (inherits(x = reference.data, what = "IterableMatrix")) { - feature.sd <- sqrt(bp.stats[,"variance"]) + feature.sd <- sqrt(bp.stats["variance",]) } else { feature.sd <- sqrt( x = RowVarSparse( @@ -5370,7 +5370,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( feature.mean[is.na(x = feature.mean)] <- 1 } query.scale <- (query - feature.mean)/feature.sd - #query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) + query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] rownames(proj.pca) <- colnames(query) colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] From c31614fdfba05c0c0f646b9ae2cffa3c329f4ef9 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 31 Jan 2023 16:59:59 -0500 Subject: [PATCH 453/979] new way of clipping --- R/integration.R | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index ceb53751e..576aff9fb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5261,14 +5261,13 @@ if (normalization.method == 'SCT') { } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") - bp.stats <- bp.stats$row_stats - feature.mean <- bp.stats["mean",] + feature.mean <- bp.stats$row_stats["mean",] } else { feature.mean <- rowMeans2(x = reference.data) } if (scale) { if (inherits(x = reference.data, what = "IterableMatrix")) { - feature.sd <- sqrt(bp.stats["variance",]) + feature.sd <- sqrt(bp.stats$row_stats["variance",]) } else { feature.sd <- sqrt(x = RowVarSparse(mat = as.sparse(reference.data))) } @@ -5348,14 +5347,13 @@ ProjectCellEmbeddings.IterableMatrix <- function( } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") - bp.stats <- bp.stats$row_stats - feature.mean <- bp.stats["mean",] + feature.mean <- bp.stats$row_stats["mean",] } else { feature.mean <- rowMeans(mat = reference.data) } if (scale) { if (inherits(x = reference.data, what = "IterableMatrix")) { - feature.sd <- sqrt(bp.stats["variance",]) + feature.sd <- sqrt(bp.stats$row_stats["variance",]) } else { feature.sd <- sqrt( x = RowVarSparse( @@ -5369,8 +5367,8 @@ ProjectCellEmbeddings.IterableMatrix <- function( } feature.mean[is.na(x = feature.mean)] <- 1 } - query.scale <- (query - feature.mean)/feature.sd - query.scale <- BPCells::min_scalar(mat = query.scale, val = 10) + query.scale <- BPCells::min_by_row(query, 10*feature.sd + feature.mean) + query.scale <- (query.scale - feature.mean)/feature.sd proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] rownames(proj.pca) <- colnames(query) colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] From 9bf3191efed01d9f4b9e4ad80e5ae8d90e3eaf85 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 31 Jan 2023 17:48:50 -0500 Subject: [PATCH 454/979] fix SCT counts missing --- R/preprocessing5.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 58ade27b1..dd43dc3b2 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1600,7 +1600,12 @@ SCTransform.StdAssay <- function( min.cells.to.block = 3000, verbose = verbose ) - assay.out <- CreateSCTAssayObject(scale.data = new.residuals, data = corrected_counts, SCTModel.list = SCTModel.list) + assay.out <- CreateSCTAssayObject( + counts = corrected_counts, + scale.data = new.residuals, + SCTModel.list = SCTModel.list + ) + assay.out$data <- log1p(x = corrected_counts) VariableFeatures(assay.out) <- variable.features # one assay per dataset if (verbose){ From 22f845a2ce6d023a1f188bb44c69068cc1d5b024 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 31 Jan 2023 18:01:50 -0500 Subject: [PATCH 455/979] parameter fix for JoinLayers --- R/integration.R | 2 +- vignettes/seurat5_integration_introduction.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index bfcea9ec5..56ba2ed56 100644 --- a/R/integration.R +++ b/R/integration.R @@ -807,7 +807,7 @@ FindTransferAnchors <- function( if (inherits(x = reference[[reference.assay]], what = 'Assay5')) { if (length(Layers(reference, search = "data")) > 1) { reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], - search = "data", new = "data") + layers = "data", new = "data") } } if (normalization.method == "SCT") { diff --git a/vignettes/seurat5_integration_introduction.Rmd b/vignettes/seurat5_integration_introduction.Rmd index 41ebc021a..371571880 100644 --- a/vignettes/seurat5_integration_introduction.Rmd +++ b/vignettes/seurat5_integration_introduction.Rmd @@ -132,7 +132,7 @@ To identify canonical cell type marker genes that are conserved across condition # For performing differential expression after integration, we switch back to the original data DefaultAssay(ifnb) <- "RNA" # Join Data Layers across stimualtions -ifnb[['RNA']] <- JoinLayers(ifnb[["RNA"]], search = "data", new = "data") +ifnb[['RNA']] <- JoinLayers(ifnb[["RNA"]], layers = "data", new = "data") nk.markers <- FindConservedMarkers(ifnb, ident.1 = 6, grouping.var = "stim", verbose = FALSE) head(nk.markers) ``` From 455e9719159cd8446bef60c939a0ba080949e9e1 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 18:02:19 -0500 Subject: [PATCH 456/979] update RCTD --- vignettes/seurat5_spatial_vignette.Rmd | 40 ++++++++++++-------------- vignettes/spatial_vignette.Rmd | 31 +++++++++----------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index c0af8b9f1..bb3a3609b 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -470,33 +470,31 @@ Annotation using RCTD ```{r rctd} library(spacexr) -# allen cortex reference -allen.cortex <- readRDS("/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/allen_cortex.rds") -Idents(allen.cortex) <- "subclass" - -# need to drop the cell types which there are <25 of -drop.cells <- WhichCells(allen.cortex, idents = "CR") -allen.cortex <- subset(allen.cortex, cells = drop.cells, invert = TRUE) - -counts <- LayerData(allen.cortex, layer = "counts") -cluster <- as.factor(allen.cortex$subclass) -levels(cluster)[3] <- "L2 3 IT" -names(cluster) <- colnames(allen.cortex) -nUMI <- allen.cortex$nCount_RNA -names(nUMI) <- colnames(allen.cortex) +# set up reference +ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- UpdateSeuratObject(ref) +Idents(ref) <- "celltype" +counts <- LayerData(ref, assay = "RNA", layer = "counts") +cluster <- as.factor(ref$celltype) +names(cluster) <- colnames(ref) +nUMI <- ref$nCount_RNA +names(nUMI) <- colnames(ref) reference <- Reference(counts, cluster, nUMI) -counts <- LayerData(cortex, layer = "counts") -coords <- GetTissueCoordinates(cortex) +# set up query +slide.seq <- LodaData("ssHippo") +counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") +coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") -puck <- SpatialRNA(coords, counts, colSums(counts)) +coords[is.na(colnames(coords))] <- NULL +query <- SpatialRNA(coords, counts, colSums(counts)) -RCTD <- create.RCTD(puck, reference, max_cores = 1) +RCTD <- create.RCTD(query, reference, max_cores = 1) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') -cortex <- AddMetaData(cortex, metadata = myRCTD@results$results_df) -p1 <- SpatialDimPlot(cortex, group.by = "first_type") -p2 <- SpatialDimPlot(cortex, group.by = "second_type") +slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) +p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") +p2 <- SpatialDimPlot(slide.seq, group.by = "second_type") p1 + p2 ``` diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index e4116ba36..91cfd7bcf 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -465,29 +465,26 @@ Annotation using RCTD ```{r rctd} library(spacexr) -# allen cortex reference -allen.cortex <- readRDS("/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/allen_cortex.rds") -Idents(allen.cortex) <- "subclass" - -# need to drop the cell types which there are <25 of -drop.cells <- WhichCells(allen.cortex, idents = "CR") -allen.cortex <- subset(allen.cortex, cells = drop.cells, invert = TRUE) - -counts <- LayerData(allen.cortex, layer = "counts") -cluster <- as.factor(allen.cortex$subclass) -levels(cluster)[3] <- "L2 3 IT" -names(cluster) <- colnames(allen.cortex) -nUMI <- allen.cortex$nCount_RNA -names(nUMI) <- colnames(allen.cortex) +# set up reference +ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- UpdateSeuratObject(ref) +Idents(ref) <- "celltype" +counts <- LayerData(ref, assay = "RNA", layer = "counts") +cluster <- as.factor(ref$celltype) +names(cluster) <- colnames(ref) +nUMI <- ref$nCount_RNA +names(nUMI) <- colnames(ref) reference <- Reference(counts, cluster, nUMI) -counts <- LayerData(slide.seq, layer = "counts") +# set up query +slide.seq <- LodaData("ssHippo") +counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL -puck <- SpatialRNA(coords, counts, colSums(counts)) +query <- SpatialRNA(coords, counts, colSums(counts)) -RCTD <- create.RCTD(puck, reference, max_cores = 1) +RCTD <- create.RCTD(query, reference, max_cores = 1) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) From c5e62e9aa62a33e96245562598a13aa71b9c1756 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 31 Jan 2023 18:06:46 -0500 Subject: [PATCH 457/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e9bcb2d72..2014c5c19 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9028 +Version: 4.9.9.9029 Date: 2023-01-31 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From b2db9dcbbb4cccea436f20fa4867287d65aefecd Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 18:17:34 -0500 Subject: [PATCH 458/979] use relative path --- vignettes/seurat5_multimodal_vignette.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_multimodal_vignette.Rmd b/vignettes/seurat5_multimodal_vignette.Rmd index 1b09a3821..6a086618d 100644 --- a/vignettes/seurat5_multimodal_vignette.Rmd +++ b/vignettes/seurat5_multimodal_vignette.Rmd @@ -52,13 +52,13 @@ library(patchwork) # Load in the RNA UMI matrix # Note that this dataset also contains ~5% of mouse cells, which we can use as negative controls for the protein measurements. For this reason, the gene expression matrix has HUMAN_ or MOUSE_ appended to the beginning of each gene. -cbmc.rna <- as.sparse(read.csv(file = '/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz', sep = ',', header = TRUE, row.names = 1)) +cbmc.rna <- as.sparse(read.csv(file = '../data/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz', sep = ',', header = TRUE, row.names = 1)) # To make life a bit easier going forward, we're going to discard all but the top 100 most highly expressed mouse genes, and remove the "HUMAN_" from the CITE-seq prefix cbmc.rna <- CollapseSpeciesExpressionMatrix(cbmc.rna) # Load in the ADT UMI matrix -cbmc.adt <- as.sparse(read.csv(file = '/brahms/hartmana/seurat_site_builder/seurat5/seurat-private/data/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz', sep = ',', header = TRUE, row.names = 1)) +cbmc.adt <- as.sparse(read.csv(file = '../data/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz', sep = ',', header = TRUE, row.names = 1)) # Note that since measurements were made in the same cells, the two matrices have identical column names all.equal(colnames(cbmc.rna),colnames(cbmc.adt)) From a20b86091c2efc3230d6f864969581001852e5ab Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 18:28:42 -0500 Subject: [PATCH 459/979] rm old large dataset vignette --- vignettes/seurat5_large_dataset_analysis.Rmd | 112 ------------------- 1 file changed, 112 deletions(-) delete mode 100644 vignettes/seurat5_large_dataset_analysis.Rmd diff --git a/vignettes/seurat5_large_dataset_analysis.Rmd b/vignettes/seurat5_large_dataset_analysis.Rmd deleted file mode 100644 index 416fdd020..000000000 --- a/vignettes/seurat5_large_dataset_analysis.Rmd +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: "Seurat 5: Large dataset analysis" -output: html_notebook ---- - -## load package - -```{r, warning=F, message=F} -library(Seurat) -library(BPCells) -``` - -## load matrix -```{r, warning=F, message=F} -time0_loadMatrix <- system.time({ - #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') - #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') - mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') - meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') -}) -``` - -## sketch object -```{r,warning=F, message=F} -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -time1_normalize <- system.time({ - object <- CreateSeuratObject(counts = mat, meta.data = meta) - object <- NormalizeData(object) -}) - -time2_split.mat <- system.time({ - options(Seurat.object.assay.calcn = FALSE) - object[['RNA']] <- split(object[['RNA']], f = meta$sample) -}) - - -time3_FindVariable <- system.time({ - object <- FindVariableFeatures(object, layer = 'counts') -}) - -time4_LeverageScoreSampling <- system.time({ - object <- LeverageScore(object) - object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix') -}) -``` - -## integrate sketched assay -```{r} - -time5_SketchIntegration <- system.time({ - DefaultAssay(object) <- 'sketch' - object <- FindVariableFeatures(object) - features <- SelectIntegrationFeatures5(object) - object <- ScaleData(object, features = features) - object <- RunPCA(object, features = features) - DefaultAssay(object) <- 'sketch' - object <- IntegrateLayers(object, - method = RPCAIntegration, - orig = 'pca', - new.reduction = 'integrated.rpca', - dims = 1:30, - k.anchor = 20, - reference = which(Layers(object, search = 'data') == 'data.H_3060')) -}) -object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T) -plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() -plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend() - -plot.s1 + plot.s2 - -``` - - -## proporgate embeddings to full data -```{r} -time6_UnSketch <- system.time({ - object <- IntegrateSketchEmbeddings(object = object, - atoms = 'sketch', - orig = 'RNA', - reduction = 'integrated.rpca' , - layers = Layers(object = object[['RNA']], search = 'data'), - features = features ) - -}) - -object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') - -``` - -```{r} -p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend() -p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend() -p1+p2 -``` - -## computing time summary -```{r} -all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 - - -for (i in 1:length(all_T)) { - time.i <- get(all_T[i])['elapsed'] - if (time.i > 60) { - print(paste(all_T[i], round(time.i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(time.i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 3), 'mins' )) -``` - From ff70e4a03548b1878f919a6e7bd920189b09f435 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 18:37:10 -0500 Subject: [PATCH 460/979] drop old sketch vignette --- vignettes/seurat5_sketch_integration.Rmd | 131 ----------------------- 1 file changed, 131 deletions(-) delete mode 100644 vignettes/seurat5_sketch_integration.Rmd diff --git a/vignettes/seurat5_sketch_integration.Rmd b/vignettes/seurat5_sketch_integration.Rmd deleted file mode 100644 index c9f8c390c..000000000 --- a/vignettes/seurat5_sketch_integration.Rmd +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: "Seurat 5: Sketch integration" -output: html_notebook ---- - -```{r install_seurat, warning=FALSE, message=FALSE, eval=FALSE} -remotes::install_github("mojaveazure/seurat-object", ref = "feat/CalN_generic") -remotes::install_github("satijalab/seurat-private", ref = "feat/S5_transferAnchors") -``` - -## load library -```{r, warning=FALSE, message=FALSE} -library(Seurat) -library(BPCells) -library(Azimuth) -``` - -## load data from h5ad -```{r, warning=FALSE, message=FALSE} -t0_CreateObject <- system.time({ - mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") - options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) - obj <- CreateSeuratObject(counts = mat ) -}) -``` - -## create sketch assay -```{r, warning=FALSE, message=FALSE} -t1_CreateSketchAssay <- system.time({ - obj <- NormalizeData(obj) - obj <- FindVariableFeatures(obj, layer = 'counts') - obj <- LeverageScore(obj) - obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix') -}) -``` -## Sketch assay clustering -```{r, warning=FALSE, message=FALSE} -t2_SketchClustering <- system.time({ - obj <- SCTransform(object = obj) - obj <- RunPCA(obj) - obj <- FindNeighbors(obj, dims = 1:30) - obj <- FindClusters(obj, v) -}) - -obj <- RunUMAP(obj, dims = 1:30, return.model = T, verbose = F) -``` - -```{r} -DimPlot(obj, label = T, reduction = 'umap') + NoLegend() -``` - -```{r} -options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = T) -obj.v3 <- CreateSeuratObject(counts = as.sparse(obj[['sketch']]$counts) ) - -obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) - -obj[['refAssay']] <- obj.v3[['refAssay']] -obj$predicted.subclass <- obj.v3$predicted.subclass -obj$predicted.cluster <- obj.v3$predicted.cluster -obj$predicted.subclass_smooth <- Seurat:::SmoothLabels(labels = obj$predicted.subclass , clusters = obj$SCT_snn_res.0.8 ) -``` - -## Project full cells to PCA from sketch assay -```{r, warning=FALSE, message=FALSE} -t3_ProjectEmbedding <- system.time({ - ref.emb <- ProjectCellEmbeddings(query = obj, - reference = obj, - query.assay = 'RNA', - reference.assay = 'SCT', - normalization.method = 'SCT', - reduction = 'pca') -obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA') -DefaultAssay(obj) <- 'RNA' -}) -``` - -## Transfer labels and umap from sketch to full data -```{r, warning=FALSE, message=FALSE} -t4_transferLabel <- system.time({ - obj <- TransferSketchLabels(object = obj, - atoms = 'sketch', - reduction = 'pca.orig', - dims = 1:30, - refdata = list(cluster_full = 'SCT_snn_res.0.8', - subclass_full ='predicted.subclass'), - reduction.model = 'umap' - ) -}) -``` - - -```{r} -library(ggplot2) -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() -``` - -```{r} -all_T <- ls(pattern = '^t') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 - - -for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 3), 'mins' )) -``` - - -```{r} -obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:30], - k = 30, - method = "hnsw", - metric = "cosine", - n_threads = 10) -obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') - -``` - -```{r} -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() - DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() - -#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat.rds") -``` From 70df32c37becb0d3af52f015739523dd80c61bba Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 31 Jan 2023 19:14:27 -0500 Subject: [PATCH 461/979] update paths --- vignettes/BPCells_COVID_SCTMapping.Rmd | 6 +- vignettes/BPCells_COVID_logMapping.Rmd | 4 +- .../BPCells_sketch_clustering_mouse_brain.Rmd | 9 +- ...ells_sketch_clustering_mouse_brain_SCT.Rmd | 11 +- vignettes/BPCells_sketch_inte_1M.Rmd | 22 +- vignettes/BPCells_sketch_inte_1M_SCT.Rmd | 21 +- vignettes/atomic_integration.Rmd | 4 +- vignettes/bridge_integration_vignette.Rmd | 10 +- vignettes/multimodal_reference_mapping.Rmd | 2 +- vignettes/seurat5_atomic_integration.Rmd | 4 +- .../seurat5_bridge_integration_vignette.Rmd | 10 +- vignettes/seurat5_hashing_vignette.Rmd | 4 +- .../seurat5_integration_large_datasets.Rmd | 3 - .../seurat5_large_dataset_analysis.nb.html | 423 --------------- .../seurat5_multimodal_reference_mapping.Rmd | 8 +- vignettes/seurat5_sketch_integration.nb.html | 483 ------------------ vignettes/seurat5_spatial_vignette_2.Rmd | 8 +- vignettes/spatial_vignette_2.Rmd | 8 +- 18 files changed, 46 insertions(+), 994 deletions(-) delete mode 100644 vignettes/seurat5_large_dataset_analysis.nb.html delete mode 100644 vignettes/seurat5_sketch_integration.nb.html diff --git a/vignettes/BPCells_COVID_SCTMapping.Rmd b/vignettes/BPCells_COVID_SCTMapping.Rmd index 65bb7c067..6da646d94 100755 --- a/vignettes/BPCells_COVID_SCTMapping.Rmd +++ b/vignettes/BPCells_COVID_SCTMapping.Rmd @@ -38,9 +38,9 @@ library(dplyr) ```{r, warning=F, message=F} time0_loadMatrix <- system.time({ -meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list') +meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list') -file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/" +file.dir <- "../data/PBMCVignette/" files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") input.list <- list() @@ -66,7 +66,7 @@ time1_normalize <- system.time({ ## load reference ```{r} -obj.ref <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") +obj.ref <- readRDS("../data/pbmc_multimodal_2023.rds") ``` diff --git a/vignettes/BPCells_COVID_logMapping.Rmd b/vignettes/BPCells_COVID_logMapping.Rmd index 00df41c52..77b2f660d 100755 --- a/vignettes/BPCells_COVID_logMapping.Rmd +++ b/vignettes/BPCells_COVID_logMapping.Rmd @@ -38,9 +38,9 @@ library(dplyr) ```{r, warning=F, message=F} time0_loadMatrix <- system.time({ -meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list') +meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list') -file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/" +file.dir <- "../data/PBMCVignette/" files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") input.list <- list() diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd index ebfca4f83..fd7013abf 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd @@ -36,7 +36,7 @@ library(BPCells) ```{r, warning=FALSE, message=FALSE} t0_CreateObject <- system.time({ -mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") +mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") devtools::load_all("~/share/package/MetricPatch/") mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') @@ -177,9 +177,6 @@ obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction. ``` ```{r} -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() - -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() -#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat.rds") - +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() ``` \ No newline at end of file diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd index 5b276c129..6dfdedeb9 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd @@ -37,7 +37,7 @@ library(Azimuth) ```{r, warning=FALSE, message=FALSE} t0_CreateObject <- system.time({ -mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") +mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") # devtools::load_all("~/share/package/MetricPatch/") devtools::load_all("/home/haoy/share/package/MetricPatch/") mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') @@ -160,20 +160,15 @@ print(paste('Total time ', round(overall, digits = 3), 'mins' )) ```{r} - obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], k = 30, method = "hnsw", metric = "cosine", n_threads = 10) obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') - ``` ```{r} -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() - -#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat_SCT.rds") - +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() ``` diff --git a/vignettes/BPCells_sketch_inte_1M.Rmd b/vignettes/BPCells_sketch_inte_1M.Rmd index f4cd57249..2d284b54e 100755 --- a/vignettes/BPCells_sketch_inte_1M.Rmd +++ b/vignettes/BPCells_sketch_inte_1M.Rmd @@ -36,13 +36,10 @@ library(dplyr) ## load matrix ```{r, warning=F, message=F} - + time0_loadMatrix <- system.time({ - #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') - #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') - mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') - meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') - + mat <- open_matrix_dir('../data/pbmc_ParseBio_sparse//') + meta <- readRDS('../data/ParseBio_PBMC_meta.rds') }) ``` @@ -53,11 +50,6 @@ options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) time1_normalize <- system.time({ object <- CreateSeuratObject(counts = mat, meta.data = meta) object <- NormalizeData(object, verbose = FALSE) - # object[['RNA']]$data <- write_matrix_dir( - # mat = object[['RNA']]$data, - # dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data' - # ) - #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data') }) @@ -129,11 +121,6 @@ time6_UnSketch <- system.time({ object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') ``` -## save object -```{r} -#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat.rds")) -``` - ## pseudo-bulk ```{r} time8_bulk <- system.time( bulk <- AverageExpression(object, @@ -168,8 +155,6 @@ for (i in 1:length(all_T)) { print(paste('Total time ', round(overall, digits = 1), 'mins' )) ``` - - ```{r} p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend() p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend() @@ -178,5 +163,4 @@ p1+p2 ``` ```{r,fig.height = 20, fig.width = 15} DoHeatmap(bulk, features = top5$gene) + NoLegend() - ``` diff --git a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd index b4c7f5ec7..19d97370a 100755 --- a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd +++ b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd @@ -36,13 +36,9 @@ library(dplyr) ## load matrix ```{r, warning=F, message=F} - -time0_loadMatrix <- system.time({ - #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') - #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') - mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') - meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') - +time0_loadMatrix <- system.time({ + mat <- open_matrix_dir('../data/pbmc_ParseBio_sparse/') + meta <- readRDS('../data/ParseBio_PBMC_meta.rds') }) ``` @@ -53,11 +49,6 @@ options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) time1_normalize <- system.time({ object <- CreateSeuratObject(counts = mat, meta.data = meta) object <- NormalizeData(object) - # object[['RNA']]$data <- write_matrix_dir( - # mat = object[['RNA']]$data, - # dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data' - # ) - #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data') }) @@ -130,11 +121,6 @@ time6_UnSketch <- system.time({ object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') ``` -## save object -```{r} -#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat_SCT.rds")) -``` - ## pseudo-bulk ```{r} time8_bulk <- system.time( bulk <- AverageExpression(object, @@ -184,5 +170,4 @@ p1+p2 + p3 ``` ```{r,fig.height = 20, fig.width = 15} DoHeatmap(bulk, features = top5$gene) + NoLegend() - ``` diff --git a/vignettes/atomic_integration.Rmd b/vignettes/atomic_integration.Rmd index 7e7fe7725..38e695c01 100644 --- a/vignettes/atomic_integration.Rmd +++ b/vignettes/atomic_integration.Rmd @@ -65,7 +65,7 @@ We load each object separately, perform basic preprocessing (normalization and v ```{r init, results='hide', message=FALSE, fig.keep='none'} -file.dir <- '/brahms/haoy/vignette_data/PBMCVignette/' +file.dir <- '../data/PBMCVignette/' files.set <- c("arunachalam_2020_processed.HDF5", "combes_2021_processed.HDF5","lee_2020_processed.HDF5","wilk_2020_processed.HDF5","yao_2021_processed.HDF5") atoms.list <- list() @@ -175,7 +175,7 @@ obj.merge <- RunUMAP(obj.merge, reduction = 'integrated_dr', dims = 1:30) Now we can visualize the results, plotting the scRNA-seq cells based on dataset batches and pre-annotated labels annotations on the UMAP embedding. We also add pre-computed cell annotations to this object (you can download the cell annotation metadata at [this link](https://seurat.nygenome.org/vignette_data/atomic_integration/pbmc_annotations.txt)). ```{r split.dim} -annotation_data <- read.table("/brahms/haoy/vignette_data/PBMCVignette/pbmc_annotations.txt") +annotation_data <- read.table("../data/PBMCVignette/pbmc_annotations.txt") obj.merge <- AddMetaData(obj.merge, metadata = annotation_data) DimPlot(obj.merge, reduction = "umap", group.by = "dataset", shuffle = TRUE, raster = FALSE) DimPlot(obj.merge, reduction = "umap", group.by = "celltype.l2", raster = FALSE) diff --git a/vignettes/bridge_integration_vignette.Rmd b/vignettes/bridge_integration_vignette.Rmd index 827d168a0..bdbdf94a8 100644 --- a/vignettes/bridge_integration_vignette.Rmd +++ b/vignettes/bridge_integration_vignette.Rmd @@ -62,7 +62,7 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he ```{r} # the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -83,7 +83,7 @@ seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" # File with ATAC per fragment information file -frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( @@ -123,8 +123,8 @@ We note that it is important to quantify the same set of genomic features in the ```{r, message=FALSE, warning=FALSE} # Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" # Get gene annotations annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) @@ -166,7 +166,7 @@ obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") ```
    **What if I want to use my own reference dataset?** diff --git a/vignettes/multimodal_reference_mapping.Rmd b/vignettes/multimodal_reference_mapping.Rmd index 15f2eb277..6c79330a3 100644 --- a/vignettes/multimodal_reference_mapping.Rmd +++ b/vignettes/multimodal_reference_mapping.Rmd @@ -65,7 +65,7 @@ options(SeuratData.repo.use = "http://satijalab04.nygenome.org") We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048), and visualize the pre-computed UMAP. This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -reference <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") +reference <- readRDS("../data/pbmc_multimodal_2023.rds") ``` ```{r ref.dimplot} diff --git a/vignettes/seurat5_atomic_integration.Rmd b/vignettes/seurat5_atomic_integration.Rmd index 7e7fe7725..38e695c01 100644 --- a/vignettes/seurat5_atomic_integration.Rmd +++ b/vignettes/seurat5_atomic_integration.Rmd @@ -65,7 +65,7 @@ We load each object separately, perform basic preprocessing (normalization and v ```{r init, results='hide', message=FALSE, fig.keep='none'} -file.dir <- '/brahms/haoy/vignette_data/PBMCVignette/' +file.dir <- '../data/PBMCVignette/' files.set <- c("arunachalam_2020_processed.HDF5", "combes_2021_processed.HDF5","lee_2020_processed.HDF5","wilk_2020_processed.HDF5","yao_2021_processed.HDF5") atoms.list <- list() @@ -175,7 +175,7 @@ obj.merge <- RunUMAP(obj.merge, reduction = 'integrated_dr', dims = 1:30) Now we can visualize the results, plotting the scRNA-seq cells based on dataset batches and pre-annotated labels annotations on the UMAP embedding. We also add pre-computed cell annotations to this object (you can download the cell annotation metadata at [this link](https://seurat.nygenome.org/vignette_data/atomic_integration/pbmc_annotations.txt)). ```{r split.dim} -annotation_data <- read.table("/brahms/haoy/vignette_data/PBMCVignette/pbmc_annotations.txt") +annotation_data <- read.table("../data/PBMCVignette/pbmc_annotations.txt") obj.merge <- AddMetaData(obj.merge, metadata = annotation_data) DimPlot(obj.merge, reduction = "umap", group.by = "dataset", shuffle = TRUE, raster = FALSE) DimPlot(obj.merge, reduction = "umap", group.by = "celltype.l2", raster = FALSE) diff --git a/vignettes/seurat5_bridge_integration_vignette.Rmd b/vignettes/seurat5_bridge_integration_vignette.Rmd index 827d168a0..bdbdf94a8 100644 --- a/vignettes/seurat5_bridge_integration_vignette.Rmd +++ b/vignettes/seurat5_bridge_integration_vignette.Rmd @@ -62,7 +62,7 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he ```{r} # the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -83,7 +83,7 @@ seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" # File with ATAC per fragment information file -frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( @@ -123,8 +123,8 @@ We note that it is important to quantify the same set of genomic features in the ```{r, message=FALSE, warning=FALSE} # Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" # Get gene annotations annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) @@ -166,7 +166,7 @@ obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") ```
    **What if I want to use my own reference dataset?** diff --git a/vignettes/seurat5_hashing_vignette.Rmd b/vignettes/seurat5_hashing_vignette.Rmd index d389067c8..a1cc52d96 100644 --- a/vignettes/seurat5_hashing_vignette.Rmd +++ b/vignettes/seurat5_hashing_vignette.Rmd @@ -224,10 +224,10 @@ library(Seurat) options(Seurat.object.assay.version = "v5") # Read in UMI count matrix for RNA -hto12.umis <- readRDS("/brahms/hartmana/seurat_site_builder/seurat/data/hto12_umi_mtx.rds") +hto12.umis <- readRDS("../data/hto12_umi_mtx.rds") # Read in HTO count matrix -hto12.htos <- readRDS("/brahms/hartmana/seurat_site_builder/seurat/data/hto12_hto_mtx.rds") +hto12.htos <- readRDS("../data/hto12_hto_mtx.rds") # Select cell barcodes detected in both RNA and HTO cells.use <- intersect(rownames(hto12.htos), colnames(hto12.umis)) diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd index 409761cc2..4a9ceb8ee 100644 --- a/vignettes/seurat5_integration_large_datasets.Rmd +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -67,9 +67,6 @@ After acquiring the data, we first perform standard normalization and variable f ```{r hca.full.1} bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") -#bm280k.data <- Read10X("/brahms/mollag/seurat_v5/data/e7448a34-b33d-41de-b422-4c09bfeba96b.mtx/") -#colnames(bm280k.data) <- make.unique(c(colnames(bm280k.data)), sep="_") -#bm280k.data <- Read10X_h5("../data/ica_bone_marrow_h5.h5") bm280k <- CreateSeuratObject(counts = bm280k.data, min.cells = 100, min.features = 500) bm280k[["RNA"]] <- split(bm280k[["RNA"]], f = bm280k$orig.ident) diff --git a/vignettes/seurat5_large_dataset_analysis.nb.html b/vignettes/seurat5_large_dataset_analysis.nb.html deleted file mode 100644 index dbd453ec2..000000000 --- a/vignettes/seurat5_large_dataset_analysis.nb.html +++ /dev/null @@ -1,423 +0,0 @@ - - - - - - - - - - - - - -Seurat 5: Large dataset analysis - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - - -
    -

    load package

    - - - -
    library(Seurat)
    - - -
    Loading required package: SeuratObject
    -Loading required package: sp
    -
    -Attaching package: ‘SeuratObject’
    -
    -The following object is masked from ‘package:base’:
    -
    -    intersect
    -
    -Registered S3 method overwritten by 'data.table':
    -  method           from
    -  print.data.table     
    -Registered S3 method overwritten by 'htmlwidgets':
    -  method           from         
    -  print.htmlwidget tools:rstudio
    - - -
    library(BPCells)
    - - - -
    -
    -

    load matrix

    - - - -
    time0_loadMatrix <- system.time({ 
    -  #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/')
    -  #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds')
    -  mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//')
    -  meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds')
    -})
    - - - -
    -
    -

    sketch object

    - - - -
    options(Seurat.object.assay.version = "v5",   Seurat.object.assay.calcn = T)
    -time1_normalize <- system.time({
    -  object <- CreateSeuratObject(counts = mat, meta.data = meta)
    -  object <- NormalizeData(object)
    -})
    - - - -
    -
    -

    integrate sketched assay

    - - - -
    
    -time5_SketchIntegration  <- system.time({
    -  DefaultAssay(object) <- 'sketch'
    -  object <- FindVariableFeatures(object)
    -  features <- SelectIntegrationFeatures5(object)
    -  object <- ScaleData(object, features =  features)
    -  object <- RunPCA(object, features =  features)
    -  DefaultAssay(object) <- 'sketch'
    -  object <- IntegrateLayers(object, 
    -                            method = RPCAIntegration,
    -                            orig = 'pca',
    -                            new.reduction = 'integrated.rpca',
    -                            dims = 1:30, 
    -                            k.anchor = 20,
    -                            reference = which(Layers(object, search = 'data') == 'data.H_3060'))
    -})
    -object <- RunUMAP(object,  reduction = 'integrated.rpca', dims = 1:30, return.model = T)
    -plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend()
    -plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend()
    -
    -plot.s1 + plot.s2
    -
    - - - -
    -
    -

    proporgate embeddings to full data

    - - - -
    time6_UnSketch <- system.time({
    -  object <- IntegrateSketchEmbeddings(object = object,
    -                                       atoms = 'sketch',
    -                                       orig = 'RNA',
    -                                       reduction = 'integrated.rpca' ,
    -                                       layers = Layers(object = object[['RNA']], search = 'data'),
    -                                      features = features  )
    - 
    -})
    - 
    -object <- RunUMAP(object,  reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_')
    -
    - - - - - - -
    p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend()
    -p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend()
    -p1+p2
    - - - -
    -
    -

    computing time summary

    - - - -
    all_T  <- ls(pattern = 'time')
    -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60
    -
    -
    -for (i in 1:length(all_T)) {
    -  time.i <- get(all_T[i])['elapsed']
    -  if (time.i > 60) {
    -     print(paste(all_T[i], round(time.i/60, digits = 1), 'mins'))
    -  } else {
    -     print(paste(all_T[i], round(time.i, digits = 1), 'secs'))
    -  }
    -}
    -print(paste('Total time ', round(overall, digits = 3), 'mins' ))
    - - - - -
    - -
    LS0tCnRpdGxlOiAiU2V1cmF0IDU6IExhcmdlIGRhdGFzZXQgYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIGxvYWQgcGFja2FnZQoKYGBge3IsIHdhcm5pbmc9RiwgbWVzc2FnZT1GfQpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShCUENlbGxzKQpgYGAKCiMjIGxvYWQgbWF0cml4CmBgYHtyLCB3YXJuaW5nPUYsIG1lc3NhZ2U9Rn0KdGltZTBfbG9hZE1hdHJpeCA8LSBzeXN0ZW0udGltZSh7IAogICNtYXQgPC0gb3Blbl9tYXRyaXhfZGlyKCcvYnJhaG1zL2hhb3kvdGVzdC9wYm1jXzE1MGtfc3BhcnNlLycpCiAgI21ldGEgPC0gcmVhZFJEUygnL2JyYWhtcy9oYW95L3NldXJhdDUvUzVfb2JqZWN0L1BhcnNlQmlvX1BCTUNfbWV0YV8xMDBLLnJkcycpCiAgbWF0IDwtIG9wZW5fbWF0cml4X2RpcignL2JyYWhtcy9oYW95L3Rlc3QvcGJtY19QYXJzZUJpb19zcGFyc2UvLycpCiAgbWV0YSA8LSByZWFkUkRTKCcvYnJhaG1zL2hhb3kvc2V1cmF0NS9TNV9vYmplY3QvUGFyc2VCaW9fUEJNQ19tZXRhLnJkcycpCn0pCmBgYAoKIyMgc2tldGNoIG9iamVjdApgYGB7cix3YXJuaW5nPUYsIG1lc3NhZ2U9Rn0Kb3B0aW9ucyhTZXVyYXQub2JqZWN0LmFzc2F5LnZlcnNpb24gPSAidjUiLCAgIFNldXJhdC5vYmplY3QuYXNzYXkuY2FsY24gPSBUKQp0aW1lMV9ub3JtYWxpemUgPC0gc3lzdGVtLnRpbWUoewogIG9iamVjdCA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gbWF0LCBtZXRhLmRhdGEgPSBtZXRhKQogIG9iamVjdCA8LSBOb3JtYWxpemVEYXRhKG9iamVjdCkKfSkKIAp0aW1lMl9zcGxpdC5tYXQgIDwtIHN5c3RlbS50aW1lKHsKICBvcHRpb25zKFNldXJhdC5vYmplY3QuYXNzYXkuY2FsY24gPSBGQUxTRSkgCiAgb2JqZWN0W1snUk5BJ11dIDwtIHNwbGl0KG9iamVjdFtbJ1JOQSddXSwgZiA9ICBtZXRhJHNhbXBsZSkKfSkKCgp0aW1lM19GaW5kVmFyaWFibGUgIDwtIHN5c3RlbS50aW1lKHsKICBvYmplY3QgPC0gRmluZFZhcmlhYmxlRmVhdHVyZXMob2JqZWN0LCBsYXllciA9ICdjb3VudHMnKQp9KQoKdGltZTRfTGV2ZXJhZ2VTY29yZVNhbXBsaW5nICA8LSBzeXN0ZW0udGltZSh7CiAgb2JqZWN0IDwtIExldmVyYWdlU2NvcmUob2JqZWN0KQogIG9iamVjdCA8LSBMZXZlcmFnZVNjb3JlU2FtcGxpbmcob2JqZWN0ID0gb2JqZWN0LCBuY2VsbHMgPSA1MDAwLCBjYXN0ID0gJ2RnQ01hdHJpeCcpCn0pCmBgYAoKIyMgaW50ZWdyYXRlIHNrZXRjaGVkIGFzc2F5CmBgYHtyfQoKdGltZTVfU2tldGNoSW50ZWdyYXRpb24gIDwtIHN5c3RlbS50aW1lKHsKICBEZWZhdWx0QXNzYXkob2JqZWN0KSA8LSAnc2tldGNoJwogIG9iamVjdCA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhvYmplY3QpCiAgZmVhdHVyZXMgPC0gU2VsZWN0SW50ZWdyYXRpb25GZWF0dXJlczUob2JqZWN0KQogIG9iamVjdCA8LSBTY2FsZURhdGEob2JqZWN0LCBmZWF0dXJlcyA9ICBmZWF0dXJlcykKICBvYmplY3QgPC0gUnVuUENBKG9iamVjdCwgZmVhdHVyZXMgPSAgZmVhdHVyZXMpCiAgRGVmYXVsdEFzc2F5KG9iamVjdCkgPC0gJ3NrZXRjaCcKICBvYmplY3QgPC0gSW50ZWdyYXRlTGF5ZXJzKG9iamVjdCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSBSUENBSW50ZWdyYXRpb24sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBvcmlnID0gJ3BjYScsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBuZXcucmVkdWN0aW9uID0gJ2ludGVncmF0ZWQucnBjYScsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBkaW1zID0gMTozMCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBrLmFuY2hvciA9IDIwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVmZXJlbmNlID0gd2hpY2goTGF5ZXJzKG9iamVjdCwgc2VhcmNoID0gJ2RhdGEnKSA9PSAnZGF0YS5IXzMwNjAnKSkKfSkKb2JqZWN0IDwtIFJ1blVNQVAob2JqZWN0LCAgcmVkdWN0aW9uID0gJ2ludGVncmF0ZWQucnBjYScsIGRpbXMgPSAxOjMwLCByZXR1cm4ubW9kZWwgPSBUKQpwbG90LnMxIDwtIERpbVBsb3Qob2JqZWN0LCBncm91cC5ieSA9ICdzYW1wbGUnLCByZWR1Y3Rpb24gPSAndW1hcCcpICsgTm9MZWdlbmQoKQpwbG90LnMyIDwtIERpbVBsb3Qob2JqZWN0LCBncm91cC5ieSA9ICdjZWxsdHlwZS53ZWlnaHQnLCByZWR1Y3Rpb24gPSAndW1hcCcpICsgTm9MZWdlbmQoKQoKcGxvdC5zMSArIHBsb3QuczIKCmBgYAoKCiMjIHByb3BvcmdhdGUgZW1iZWRkaW5ncyB0byBmdWxsIGRhdGEKYGBge3J9CnRpbWU2X1VuU2tldGNoIDwtIHN5c3RlbS50aW1lKHsKICBvYmplY3QgPC0gSW50ZWdyYXRlU2tldGNoRW1iZWRkaW5ncyhvYmplY3QgPSBvYmplY3QsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGF0b21zID0gJ3NrZXRjaCcsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yaWcgPSAnUk5BJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVkdWN0aW9uID0gJ2ludGVncmF0ZWQucnBjYScgLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsYXllcnMgPSBMYXllcnMob2JqZWN0ID0gb2JqZWN0W1snUk5BJ11dLCBzZWFyY2ggPSAnZGF0YScpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZlYXR1cmVzID0gZmVhdHVyZXMgICkKIAp9KQogCm9iamVjdCA8LSBSdW5VTUFQKG9iamVjdCwgIHJlZHVjdGlvbiA9ICdpbnRlZ3JhdGVkLnJwY2Eub3JpZycsIGRpbXMgPSAxOjMwICwgcmVkdWN0aW9uLm5hbWUgPSAndW1hcC5vcmlnJywgcmVkdWN0aW9uLmtleSA9ICdVb3JpZ18nKQoKYGBgCgpgYGB7cn0KcDE8LSBEaW1QbG90KG9iamVjdCwgcmVkdWN0aW9uID0gJ3VtYXAub3JpZycsIGdyb3VwLmJ5ID0gJ3NhbXBsZScsYWxwaGEgPSAwLjEpICsgTm9MZWdlbmQoKQpwMjwtIERpbVBsb3Qob2JqZWN0LCByZWR1Y3Rpb24gPSAndW1hcC5vcmlnJywgZ3JvdXAuYnkgPSAnY2VsbHR5cGUud2VpZ2h0JywgbGFiZWwgPSBULCBhbHBoYSA9IDAuMSkgKyBOb0xlZ2VuZCgpCnAxK3AyCmBgYAoKIyMgY29tcHV0aW5nIHRpbWUgc3VtbWFyeQpgYGB7cn0KYWxsX1QgIDwtIGxzKHBhdHRlcm4gPSAndGltZScpCm92ZXJhbGwgPC0gc3VtKHNhcHBseShhbGxfVCwgZnVuY3Rpb24oeCkgcm91bmQoZ2V0KHgpWydlbGFwc2VkJ10sIGRpZ2l0cyA9IDMpKSkvNjAKCgpmb3IgKGkgaW4gMTpsZW5ndGgoYWxsX1QpKSB7CiAgdGltZS5pIDwtIGdldChhbGxfVFtpXSlbJ2VsYXBzZWQnXQogIGlmICh0aW1lLmkgPiA2MCkgewogICAgIHByaW50KHBhc3RlKGFsbF9UW2ldLCByb3VuZCh0aW1lLmkvNjAsIGRpZ2l0cyA9IDEpLCAnbWlucycpKQogIH0gZWxzZSB7CiAgICAgcHJpbnQocGFzdGUoYWxsX1RbaV0sIHJvdW5kKHRpbWUuaSwgZGlnaXRzID0gMSksICdzZWNzJykpCiAgfQp9CnByaW50KHBhc3RlKCdUb3RhbCB0aW1lICcsIHJvdW5kKG92ZXJhbGwsIGRpZ2l0cyA9IDMpLCAnbWlucycgKSkKYGBgCgo=
    - - - -
    - - - - - - - - - - - - - - - - diff --git a/vignettes/seurat5_multimodal_reference_mapping.Rmd b/vignettes/seurat5_multimodal_reference_mapping.Rmd index 19c666f67..b2e1c8305 100644 --- a/vignettes/seurat5_multimodal_reference_mapping.Rmd +++ b/vignettes/seurat5_multimodal_reference_mapping.Rmd @@ -65,7 +65,7 @@ options(SeuratData.repo.use = "http://satijalab04.nygenome.org") We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048), and visualize the pre-computed UMAP. This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -reference <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") +reference <- readRDS("../data/pbmc_multimodal_2023.rds") ``` ```{r ref.dimplot} @@ -274,7 +274,7 @@ bm <- FindNeighbors( object = bm, reduction = "spca", dims = 1:50, - graph.name = "spca.annoy.neighbors", + graph.name = "spca.annoy.neighbors", k.param = 50, cache.index = TRUE, return.neighbor = TRUE, @@ -323,8 +323,8 @@ for (i in 1:length(hcabm40k.batches)) { reference = bm, query = hcabm40k.batches[[i]], k.filter = NA, - reference.reduction = "spca", - reference.neighbors = "spca.annoy.neighbors", + reference.reduction = "spca", + reference.neighbors = "spca.annoy.neighbors", dims = 1:50 ) } diff --git a/vignettes/seurat5_sketch_integration.nb.html b/vignettes/seurat5_sketch_integration.nb.html deleted file mode 100644 index 04479b6c9..000000000 --- a/vignettes/seurat5_sketch_integration.nb.html +++ /dev/null @@ -1,483 +0,0 @@ - - - - - - - - - - - - - -Seurat 5: Sketch integration - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - - - - - -
    remotes::install_github("mojaveazure/seurat-object", ref = "feat/CalN_generic")
    -remotes::install_github("satijalab/seurat-private", ref = "feat/S5_transferAnchors")
    - - - -
    -

    load library

    - - - -
    library(Seurat)
    -library(BPCells)
    -library(Azimuth)
    - - -
    Registered S3 method overwritten by 'SeuratDisk':
    -  method            from  
    -  as.sparse.H5Group Seurat
    -Attaching shinyBS
    - - - -
    -
    -

    load data from h5ad

    - - - -
    t0_CreateObject <- system.time({
    -  mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts") 
    -  options(Seurat.object.assay.version = "v5",  Seurat.object.assay.calcn = T)
    -  obj <- CreateSeuratObject(counts = mat )
    -})
    - - - -
    -
    -

    create sketch assay

    - - - -
    t1_CreateSketchAssay <- system.time({
    -  obj <- NormalizeData(obj)
    -  obj <- FindVariableFeatures(obj, layer = 'counts')
    -  obj <- LeverageScore(obj)
    -  obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix')
    -})
    - - -
    Normalizing layer: counts
    -Finding variable features for layer counts
    - - -
    debugging in: VariableFeatures(object = object, nfeatures = nselect, simplify = TRUE)
    -debug: {
    -    UseMethod(generic = "VariableFeatures", object = object)
    -}
    - - -
    Q
    - - -
    Timing stopped at: 195.3 12.01 2058
    - - - -
    -
    -

    Sketch assay clustering

    - - - -
    t2_SketchClustering <- system.time({
    -  obj <- SCTransform(object = obj)
    -  obj <- RunPCA(obj)
    -  obj <- FindNeighbors(obj, dims = 1:30)
    -  obj <- FindClusters(obj, v)
    -})
    - - -
    Running SCTransform on assay: RNA
    -Running SCTransform on layer: counts
    -Using block 79 from counts to learn model.
    -Error in dim(x) <- length(x) : 
    -  invalid first argument, must be vector (list or atomic)
    - - -
    Timing stopped at: 2.016 0.048 2.061
    - - - - - - - - - - -
    options(Seurat.object.assay.version = "v3",  Seurat.object.assay.calcn = T)
    -obj.v3 <- CreateSeuratObject(counts =  as.sparse(obj[['sketch']]$counts) )
    -
    -obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) 
    -
    -obj[['refAssay']] <- obj.v3[['refAssay']]
    -obj$predicted.subclass <- obj.v3$predicted.subclass
    -obj$predicted.cluster <- obj.v3$predicted.cluster
    -obj$predicted.subclass_smooth <- Seurat:::SmoothLabels(labels = obj$predicted.subclass , clusters = obj$SCT_snn_res.0.8  )
    - - - -
    -
    -

    Project full cells to PCA from sketch assay

    - - - -
    t3_ProjectEmbedding <- system.time({
    -  ref.emb <- ProjectCellEmbeddings(query = obj,
    -                                   reference = obj,
    -                                   query.assay = 'RNA', 
    -                                   reference.assay = 'SCT',
    -                                   normalization.method = 'SCT',
    -                                                reduction = 'pca')
    -obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA')
    -DefaultAssay(obj) <- 'RNA'
    -})
    - - - -
    -
    -

    Transfer labels and umap from sketch to full data

    - - - -
    t4_transferLabel <- system.time({
    -  obj <- TransferSketchLabels(object = obj,
    -                            atoms = 'sketch',
    -                            reduction = 'pca.orig',
    -                            dims = 1:30,
    -                            refdata = list(cluster_full = 'SCT_snn_res.0.8',
    -                                           subclass_full ='predicted.subclass'),
    -                            reduction.model = 'umap'
    -                            )
    -})
    - - - - - - -
    library(ggplot2)
    -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend()  
    -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend()  
    - - - - - - -
    all_T  <- ls(pattern = '^t')
    -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60
    -
    -
    -for (i in 1:length(all_T)) {
    -  T_i <- get(all_T[i])['elapsed']
    -  if (T_i > 60) {
    -     print(paste(all_T[i], round(T_i/60, digits = 1), 'mins'))
    -  } else {
    -     print(paste(all_T[i], round(T_i, digits = 1), 'secs'))
    -  }
    -}
    -print(paste('Total time ', round(overall, digits = 3), 'mins' ))
    - - - - - - -
    obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:30], 
    -                            k = 30, 
    -                            method = "hnsw", 
    -                            metric = "cosine", 
    -                            n_threads = 10)
    -obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_')
    -
    - - - - - - -
    DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend()  
    -  DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend()  
    - 
    -#saveRDS(obj, file = "/brahms/haoy/test/mouse_1M_neurons_seurat.rds")
    - - -
    - -
    LS0tCnRpdGxlOiAiU2V1cmF0IDU6IFNrZXRjaCBpbnRlZ3JhdGlvbiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3IgaW5zdGFsbF9zZXVyYXQsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIGV2YWw9RkFMU0V9CnJlbW90ZXM6Omluc3RhbGxfZ2l0aHViKCJtb2phdmVhenVyZS9zZXVyYXQtb2JqZWN0IiwgcmVmID0gImZlYXQvQ2FsTl9nZW5lcmljIikKcmVtb3Rlczo6aW5zdGFsbF9naXRodWIoInNhdGlqYWxhYi9zZXVyYXQtcHJpdmF0ZSIsIHJlZiA9ICJmZWF0L1M1X3RyYW5zZmVyQW5jaG9ycyIpCmBgYAogCiMjIGxvYWQgbGlicmFyeQpgYGB7ciwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoQlBDZWxscykKbGlicmFyeShBemltdXRoKQpgYGAKIAojIyBsb2FkIGRhdGEgZnJvbSBoNWFkIApgYGB7ciwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KdDBfQ3JlYXRlT2JqZWN0IDwtIHN5c3RlbS50aW1lKHsKICBtYXQgPC0gb3Blbl9tYXRyaXhfZGlyKCIvYnJhaG1zL2hhb3kvdGVzdC9tb3VzZV8xTV9uZXVyb25zX2NvdW50cyIpIAogIG9wdGlvbnMoU2V1cmF0Lm9iamVjdC5hc3NheS52ZXJzaW9uID0gInY1IiwgIFNldXJhdC5vYmplY3QuYXNzYXkuY2FsY24gPSBUKQogIG9iaiA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gbWF0ICkKfSkKYGBgCgojIyBjcmVhdGUgc2tldGNoIGFzc2F5CmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQp0MV9DcmVhdGVTa2V0Y2hBc3NheSA8LSBzeXN0ZW0udGltZSh7CiAgb2JqIDwtIE5vcm1hbGl6ZURhdGEob2JqKQogIG9iaiA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhvYmosIGxheWVyID0gJ2NvdW50cycpCiAgb2JqIDwtIExldmVyYWdlU2NvcmUob2JqKQogIG9iaiA8LSBMZXZlcmFnZVNjb3JlU2FtcGxpbmcob2JqZWN0ID0gb2JqLCBuY2VsbHMgPSA1MDAwMCwgY2FzdCA9ICdkZ0NNYXRyaXgnKQp9KQpgYGAKIyMgU2tldGNoIGFzc2F5IGNsdXN0ZXJpbmcKYGBge3IsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0V9CnQyX1NrZXRjaENsdXN0ZXJpbmcgPC0gc3lzdGVtLnRpbWUoewogIG9iaiA8LSBTQ1RyYW5zZm9ybShvYmplY3QgPSBvYmopCiAgb2JqIDwtIFJ1blBDQShvYmopCiAgb2JqIDwtIEZpbmROZWlnaGJvcnMob2JqLCBkaW1zID0gMTozMCkKICBvYmogPC0gRmluZENsdXN0ZXJzKG9iaiwgdikKfSkKCm9iaiA8LSBSdW5VTUFQKG9iaiwgZGltcyA9IDE6MzAsIHJldHVybi5tb2RlbCA9IFQsIHZlcmJvc2UgPSBGKQpgYGAKCmBgYHtyfQpEaW1QbG90KG9iaiwgbGFiZWwgPSBULCByZWR1Y3Rpb24gPSAndW1hcCcpICsgTm9MZWdlbmQoKSAKYGBgCgpgYGB7cn0Kb3B0aW9ucyhTZXVyYXQub2JqZWN0LmFzc2F5LnZlcnNpb24gPSAidjMiLCAgU2V1cmF0Lm9iamVjdC5hc3NheS5jYWxjbiA9IFQpCm9iai52MyA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gIGFzLnNwYXJzZShvYmpbWydza2V0Y2gnXV0kY291bnRzKSApCgpvYmoudjMgPC0gUnVuQXppbXV0aChxdWVyeSA9IG9iai52MywgYXNzYXkgPSAnUk5BJywgcmVmZXJlbmNlID0gJ21vdXNlY29ydGV4cmVmJywgZG8uYWR0ID0gRikgCgpvYmpbWydyZWZBc3NheSddXSA8LSBvYmoudjNbWydyZWZBc3NheSddXQpvYmokcHJlZGljdGVkLnN1YmNsYXNzIDwtIG9iai52MyRwcmVkaWN0ZWQuc3ViY2xhc3MKb2JqJHByZWRpY3RlZC5jbHVzdGVyIDwtIG9iai52MyRwcmVkaWN0ZWQuY2x1c3RlcgpvYmokcHJlZGljdGVkLnN1YmNsYXNzX3Ntb290aCA8LSBTZXVyYXQ6OjpTbW9vdGhMYWJlbHMobGFiZWxzID0gb2JqJHByZWRpY3RlZC5zdWJjbGFzcyAsIGNsdXN0ZXJzID0gb2JqJFNDVF9zbm5fcmVzLjAuOCAgKQpgYGAKCiMjIFByb2plY3QgZnVsbCBjZWxscyB0byBQQ0EgZnJvbSBza2V0Y2ggYXNzYXkgCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQp0M19Qcm9qZWN0RW1iZWRkaW5nIDwtIHN5c3RlbS50aW1lKHsKICByZWYuZW1iIDwtIFByb2plY3RDZWxsRW1iZWRkaW5ncyhxdWVyeSA9IG9iaiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZWZlcmVuY2UgPSBvYmosCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcXVlcnkuYXNzYXkgPSAnUk5BJywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVmZXJlbmNlLmFzc2F5ID0gJ1NDVCcsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAnU0NUJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVkdWN0aW9uID0gJ3BjYScpCm9ialtbJ3BjYS5vcmlnJ11dIDwtIENyZWF0ZURpbVJlZHVjT2JqZWN0KGVtYmVkZGluZ3MgPSByZWYuZW1iLCBhc3NheSA9ICdSTkEnKQpEZWZhdWx0QXNzYXkob2JqKSA8LSAnUk5BJwp9KQpgYGAgCgojIyBUcmFuc2ZlciBsYWJlbHMgYW5kIHVtYXAgZnJvbSBza2V0Y2ggdG8gZnVsbCBkYXRhCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQp0NF90cmFuc2ZlckxhYmVsIDwtIHN5c3RlbS50aW1lKHsKICBvYmogPC0gVHJhbnNmZXJTa2V0Y2hMYWJlbHMob2JqZWN0ID0gb2JqLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgYXRvbXMgPSAnc2tldGNoJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlZHVjdGlvbiA9ICdwY2Eub3JpZycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBkaW1zID0gMTozMCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlZmRhdGEgPSBsaXN0KGNsdXN0ZXJfZnVsbCA9ICdTQ1Rfc25uX3Jlcy4wLjgnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3ViY2xhc3NfZnVsbCA9J3ByZWRpY3RlZC5zdWJjbGFzcycpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVkdWN0aW9uLm1vZGVsID0gJ3VtYXAnCiAgICAgICAgICAgICAgICAgICAgICAgICAgICApCn0pCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCkRpbVBsb3Qob2JqLCBsYWJlbCA9IFQsIHJlZHVjdGlvbiA9ICdyZWYudW1hcCcsIGdyb3VwLmJ5ID0gJ3ByZWRpY3RlZC5jbHVzdGVyX2Z1bGwnLCBhbHBoYSA9IDAuMSkgKyBOb0xlZ2VuZCgpICAKRGltUGxvdChvYmosIGxhYmVsID0gVCwgcmVkdWN0aW9uID0gJ3JlZi51bWFwJywgZ3JvdXAuYnkgPSAncHJlZGljdGVkLnN1YmNsYXNzX2Z1bGwnLCBhbHBoYSA9IDAuMSkgKyBOb0xlZ2VuZCgpICAKYGBgCgpgYGB7cn0KYWxsX1QgIDwtIGxzKHBhdHRlcm4gPSAnXnQnKQpvdmVyYWxsIDwtIHN1bShzYXBwbHkoYWxsX1QsIGZ1bmN0aW9uKHgpIHJvdW5kKGdldCh4KVsnZWxhcHNlZCddLCBkaWdpdHMgPSAzKSkpLzYwCgoKZm9yIChpIGluIDE6bGVuZ3RoKGFsbF9UKSkgewogIFRfaSA8LSBnZXQoYWxsX1RbaV0pWydlbGFwc2VkJ10KICBpZiAoVF9pID4gNjApIHsKICAgICBwcmludChwYXN0ZShhbGxfVFtpXSwgcm91bmQoVF9pLzYwLCBkaWdpdHMgPSAxKSwgJ21pbnMnKSkKICB9IGVsc2UgewogICAgIHByaW50KHBhc3RlKGFsbF9UW2ldLCByb3VuZChUX2ksIGRpZ2l0cyA9IDEpLCAnc2VjcycpKQogIH0KfQpwcmludChwYXN0ZSgnVG90YWwgdGltZSAnLCByb3VuZChvdmVyYWxsLCBkaWdpdHMgPSAzKSwgJ21pbnMnICkpCmBgYAoKCmBgYHtyfQpvYmpbWydwY2Eubm4nXV0gPC0gU2V1cmF0Ojo6Tk5IZWxwZXIoZGF0YSA9IG9ialtbJ3BjYS5vcmlnJ11dQGNlbGwuZW1iZWRkaW5nc1ssMTozMF0sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgayA9IDMwLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJobnN3IiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRyaWMgPSAiY29zaW5lIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBuX3RocmVhZHMgPSAxMCkKb2JqIDwtIFJ1blVNQVAob2JqLCBubi5uYW1lID0gInBjYS5ubiIsIHJlZHVjdGlvbi5uYW1lID0gJ3VtYXAub3JpZycsIHJlZHVjdGlvbi5rZXkgPSAnVW9fJykKCmBgYAoKYGBge3J9CkRpbVBsb3Qob2JqLCBsYWJlbCA9IFQsIHJlZHVjdGlvbiA9ICd1bWFwLm9yaWcnLCBncm91cC5ieSA9ICdwcmVkaWN0ZWQuY2x1c3Rlcl9mdWxsJywgYWxwaGEgPSAwLjEpICsgTm9MZWdlbmQoKSAgCiAgRGltUGxvdChvYmosIGxhYmVsID0gVCwgcmVkdWN0aW9uID0gJ3VtYXAub3JpZycsIGdyb3VwLmJ5ID0gJ3ByZWRpY3RlZC5zdWJjbGFzc19mdWxsJywgYWxwaGEgPSAwLjEpICsgTm9MZWdlbmQoKSAgCiAKI3NhdmVSRFMob2JqLCBmaWxlID0gIi9icmFobXMvaGFveS90ZXN0L21vdXNlXzFNX25ldXJvbnNfc2V1cmF0LnJkcyIpCmBgYAo=
    - - - -
    - - - - - - - - - - - - - - - - diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 35feef372..5c759c60c 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -62,7 +62,7 @@ We use the `LoadVizgen()` function, which we have written to read in the output ```{r, message=FALSE, warning=FALSE} # Loading segmentations is a slow process and multi processing with the future pacakge is recommended -vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/spatial_vignette_data/vizgen/s2r1/", fov = "s2r1") +vizgen.obj <- LoadVizgen(data.dir = "../data/vizgen/s2r1/", fov = "s2r1") ``` The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: @@ -208,12 +208,12 @@ In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `Lo For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. ```{r load} -nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") +nano.obj <- LoadNanostring(data.dir = "../data/nanostring/lung5_rep1", fov="lung5.rep1") ``` ```{r integration} # add in precomputed Azimuth annotations -azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") +azimuth.data <- readRDS("../data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 @@ -295,7 +295,7 @@ First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function ```{r} codex.obj <- LoadAkoya( - filename = "/brahms/hartmana/spatial_vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", + filename = "../data/LN7910_20_008_11022020_reg001_compensated.csv", type = "processor", fov = "HBM754.WKLP.262" ) diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 940814771..0ff3428e6 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -61,7 +61,7 @@ We use the `LoadVizgen()` function, which we have written to read in the output ```{r, message=FALSE, warning=FALSE} # Loading segmentations is a slow process and multi processing with the future pacakge is recommended -vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/spatial_vignette_data/vizgen/s2r1/", fov = "s2r1") +vizgen.obj <- LoadVizgen(data.dir = "../data/vizgen/s2r1/", fov = "s2r1") ``` The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: @@ -207,12 +207,12 @@ In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `Lo For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. ```{r load} -nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") +nano.obj <- LoadNanostring(data.dir = "../data/nanostring/lung5_rep1", fov="lung5.rep1") ``` ```{r integration} # add in precomputed Azimuth annotations -azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") +azimuth.data <- readRDS("../data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 @@ -294,7 +294,7 @@ First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function ```{r} codex.obj <- LoadAkoya( - filename = "/brahms/hartmana/spatial_vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", + filename = "../data/LN7910_20_008_11022020_reg001_compensated.csv", type = "processor", fov = "HBM754.WKLP.262" ) From 92631d42df3602eb3a1ce1086c9fd90d94d60226 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 1 Feb 2023 09:05:35 -0500 Subject: [PATCH 462/979] fix object name typo --- vignettes/seurat5_integration_large_datasets.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_integration_large_datasets.Rmd b/vignettes/seurat5_integration_large_datasets.Rmd index 4a9ceb8ee..176a4ca14 100644 --- a/vignettes/seurat5_integration_large_datasets.Rmd +++ b/vignettes/seurat5_integration_large_datasets.Rmd @@ -107,7 +107,7 @@ DimPlot(bm280k, group.by = "orig.ident") ```{r save.img, include=TRUE} library(ggplot2) -plot <- DimPlot(bm280k.integrated, group.by = "orig.ident") + xlab("UMAP 1") + ylab("UMAP 2") + +plot <- DimPlot(bm280k, group.by = "orig.ident") + xlab("UMAP 1") + ylab("UMAP 2") + theme(axis.title = element_text(size = 18), legend.text = element_text(size = 18)) + guides(colour = guide_legend(override.aes = list(size = 10))) ggsave(filename = "../output/images/bm280k_integrated.jpg", height = 7, width = 12, plot = plot, quality = 50) From e18de11e0acfaf639252f5a2efdc94834f3bbc95 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 1 Feb 2023 09:10:25 -0500 Subject: [PATCH 463/979] increase future globals max size --- vignettes/seurat5_integration_rpca.Rmd | 1 + 1 file changed, 1 insertion(+) diff --git a/vignettes/seurat5_integration_rpca.Rmd b/vignettes/seurat5_integration_rpca.Rmd index 20ea5f554..a59463944 100644 --- a/vignettes/seurat5_integration_rpca.Rmd +++ b/vignettes/seurat5_integration_rpca.Rmd @@ -44,6 +44,7 @@ Below, we demonstrate the use of reciprocal PCA to align the same stimulated and ```{r, include=TRUE} options(SeuratData.repo.use = "http://satijalab04.nygenome.org") +options(future.globals.maxSize = 1e9) ``` ```{r installdata} From 39eb994a08e714bbd0c1ecf44437d58ce6013fc7 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 1 Feb 2023 09:24:24 -0500 Subject: [PATCH 464/979] fix assay5 cast --- vignettes/seurat5_visualization_vignette.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_visualization_vignette.Rmd b/vignettes/seurat5_visualization_vignette.Rmd index b49f4539e..f5f2052b4 100644 --- a/vignettes/seurat5_visualization_vignette.Rmd +++ b/vignettes/seurat5_visualization_vignette.Rmd @@ -54,7 +54,7 @@ library(patchwork) data("pbmc3k.final") pbmc3k.final <- UpdateSeuratObject(pbmc3k.final) -pbmc3k.final[["RNA"]] <- as(pbmc3k.final, Class = "Assay5") +pbmc3k.final[["RNA"]] <- as(pbmc3k.final[["RNA"]], Class = "Assay5") pbmc3k.final <- NormalizeData(pbmc3k.final) pbmc3k.final <- FindVariableFeatures(pbmc3k.final) pbmc3k.final <- ScaleData(pbmc3k.final) From e746d5236f70f2f607c27accd06ee92cc0d6ab14 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 1 Feb 2023 17:41:28 -0500 Subject: [PATCH 465/979] fix s5 integration_mapping vig --- vignettes/seurat5_integration_mapping.Rmd | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index 25ab4caf3..1cbeb9983 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -75,12 +75,9 @@ Next, we identify anchors using the `FindIntegrationAnchors()` function, which t * We use all default parameters here for identifying anchors, including the 'dimensionality' of the dataset (30; feel free to try varying this parameter over a broad range, for example between 10 and 50). ```{r integration.anchors, warning = FALSE, message = FALSE} -pancreas.ref <- DietSeurat(panc8, layers = c("celseq", "celseq2", "smartseq2")) -pancreas.ref <- as(object = pancreas.ref[["RNA"]], Class = "Assay5") -pancreas.ref <- CreateSeuratObject(pancreas.ref, meta.data = panc8@meta.data) +pancreas.ref <- subset(panc8, subset = tech %in% c("celseq", "celseq2", "smartseq2")) pancreas.ref <- ScaleData(pancreas.ref) pancreas.ref <- RunPCA(pancreas.ref) -#pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30) ``` We then pass these anchors to the `IntegrateData()` function, which returns a Seurat object. @@ -90,7 +87,7 @@ We then pass these anchors to the `IntegrateData()` function, which returns a Se ```{r data.integration, warning = FALSE, message = FALSE} pancreas.ref <- IntegrateLayers(object = pancreas.ref, method = CCAIntegration, - verbose = F) + verbose = FALSE) ``` After running `IntegrateData()`, the `Seurat` object will contain a new `Assay` with the integrated expression matrix. Note that the original (uncorrected values) are still stored in the object in the "RNA" assay, so you can switch back and forth. @@ -127,11 +124,7 @@ Seurat also supports the projection of reference data (or meta data) onto a quer After finding anchors, we use the `TransferData()` function to classify the query cells based on reference data (a vector of reference cell type labels). `TransferData()` returns a matrix with predicted IDs and prediction scores, which we can add to the query metadata. ```{r label.transfer, warning = FALSE, message = FALSE} -# do we want a different query and reference object or just have different layers?? -pancreas.query <- DietSeurat(panc8, layers = "fluidigmc1", assays = "RNA", ) -pancreas.query <- as(object = pancreas.query[["RNA"]], Class = "Assay5") -pancreas.query <- CreateSeuratObject(pancreas.query, meta.data = panc8[[]]) - +pancreas.query <- subset(panc8, subset = tech == "fluidigmc1") pancreas.anchors <- FindTransferAnchors(reference = pancreas.ref, query = pancreas.query, dims = 1:30, reference.reduction = "integrated.dr", k.filter = NA) predictions <- TransferData(anchorset = pancreas.anchors, refdata = pancreas.ref$celltype, dims = 1:30) From 602d7f955df204690a6499c3cc8ff50b5ee57fbd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 1 Feb 2023 20:46:41 -0500 Subject: [PATCH 466/979] PrepSCTFindMarkers for v5 --- R/differential_expression.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/differential_expression.R b/R/differential_expression.R index 8497b3a4e..e206d01dd 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2117,6 +2117,11 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { paste(umi.assay, collapse = ", ") ) } + umi.layers <- Layers(object = object, assay = umi.assay, search = 'counts') + if (length(x = umi.layers) > 1) { + object[[umi.assay]] <- JoinLayers(object = object[[umi.assay]], + layers = "counts", new = "counts") + } raw_umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts") corrected_counts <- Matrix( nrow = nrow(x = raw_umi), From 7d4dd06554ba86dca1665b5ddace350e4cb7cbfa Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 1 Feb 2023 21:54:53 -0500 Subject: [PATCH 467/979] fix SCT merge --- R/integration.R | 6 ------ R/objects.R | 18 +++++++++++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/R/integration.R b/R/integration.R index 85a375645..dd6fcd053 100644 --- a/R/integration.R +++ b/R/integration.R @@ -818,12 +818,6 @@ FindTransferAnchors <- function( features = features, verbose = FALSE )) - reference <- ScaleData( - object = reference, - features = features, - do.scale = FALSE, - verbose = FALSE - ) features <- intersect( x = features, y = rownames(reference[[reference.assay]]$scale.data) diff --git a/R/objects.R b/R/objects.R index 5175d0987..9f6def44b 100644 --- a/R/objects.R +++ b/R/objects.R @@ -2180,14 +2180,26 @@ merge.SCTAssay <- function( ... ) { assays <- c(x, y) + if (any(sapply( + X = assays, + FUN = function(assay.i) inherits(x = assay.i, what = "Assay5") + ))) { + return(merge(x = as(x, "Assay5"), y, ...)) + } parent.call <- grep(pattern = "merge.Seurat", x = sys.calls()) if (length(x = parent.call) > 0) { # Try and fill in missing residuals if called in the context of merge.Seurat - all.features <- unique(x = unlist(x = lapply(X = assays, FUN = function(assay) { - if (inherits(x = x, what = "SCTAssay")) { + all.features <- unique( + x = unlist( + x = lapply( + X = assays, + FUN = function(assay) { + if (inherits(x = assay, what = "SCTAssay")) { return(rownames(x = GetAssayData(object = assay, slot = "scale.data"))) } - }))) + }) + ) + ) if (!is.null(all.features)) { assays <- lapply(X = 1:length(x = assays), FUN = function(assay) { if (inherits(x = assays[[assay]], what = "SCTAssay")) { From 32f34b0d99e31d1fe7cf5a080a19e3da0cfc51ef Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 1 Feb 2023 21:57:16 -0500 Subject: [PATCH 468/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2014c5c19..cece3a6dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9029 -Date: 2023-01-31 +Version: 4.9.9.9030 +Date: 2023-02-01 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From c465379a91ded4492f20fe5a59a0eb4aa0d15861 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 2 Feb 2023 16:33:07 -0500 Subject: [PATCH 469/979] move ensemble conversion function to azimuth; dont eval RCTD codeblock --- .../BPCells_sketch_clustering_mouse_brain.Rmd | 12 ++++-------- .../BPCells_sketch_clustering_mouse_brain_SCT.Rmd | 14 ++++---------- vignettes/seurat5_spatial_vignette.Rmd | 2 +- vignettes/spatial_vignette.Rmd | 2 +- 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd index fd7013abf..fcbad751e 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd @@ -35,14 +35,10 @@ library(BPCells) ## load data from h5ad ```{r, warning=FALSE, message=FALSE} t0_CreateObject <- system.time({ - -mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") -devtools::load_all("~/share/package/MetricPatch/") -mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -obj <- CreateSeuratObject(counts = mat ) - + mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") + mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse') + options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) + obj <- CreateSeuratObject(counts = mat) }) ``` diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd index 6dfdedeb9..4f75b3735 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd @@ -36,16 +36,10 @@ library(Azimuth) ## load data from h5ad ```{r, warning=FALSE, message=FALSE} t0_CreateObject <- system.time({ - -mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") -# devtools::load_all("~/share/package/MetricPatch/") -devtools::load_all("/home/haoy/share/package/MetricPatch/") -mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') - - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -obj <- CreateSeuratObject(counts = mat ) - + mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") + mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse') + options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) + obj <- CreateSeuratObject(counts = mat) }) ``` diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index bb3a3609b..72d4dfd8a 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -467,7 +467,7 @@ devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) Annotation using RCTD -```{r rctd} +```{r rctd, eval=FALSE} library(spacexr) # set up reference diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index 91cfd7bcf..d05245568 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -462,7 +462,7 @@ devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) Annotation using RCTD -```{r rctd} +```{r rctd, eval=FALSE} library(spacexr) # set up reference From 7118124c39dd9c5fd5eb60d0b1207e17e743d279 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 2 Feb 2023 17:52:47 -0500 Subject: [PATCH 470/979] add fake annotations in BPCell vignettes; fix code which isnt evald --- vignettes/BPCells_sketch_clustering_mouse_brain.Rmd | 13 +++---------- .../BPCells_sketch_clustering_mouse_brain_SCT.Rmd | 12 +++--------- vignettes/seurat5_integration_mapping.Rmd | 8 ++++---- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd index fcbad751e..705699972 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd @@ -72,16 +72,9 @@ DimPlot(obj, label = T, reduction = 'umap') + NoLegend() ``` ## Azimuth mapping to annotate clusters ```{r, warning=F, message=F} - - -options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = T) -obj.v3 <- CreateSeuratObject(counts = as.sparse(obj[['RNA']]$counts[,colnames(obj[['sketch']])]) ) -library(Azimuth) -obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) - -obj$predicted.subclass <- obj.v3$predicted.subclass -obj$predicted.cluster <- obj.v3$predicted.cluster - +# add some fake annotations to the object +obj$predicted.subclass <- sample(c("celltype1", "celltype2"), length(Cells(obj)), replace = TRUE) +obj$predicted.cluster <- sample(c("celltype1", "celltype2", "celltype3", "celltype4"), length(Cells(obj)), replace = TRUE) ``` diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd index 4f75b3735..ca1984a0a 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd @@ -72,15 +72,9 @@ obj <- RunUMAP(obj, dims = 1:50, return.model = T, verbose = F) DimPlot(obj, label = T, reduction = 'umap') + NoLegend() ``` ```{r} - -options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = T) -obj.v3 <- CreateSeuratObject(counts = as.sparse(obj[['sketch']]$counts) ) -obj.v3 <- RunAzimuth(query = obj.v3, assay = 'RNA', reference = 'mousecortexref', do.adt = F) - -obj$predicted.subclass <- obj.v3$predicted.subclass -obj$predicted.cluster <- obj.v3$predicted.cluster - - +# add some fake annotations to the object +obj$predicted.subclass <- sample(c("celltype1", "celltype2"), length(Cells(obj)), replace = TRUE) +obj$predicted.cluster <- sample(c("celltype1", "celltype2", "celltype3", "celltype4"), length(Cells(obj)), replace = TRUE) ``` ```{r,fig.height = 20, fig.width = 15} features.set <- c('Aqp4', 'Sox10', 'Slc17a7', 'Aif1', 'Foxj1', 'Pax6', 'Slc17a6', 'Lum', 'Nanog', 'Gad2', 'Foxj1', 'Cldn5','Alas2') diff --git a/vignettes/seurat5_integration_mapping.Rmd b/vignettes/seurat5_integration_mapping.Rmd index 1cbeb9983..d7bea6a73 100644 --- a/vignettes/seurat5_integration_mapping.Rmd +++ b/vignettes/seurat5_integration_mapping.Rmd @@ -169,21 +169,21 @@ pancreas.query <- MapQuery( ```{r, eval=FALSE} pancreas.query <- TransferData( anchorset = pancreas.anchors, - reference = panc8, + reference = pancreas.ref, query = pancreas.query, refdata = list(celltype = "celltype") ) pancreas.query <- IntegrateEmbeddings( anchorset = pancreas.anchors, - reference = panc8, + reference = pancreas.ref, query = pancreas.query, new.reduction.name = "ref.pca" ) pancreas.query <- ProjectUMAP( query = pancreas.query, query.reduction = "ref.pca", - reference = panc8, - reference.reduction = "pca", + reference = pancreas.ref, + reference.reduction = "integrated.dr", reduction.model = "umap" ) ``` From 17d6e46a2d508694da2dff4e7be1e673a8285c4d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 4 Feb 2023 23:26:48 -0500 Subject: [PATCH 471/979] only generate one leverage score column --- R/sketching.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 6e932695f..1a32def89 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -307,11 +307,11 @@ LeverageScore.DelayedMatrix <- function( #' @method LeverageScore StdAssay +#' #' @export #' LeverageScore.StdAssay <- function( object, - # features = NULL, nsketch = 5000L, ndims = NULL, method = CountSketch, @@ -329,13 +329,13 @@ LeverageScore.StdAssay <- function( } scores <- SeuratObject:::EmptyDF(n = ncol(x = object)) row.names(x = scores) <- colnames(x = object) - scores[, layer] <- NA_real_ + scores[, 1] <- NA_real_ for (i in seq_along(along.with = layer)) { l <- layer[i] if (isTRUE(x = verbose)) { message("Running LeverageScore for layer ", l) } - scores[Cells(x = object, layer = l), l] <- LeverageScore( + scores[Cells(x = object, layer = l), 1] <- LeverageScore( object = LayerData( object = object, layer = l, @@ -355,7 +355,6 @@ LeverageScore.StdAssay <- function( ... ) } - names(x = scores) <- paste0('leverage_score_', names(x = scores)) return(scores) } @@ -370,9 +369,10 @@ LeverageScore.Assay <- LeverageScore.StdAssay LeverageScore.Seurat <- function( object, assay = NULL, - # features = NULL, nsketch = 5000L, ndims = NULL, + var.name = 'leverage.score', + over.write = FALSE, method = CountSketch, vf.method = NULL, layer = 'data', @@ -381,12 +381,14 @@ LeverageScore.Seurat <- function( verbose = TRUE, ... ) { + if (!over.write) { + var.name <- CheckMetaVarName(object = object, var.name = var.name) + } assay <- assay[1L] %||% DefaultAssay(object = object) assay <- match.arg(arg = assay, choices = Assays(object = object)) method <- enquo(arg = method) scores <- LeverageScore( object = object[[assay]], - # features = features, nsketch = nsketch, ndims = ndims, method = method, @@ -397,7 +399,7 @@ LeverageScore.Seurat <- function( verbose = verbose, ... ) - names(x = scores) <- paste0("seurat_", names(x = scores)) + names(x = scores) <- var.name object[[]] <- scores return(object) } From 92ae45685d6d5cdfa3aa51fe019ca9b62d82eb0e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 4 Feb 2023 23:54:16 -0500 Subject: [PATCH 472/979] rewrite leverage score sampling --- R/sketching.R | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 1a32def89..44104ee42 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -81,15 +81,17 @@ LeverageScoreSampling <- function( assay = NULL, ncells = 5000L, save = 'sketch', + var.name = "leverage.score", + over.write = FALSE, default = TRUE, - seed = NA_integer_, + seed = 123L, cast = NULL, layers = c('counts', 'data'), + verbose = TRUE, ... ) { assay <- assay[1L] %||% DefaultAssay(object = object) assay <- match.arg(arg = assay, choices = Assays(object = object)) - # TODO: fix this in [[<-,Seurat5 if (save == assay) { abort(message = "Cannot overwrite existing assays") } @@ -99,39 +101,45 @@ LeverageScoreSampling <- function( } object[[save]] <- NULL } - vars <- grep(pattern = "^seurat_leverage_score_", x = names(x = object[[]]), - value = TRUE) - names(x = vars) <- vars - vars <- gsub(pattern = "^seurat_leverage_score_", replacement = "", - x = vars) - vars <- vars[vars %in% Layers(object = object[[assay]])] - if (!length(x = vars)) { - stop("No leverage scores found for assay ", assay, call. = FALSE) + if (!over.write) { + var.name <- CheckMetaVarName(object = object, var.name = var.name) } + if (verbose) { + message("Calcuating Leverage Score") + } + object <- LeverageScore( + object = object, + assay = assay, + var.name = var.name, + over.write = over.write, + seed = seed, + verbose = verbose, + ... + ) + leverage.score <- object[[var.name]] + layers.data <- Layers(object = object[[assay]], search = 'data') cells <- lapply( - X = seq_along(along.with = vars), + X = seq_along(along.with = layers.data), FUN = function(i, seed) { - if (!is.na(x = seed)) { - set.seed(seed = seed) - } - lcells <- Cells(x = object[[assay]], layer = vars[i]) + set.seed(seed = seed) + lcells <- Cells(x = object[[assay]], layer = layers.data[i]) if (length(x = lcells) < ncells) { return(lcells) } return(sample( x = lcells, - size = min(ncells), - prob = object[[names(x = vars)[i], drop = TRUE, na.rm = TRUE]] + size = ncells, + prob = leverage.score[lcells,] )) }, seed = seed ) sketched <- suppressWarnings(expr = subset( x = object[[assay]], - cells = Reduce(f = union, x = cells), + cells = unlist(cells), layers = Layers(object = object[[assay]], search = layers) )) - for (lyr in vars) { + for (lyr in layers.data) { try( expr = VariableFeatures(object = sketched, method = "sketch", layer = lyr) <- VariableFeatures(object = object[[assay]], layer = lyr), From 6292bcd42d57e2217381c697fe19ec0fbff50173 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 6 Feb 2023 14:17:29 -0500 Subject: [PATCH 473/979] make data slot if it doesnt exist in query --- R/integration.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/integration.R b/R/integration.R index dd6fcd053..3a8a106e2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -833,6 +833,19 @@ FindTransferAnchors <- function( )) } } + # Make data slot if DNE + if (inherits(x = query[[query.assay]], what = "Assay5")){ + if (is.null( + tryCatch(expr = slot(object = query[[query.assay]], + name = "data"), + error = function (e) return(NULL)) + ) + ) { + LayerData(object = query[[query.assay]], layer = "data") <- matrix(, + nrow = nrow(query[[query.assay]]), + ncol = ncol(query[[query.assay]])) + } + } # Rename query assay w same name as reference assay if (query.assay != reference.assay) { suppressWarnings(expr = query <- RenameAssays(query, assay.name = query.assay, new.assay.name = reference.assay)) From 8029591af06cb1828197d21b348d72b4c4cc4e94 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 6 Feb 2023 16:13:07 -0500 Subject: [PATCH 474/979] format --- R/integration.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 3a8a106e2..f9cd9ec66 100644 --- a/R/integration.R +++ b/R/integration.R @@ -842,8 +842,9 @@ FindTransferAnchors <- function( ) ) { LayerData(object = query[[query.assay]], layer = "data") <- matrix(, - nrow = nrow(query[[query.assay]]), - ncol = ncol(query[[query.assay]])) + nrow = nrow(query[[query.assay]]), + ncol = ncol(query[[query.assay]]) + ) } } # Rename query assay w same name as reference assay From b8cf02c3fb5227e4c1370e66e95132f4bb42b2b7 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 6 Feb 2023 18:11:55 -0500 Subject: [PATCH 475/979] moving to validation --- R/integration.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/integration.R b/R/integration.R index f9cd9ec66..7eb61dcaf 100644 --- a/R/integration.R +++ b/R/integration.R @@ -833,20 +833,6 @@ FindTransferAnchors <- function( )) } } - # Make data slot if DNE - if (inherits(x = query[[query.assay]], what = "Assay5")){ - if (is.null( - tryCatch(expr = slot(object = query[[query.assay]], - name = "data"), - error = function (e) return(NULL)) - ) - ) { - LayerData(object = query[[query.assay]], layer = "data") <- matrix(, - nrow = nrow(query[[query.assay]]), - ncol = ncol(query[[query.assay]]) - ) - } - } # Rename query assay w same name as reference assay if (query.assay != reference.assay) { suppressWarnings(expr = query <- RenameAssays(query, assay.name = query.assay, new.assay.name = reference.assay)) @@ -6051,6 +6037,21 @@ ValidateParams_FindTransferAnchors <- function( "SCTransform. Please either run SCTransform or set normalization.method = 'LogNormalize'.", call. = FALSE) } + # Make data slot if DNE + if (inherits(x = query[[query.assay]], what = "Assay5")){ + if (is.null( + tryCatch(expr = slot(object = query[[query.assay]], + name = "data"), + error = function (e) return(NULL)) + ) + ) { + LayerData(object = query[[query.assay]], layer = "data") <- matrix(, + nrow = nrow(query[[query.assay]]), + ncol = ncol(query[[query.assay]]) + ) + ModifyParam(param = "query", value = query) + } + } # features must be in both reference and query query.assay.check <- query.assay reference.assay.check <- reference.assay From 76ca983a13580e211a96ac7435cade845cd80237 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 6 Feb 2023 18:13:20 -0500 Subject: [PATCH 476/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index cece3a6dd..e2dc34c95 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9030 +Version: 4.9.9.9031 Date: 2023-02-01 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 65b2455f3f7c12da701d275a8a72cb37b7f812a2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 7 Feb 2023 11:01:00 -0500 Subject: [PATCH 477/979] replace NA matrix to sparse matrix --- R/integration.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 7eb61dcaf..4826f9b31 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6045,10 +6045,16 @@ ValidateParams_FindTransferAnchors <- function( error = function (e) return(NULL)) ) ) { - LayerData(object = query[[query.assay]], layer = "data") <- matrix(, - nrow = nrow(query[[query.assay]]), - ncol = ncol(query[[query.assay]]) - ) + LayerData( + object = query[[query.assay]], + layer = "data") <- sparseMatrix( + i = 1, + j = 1, + x = 1, + dims = c(nrow(x = query[[query.assay]]), + ncol(x = query[[query.assay]]) + ) + ) ModifyParam(param = "query", value = query) } } From c1a227d75b8c9bcf1452056a7980370e26ef88be Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 7 Feb 2023 11:15:49 -0500 Subject: [PATCH 478/979] change print message and warning --- R/integration.R | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/R/integration.R b/R/integration.R index 4826f9b31..4966ce65e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -835,7 +835,12 @@ FindTransferAnchors <- function( } # Rename query assay w same name as reference assay if (query.assay != reference.assay) { - suppressWarnings(expr = query <- RenameAssays(query, assay.name = query.assay, new.assay.name = reference.assay)) + suppressWarnings(expr = query <- RenameAssays( + object = query, + assay.name = query.assay, + new.assay.name = reference.assay, + verbose = FALSE + )) DefaultAssay(query) <- reference.assay } # only keep necessary info from objects @@ -854,12 +859,14 @@ FindTransferAnchors <- function( warnings("reference assay is diffrent from the assay.used in", reference.reduction) slot(object = reference[[reference.reduction]], name = "assay.used") <- reference.assay } - reference <- DietSeurat( - object = reference, - assays = reference.assay, - dimreducs = reference.reduction, - features = features, - scale.data = TRUE + suppressWarnings( + reference <- DietSeurat( + object = reference, + assays = reference.assay, + dimreducs = reference.reduction, + features = features, + scale.data = TRUE + ) ) # append query and reference to cell names - mainly to avoid name conflicts query <- RenameCells( From 50f979bb2d77fc3b4cd6f06d0efee20e3240329f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 7 Feb 2023 11:35:17 -0500 Subject: [PATCH 479/979] fix VariableFeatures.SCTAssay --- R/objects.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/objects.R b/R/objects.R index 9f6def44b..2862dd0fb 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1934,12 +1934,12 @@ VariableFeatures.SCTAssay <- function( use.var.features = TRUE, ... ) { - nfeatures <- nfeatures %||% 3000 + # Is the information already in var.features? + var.features.existing <- slot(object = object, name = "var.features") + nfeatures <- nfeatures %||% length(x = var.features.existing) %||% 3000 if (is.null(x = layer)) { layer <- levels(x = object) } - # Is the information already in var.features? - var.features.existing <- object@var.features if (simplify == TRUE & use.var.features == TRUE & length(var.features.existing)>=nfeatures){ return (head(x = var.features.existing, n = nfeatures)) } From ed29a32978ff5e0e0aeb6beccbb5b76809f294a1 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 7 Feb 2023 17:49:05 -0500 Subject: [PATCH 480/979] Default to NULL nfeatures in VariableFeatures for SCTAssay --- R/objects.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index 2862dd0fb..5a396d98d 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1929,7 +1929,7 @@ VariableFeatures.SCTModel <- function(object, nfeatures = 3000, ...) { VariableFeatures.SCTAssay <- function( object, layer = NULL, - nfeatures = 3000, + nfeatures = NULL, simplify = TRUE, use.var.features = TRUE, ... From dec7b93142e8fd5ff18778b0828ee19d5b94156b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Feb 2023 13:39:52 -0500 Subject: [PATCH 481/979] fix rpca transferAnchor --- R/integration.R | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index 4966ce65e..c3040e254 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1020,13 +1020,13 @@ FindTransferAnchors <- function( reduction = reference.reduction, query = query, scale = scale, + normalization.method = normalization.method, dims = dims, feature.mean = feature.mean, verbose = verbose ) orig.embeddings <- Embeddings(object = reference[[reference.reduction]])[, dims] orig.loadings <- Loadings(object = reference[[reference.reduction]]) - combined.pca <- CreateDimReducObject( embeddings = as.matrix(x = rbind(orig.embeddings, projected.pca)), key = "ProjectPC_", @@ -4406,8 +4406,9 @@ FindNN <- function( method = nn.method, n.trees = n.trees, eps = eps, - index = nn.idx2 + index = if (reduction.2 == nn.reduction) nn.idx2 else NULL ) + nnba <- NNHelper( data = Embeddings(object = object[[reduction]])[cells1, nn.dims], query = Embeddings(object = object[[reduction]])[cells2, nn.dims], @@ -4415,7 +4416,7 @@ FindNN <- function( method = nn.method, n.trees = n.trees, eps = eps, - index = nn.idx1 + index = if (reduction == nn.reduction) nn.idx1 else NULL ) } else { dim.data.opposite <- Embeddings(object = object[[reduction]])[ ,dims] @@ -4428,7 +4429,7 @@ FindNN <- function( method = nn.method, n.trees = n.trees, eps = eps, - index = nn.idx2 + index = if (reduction == nn.reduction) nn.idx2 else NULL ) nnba <- NNHelper( data = dims.cells1.opposite, @@ -4437,7 +4438,7 @@ FindNN <- function( method = nn.method, n.trees = n.trees, eps = eps, - index = nn.idx1 + index = if (reduction == nn.reduction) nn.idx1 else NULL ) } object <- SetIntegrationData( @@ -5152,7 +5153,7 @@ ProjectCellEmbeddings.SCTAssay <- function( x = list( rownames(x = Loadings(object = reference[[reduction]])), rownames(x = reference[[reference.assay]]), - rownames(x = query) + rownames(x = query$scale.data) ) ) query.data <- GetAssayData( @@ -6030,8 +6031,22 @@ ValidateParams_FindTransferAnchors <- function( "you can set recompute.residuals to FALSE", call. = FALSE) } } - DefaultAssay(query) <- query.umi.assay - ModifyParam(param = "query.assay", value = query.umi.assay) + if (reduction %in% c('cca', 'rpca')) { + query <- SCTransform( + object = query, + reference.SCT.model = slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]], + residual.features = features, + assay = query.umi.assay, + new.assay.name = new.sct.assay, + verbose = FALSE + ) + } else { + new.sct.assay <- query.umi.assay + } + + + DefaultAssay(query) <- new.sct.assay + ModifyParam(param = "query.assay", value = new.sct.assay) ModifyParam(param = "query", value = query) ModifyParam(param = "reference", value = reference) } @@ -6071,6 +6086,9 @@ ValidateParams_FindTransferAnchors <- function( ref.features <- rownames(x = reference[[reference.assay.check]]) query.features <- rownames(x = query[[query.assay.check]]) if (normalization.method == "SCT") { + if (IsSCT(query[[query.assay.check]])) { + query.features <- rownames(x = query[[query.assay.check]]$scale.data) + } query.model.features <- rownames(x = Misc(object = query[[query.assay]])$vst.out$gene_attr) query.features <- unique(c(query.features, query.model.features)) ref.model.features <- rownames(x = Misc(object = reference[[reference.assay]])$vst.out$gene_attr) From 7b9c38301b19696baa2cc973db3f059a6984b11b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 9 Feb 2023 13:42:33 -0500 Subject: [PATCH 482/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e2dc34c95..b6e05535e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9031 -Date: 2023-02-01 +Version: 4.9.9.9032 +Date: 2023-02-09 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 35089643bab09ebe47a59a20eeae41b0a0e6e451 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 15 Feb 2023 23:31:33 -0500 Subject: [PATCH 483/979] fix SCT var --- R/preprocessing5.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index dd43dc3b2..bc9cd8004 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1663,9 +1663,9 @@ SCTransform.StdAssay <- function( variable.feature.list[[dataset.names[dataset.index]]] <- rownames(assay.out) } } - # Return array by merging everythin +# Return array by merging everythin if (length(x = sct.assay.list) > 1){ - vf.list <- lapply(X = sct.assay.list, FUN = function(object) VariableFeatures(object = object)) + vf.list <- lapply(X = sct.assay.list, FUN = function(object.i) VariableFeatures(object = object.i)) variable.features.union <- Reduce(f = union, x = vf.list) var.features.sorted <- sort( x = table(unlist(x = vf.list, use.names = FALSE)), @@ -1678,7 +1678,6 @@ SCTransform.StdAssay <- function( var.features <- variable.features.union for (layer.name in names(sct.assay.list)){ vst_out <- SCTModel_to_vst(SCTModel = slot(object = sct.assay.list[[layer.name]], name = "SCTModel.list")[[1]]) - all_cells <- Cells(x = object, layer = paste0(layer, ".", layer.name)) all_features <- Features(x = object, layer = paste0(layer, ".", layer.name)) variable.features.target <- intersect(x = rownames(x = vst_out$model_pars_fit), y = var.features) @@ -1718,9 +1717,11 @@ SCTransform.StdAssay <- function( VariableFeatures(sct.assay.list[[layer.name]]) <- rownames(x = merged_residual) } merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) - - #VariableFeatures(object = merged.assay) <- intersect(x = var.features, y = rownames(x = GetAssayData(object = merged.assay, slot='scale.data'))) - VariableFeatures(object = merged.assay) <- VariableFeatures(object = merged.assay, use.var.features = FALSE) + VariableFeatures(object = merged.assay) <- VariableFeatures( + object = merged.assay, + use.var.features = FALSE, + nfeatures = variable.features.n + ) # set the names of SCTmodels to be layer names models <- slot(object = merged.assay, name="SCTModel.list") names(models) <- names(x = sct.assay.list) From 92cd89610ca86e806457fdd214032886f9509830 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 15 Feb 2023 23:34:32 -0500 Subject: [PATCH 484/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b6e05535e..1562c059e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9032 -Date: 2023-02-09 +Version: 4.9.9.9033 +Date: 2023-02-15 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From a79eb9e3b7c99bb0fae3536e3af77919cd13e6bd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 16 Feb 2023 09:50:40 -0500 Subject: [PATCH 485/979] fix layer checking --- R/integration.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index c3040e254..6ec43ded9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -6062,8 +6062,7 @@ ValidateParams_FindTransferAnchors <- function( # Make data slot if DNE if (inherits(x = query[[query.assay]], what = "Assay5")){ if (is.null( - tryCatch(expr = slot(object = query[[query.assay]], - name = "data"), + tryCatch(expr = Layers(object = query[[query.assay]], search = 'data'), error = function (e) return(NULL)) ) ) { From 8ba22e24345f3424e9d56b51d838ecb308564c05 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Feb 2023 14:04:42 -0500 Subject: [PATCH 486/979] add assay5 spca --- R/dimensional_reduction.R | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 72272ddaf..847c86104 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2521,6 +2521,45 @@ RunSPCA.Assay <- function( return(reduction.data) } +#' @param features Features to compute SPCA on. If features=NULL, SPCA will be run +#' using the variable features for the Assay. +#' +#' @rdname RunSPCA +#' @concept dimensional_reduction +#' @export +#' @method RunSPCA Assay5 +#' +RunSPCA.Assay5 <- function( + object, + assay = NULL, + features = NULL, + npcs = 50, + reduction.key = "SPC_", + graph = NULL, + verbose = TRUE, + seed.use = 42, + layer = 'scale.data', + ... +) { + data.use <- PrepDR5( + object = object, + features = features, + layer = layer, + verbose = verbose + ) + reduction.data <- RunSPCA( + object = data.use, + assay = assay, + npcs = npcs, + reduction.key = reduction.key, + graph = graph, + verbose = verbose, + seed.use = seed.use, + ... + ) + return(reduction.data) +} + #' @param reduction.name dimensional reduction name, spca by default #' @rdname RunSPCA #' @concept dimensional_reduction From d506c30b0c668e4750917e4392c5b98de7cd43ef Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Feb 2023 14:15:56 -0500 Subject: [PATCH 487/979] update docu --- NAMESPACE | 1 + man/RunSPCA.Rd | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index c230cc587..51fcee1c2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -113,6 +113,7 @@ S3method(RunSLSI,Assay) S3method(RunSLSI,Seurat) S3method(RunSLSI,default) S3method(RunSPCA,Assay) +S3method(RunSPCA,Assay5) S3method(RunSPCA,Seurat) S3method(RunSPCA,default) S3method(RunTSNE,DimReduc) diff --git a/man/RunSPCA.Rd b/man/RunSPCA.Rd index fb166c30c..f3deaa81c 100644 --- a/man/RunSPCA.Rd +++ b/man/RunSPCA.Rd @@ -4,6 +4,7 @@ \alias{RunSPCA} \alias{RunSPCA.default} \alias{RunSPCA.Assay} +\alias{RunSPCA.Assay5} \alias{RunSPCA.Seurat} \title{Run Supervised Principal Component Analysis} \usage{ @@ -32,6 +33,19 @@ RunSPCA(object, ...) ... ) +\method{RunSPCA}{Assay5}( + object, + assay = NULL, + features = NULL, + npcs = 50, + reduction.key = "SPC_", + graph = NULL, + verbose = TRUE, + seed.use = 42, + layer = "scale.data", + ... +) + \method{RunSPCA}{Seurat}( object, assay = NULL, From 2d8d85a02d6a8e7fd5825e256d1f193f00905e62 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 18 Feb 2023 14:16:23 -0500 Subject: [PATCH 488/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1562c059e..79e94e513 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9033 -Date: 2023-02-15 +Version: 4.9.9.9034 +Date: 2023-02-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 57cf06fd2cc78a793c45a83b0934477849ea0fbd Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 22 Feb 2023 10:04:46 -0500 Subject: [PATCH 489/979] update BPCell vignette timing --- vignettes/BPCells_COVID_SCTMapping.Rmd | 13 ++++++++++--- vignettes/BPCells_COVID_logMapping.Rmd | 13 ++++++++++--- .../BPCells_sketch_clustering_mouse_brain.Rmd | 18 +++++++++++++----- ...Cells_sketch_clustering_mouse_brain_SCT.Rmd | 12 ++++++++++-- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/vignettes/BPCells_COVID_SCTMapping.Rmd b/vignettes/BPCells_COVID_SCTMapping.Rmd index 6da646d94..cb4e2572d 100755 --- a/vignettes/BPCells_COVID_SCTMapping.Rmd +++ b/vignettes/BPCells_COVID_SCTMapping.Rmd @@ -130,11 +130,11 @@ bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) ## computing time summary ```{r} all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 +overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] + T_i <- as.numeric(get(all_T[i])['elapsed']) if (T_i > 60) { print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) } else { @@ -181,4 +181,11 @@ VlnPlot(bulk, features = 'IFI44L', group.by = 'celltype', split.by = 'disease') VlnPlot(object, features = 'IFI44L', group.by = 'predicted.celltype.l2', split.by = 'Status') ``` - \ No newline at end of file + +```{r print_times} +as.data.frame(all_times) +``` + +```{r session_info} +sessionInfo() +``` diff --git a/vignettes/BPCells_COVID_logMapping.Rmd b/vignettes/BPCells_COVID_logMapping.Rmd index 77b2f660d..6136393b5 100755 --- a/vignettes/BPCells_COVID_logMapping.Rmd +++ b/vignettes/BPCells_COVID_logMapping.Rmd @@ -134,11 +134,11 @@ bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) ## computing time summary ```{r} all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 +overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] + T_i <- as.numeric(get(all_T[i]))['elapsed'] if (T_i > 60) { print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) } else { @@ -185,4 +185,11 @@ VlnPlot(bulk, features = 'IFI44L', group.by = 'celltype', split.by = 'disease') VlnPlot(object, features = 'IFI44L', group.by = 'predicted.celltype.l2', split.by = 'Status') ``` - \ No newline at end of file + +```{r print_times} +as.data.frame(all_times) +``` + +```{r session_info} +sessionInfo() +``` \ No newline at end of file diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd index 705699972..d2a0dce3b 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd @@ -138,11 +138,11 @@ DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_an ```{r} all_T <- ls(pattern = '^t') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 +overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 for (i in 1:length(all_T)) { - time.i <- get(all_T[i])['elapsed'] + time.i <- as.numeric(get(all_T[i])['elapsed']) if (time.i > 60) { print(paste(all_T[i], round(time.i/60, digits = 1), 'mins')) } else { @@ -158,8 +158,8 @@ print(paste('Total time ', round(overall, digits = 3), 'mins' )) obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], k = 30, - method = "hnsw", - metric = "cosine", + method = "hnsw", + metric = "cosine", n_threads = 10) obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') @@ -168,4 +168,12 @@ obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction. ```{r} DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() -``` \ No newline at end of file +``` + +```{r print_times} +as.data.frame(all_times) +``` + +```{r session_info} +sessionInfo() +``` diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd index ca1984a0a..fabe99974 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd +++ b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd @@ -132,11 +132,11 @@ DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_f ```{r} all_T <- ls(pattern = '^t') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 +overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] + T_i <- as.numeric(get(all_T[i])['elapsed']) if (T_i > 60) { print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) } else { @@ -160,3 +160,11 @@ obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction. DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() ``` + +```{r print_times} +as.data.frame(all_times) +``` + +```{r session_info} +sessionInfo() +``` From 3f1fabf5f0081ba1a9c1eab50634c79da138cbcb Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 22 Feb 2023 10:58:41 -0500 Subject: [PATCH 490/979] update spatial feature selection --- vignettes/seurat5_spatial_vignette.Rmd | 2 +- vignettes/spatial_vignette.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 72d4dfd8a..b804304bf 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -307,7 +307,7 @@ Based on these prediction scores, we can also predict *cell types* whose locatio ```{r sc.data8, fig.height = 10} cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "moransi", features = rownames(cortex), r.metric = 5, slot = "data") -top.clusters <- head(SpatiallyVariableFeatures(cortex), 4) +top.clusters <- head(SpatiallyVariableFeatures(cortex, selection.method = "moransi"), 4) SpatialPlot(object = cortex, features = top.clusters, ncol = 2) ``` diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index d05245568..76f74163d 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -304,7 +304,7 @@ Based on these prediction scores, we can also predict *cell types* whose locatio ```{r sc.data8, fig.height = 10} cortex <- FindSpatiallyVariableFeatures(cortex, assay = "predictions", selection.method = "moransi", features = rownames(cortex), r.metric = 5, slot = "data") -top.clusters <- head(SpatiallyVariableFeatures(cortex), 4) +top.clusters <- head(SpatiallyVariableFeatures(cortex, selection.method = "moransi"), 4) SpatialPlot(object = cortex, features = top.clusters, ncol = 2) ``` From 2ea5e21feddaf58c15a186f6146961a5048b7760 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 24 Feb 2023 14:59:51 -0500 Subject: [PATCH 491/979] update bpcells vignettes --- vignettes/BPCells_COVID_logMapping.Rmd | 195 ------------------ ...ells_sketch_clustering_mouse_brain_SCT.Rmd | 170 --------------- vignettes/BPCells_sketch_inte_1M_SCT.Rmd | 173 ---------------- ...ID_SCTMapping.Rmd => COVID_SCTMapping.Rmd} | 103 +++++---- ...n.Rmd => MouseBrain_sketch_clustering.Rmd} | 92 ++++----- ...1M.Rmd => ParseBio_sketch_integration.Rmd} | 113 ++++++---- 6 files changed, 152 insertions(+), 694 deletions(-) delete mode 100755 vignettes/BPCells_COVID_logMapping.Rmd delete mode 100755 vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd delete mode 100755 vignettes/BPCells_sketch_inte_1M_SCT.Rmd rename vignettes/{BPCells_COVID_SCTMapping.Rmd => COVID_SCTMapping.Rmd} (61%) rename vignettes/{BPCells_sketch_clustering_mouse_brain.Rmd => MouseBrain_sketch_clustering.Rmd} (58%) rename vignettes/{BPCells_sketch_inte_1M.Rmd => ParseBio_sketch_integration.Rmd} (66%) diff --git a/vignettes/BPCells_COVID_logMapping.Rmd b/vignettes/BPCells_COVID_logMapping.Rmd deleted file mode 100755 index 6136393b5..000000000 --- a/vignettes/BPCells_COVID_logMapping.Rmd +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: "COVID Log Normalization Mapping" -output: html_notebook ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - -## load package - -```{r, warning=F, message=F} -library(Seurat) -library(BPCells) -library(dplyr) -``` - -## load matrix -```{r, warning=F, message=F} - -time0_loadMatrix <- system.time({ -meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list') - -file.dir <- "../data/PBMCVignette/" -files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", - "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") -input.list <- list() -for (i in 1:length(files.set)) { - input.list[[i]] <- open_matrix_dir(dir = paste0(file.dir, files.set[i]) ) -} - names(input.list) <- paste0('counts.',gsub('_processed.BPCells','',files.set)) -}) -``` - -## load query -```{r,warning=F, message=F} - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -time1_normalize <- system.time({ - i = 4 - object <- CreateSeuratObject(counts = input.list[[i]], meta.data = meta.list[[i]] ) - object <- NormalizeData(object, verbose = FALSE) -}) - -``` - -## load reference -```{r} -library(SeuratData) -data("pbmc3k") -obj.ref <- pbmc3k -obj.ref <- UpdateSeuratObject(obj.ref) -obj.ref$seurat_annotations <- as.character(obj.ref$seurat_annotations) -obj.ref$seurat_annotations[is.na(obj.ref$seurat_annotations)] <- 'other' -obj.ref$celltype.l1 <- obj.ref$celltype.l2 <- obj.ref$seurat_annotations -obj.ref <- NormalizeData(obj.ref) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() -obj.ref <- RunUMAP(obj.ref, dims = 1:30, return.model = T, reduction.name = 'wnn.umap', reduction.key = 'W_') -``` -## mapping -```{r} - -time2_anchoring <- system.time({ -anchor <- FindTransferAnchors(reference = obj.ref, - query = object, - reference.reduction = 'pca', - k.filter = NA, - k.anchor = 5, - features = rownames(obj.ref[['pca']]@feature.loadings)) -}) - -time3_MapQuery <- system.time({ -object <- MapQuery( - anchorset = anchor, - query = object, - reference = obj.ref, - refdata = list( - l1.s5 = "celltype.l1", - l2.s5 = "celltype.l2" - ), - reduction.model = "wnn.umap" -)}) - -``` -```{r} -anchor -``` - - -```{r} - p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.9, label = T) + NoLegend() - - p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'cell.type.fine',alpha = 0.9, label = T) + NoLegend() - p1+p2 -``` - -## pseudo-bulk -```{r} - -time4_bulk <- system.time( bulk <- AverageExpression(object, - method = 'aggregate', - return.seurat = T, - slot = 'counts', - assays = 'RNA', - group.by = c("predicted.celltype.l2","Donor","Status") - ) -) - -bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) -bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) -bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) - - -``` - -## computing time summary -```{r} -all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 - - -for (i in 1:length(all_T)) { - T_i <- as.numeric(get(all_T[i]))['elapsed'] - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 1), 'mins' )) -``` -```{R} -marker.list <- list() -celltype.set <- unique(bulk$celltype ) -for (i in seq_along(celltype.set)) { - bulk.i <- subset(bulk, subset = celltype == celltype.set[i]) - Idents(bulk.i) <- 'disease' - if (any(table(bulk.i$disease) < 3)) { - marker.list[[i]] <- EmptyDF(n = 0) - } else { - marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'COVID',ident.2 = 'Healthy', slot = 'counts', test.use = 'DESeq2', verbose = F ) - } - -} -names(marker.list) <- celltype.set - -``` -```{r} -marker.list.filter <- lapply(marker.list, function(x) { - if(nrow(x) > 0) { - x <- x[x$p_val_adj < 0.01 & !is.na(x$p_val_adj ),] - } - if (nrow(x) > 0) { - return(x) - } -}) - - -``` -```{r} -object$Status <- factor(object$Status, levels = c('Healthy', 'COVID')) -bulk$disease <- factor(bulk$disease, levels = c('Healthy', 'COVID')) - -``` -```{r} -VlnPlot(bulk, features = 'IFI44L', group.by = 'celltype', split.by = 'disease') - -VlnPlot(object, features = 'IFI44L', group.by = 'predicted.celltype.l2', split.by = 'Status') -``` - -```{r print_times} -as.data.frame(all_times) -``` - -```{r session_info} -sessionInfo() -``` \ No newline at end of file diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd b/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd deleted file mode 100755 index fabe99974..000000000 --- a/vignettes/BPCells_sketch_clustering_mouse_brain_SCT.Rmd +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: "Sketch clustering in mouse brain (SCTransform)" -output: html_notebook ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - -## load library -```{r, warning=FALSE, message=FALSE} -library(Seurat) -library(BPCells) -library(Azimuth) -``` - -## load data from h5ad -```{r, warning=FALSE, message=FALSE} -t0_CreateObject <- system.time({ - mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") - mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse') - options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) - obj <- CreateSeuratObject(counts = mat) -}) - -``` - -## create sketch assay -```{r, warning=FALSE, message=FALSE} -t1_CreateSketchAssay <- system.time({ -obj <- NormalizeData(obj) -obj <- FindVariableFeatures(obj, layer = 'counts') -obj <- LeverageScore(obj) -#obj <- LeverageScoreSampling(object = obj, ncells = 5000, cast = 'dgCMatrix') -obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix') - -}) - -``` -## Sketch assay clustering -```{r, warning=FALSE, message=FALSE} -t2_SketchClustering <- system.time({ -obj <- SCTransform(object = obj) -obj <- RunPCA(obj) -obj <- FindNeighbors(obj, dims = 1:50) -obj <- FindClusters(obj) -}) - -obj <- RunUMAP(obj, dims = 1:50, return.model = T, verbose = F) -``` - -```{r} -DimPlot(obj, label = T, reduction = 'umap') + NoLegend() -``` -```{r} -# add some fake annotations to the object -obj$predicted.subclass <- sample(c("celltype1", "celltype2"), length(Cells(obj)), replace = TRUE) -obj$predicted.cluster <- sample(c("celltype1", "celltype2", "celltype3", "celltype4"), length(Cells(obj)), replace = TRUE) -``` -```{r,fig.height = 20, fig.width = 15} -features.set <- c('Aqp4', 'Sox10', 'Slc17a7', 'Aif1', 'Foxj1', 'Pax6', 'Slc17a6', 'Lum', 'Nanog', 'Gad2', 'Foxj1', 'Cldn5','Alas2') -features.gaba.set <- c('Gad1','Mef2c','Sst','Lhx6','Nr2f2','Prox1') -DefaultAssay(obj) <- 'sketch' -FeaturePlot(obj, reduction = 'umap', features = features.set, max.cutoff = "q99", min.cutoff = 'q1') -FeaturePlot(obj, reduction = 'umap', features = features.gaba.set, max.cutoff = "q99", min.cutoff = 'q1') - -``` - - -## Project full cells to PCA from sketch assay -```{r, warning=FALSE, message=FALSE} -t3_ProjectEmbedding <- system.time({ - ref.emb <- ProjectCellEmbeddings(query = obj, - reference = obj, - query.assay = 'RNA', - reference.assay = 'SCT', - normalization.method = 'SCT', - reduction = 'pca') -obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA') -DefaultAssay(obj) <- 'RNA' -}) - - - -``` - -## Transfer labels and umap from sketch to full data -```{r, warning=FALSE, message=FALSE} -t4_transferLabel <- system.time({ - options(future.globals.maxSize = 1e9) - obj <- TransferSketchLabels(object = obj, - atoms = 'sketch', - reduction = 'pca.orig', - dims = 1:50, - refdata = list(cluster_full = 'SCT_snn_res.0.8', - subclass_full ='predicted.subclass'), - reduction.model = 'umap' - ) -}) - -``` - - -```{r} -library(ggplot2) - - -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() - - -``` - -```{r} -all_T <- ls(pattern = '^t') -overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 - - -for (i in 1:length(all_T)) { - T_i <- as.numeric(get(all_T[i])['elapsed']) - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 3), 'mins' )) -``` - - -```{r} -obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], - k = 30, - method = "hnsw", - metric = "cosine", - n_threads = 10) -obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') -``` - -```{r} -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() -``` - -```{r print_times} -as.data.frame(all_times) -``` - -```{r session_info} -sessionInfo() -``` diff --git a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd b/vignettes/BPCells_sketch_inte_1M_SCT.Rmd deleted file mode 100755 index 19d97370a..000000000 --- a/vignettes/BPCells_sketch_inte_1M_SCT.Rmd +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "Sketch integration using SCTransform" -output: html_notebook ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - -## load package - -```{r, warning=F, message=F} -library(Seurat) -library(BPCells) -library(dplyr) -``` - -## load matrix -```{r, warning=F, message=F} -time0_loadMatrix <- system.time({ - mat <- open_matrix_dir('../data/pbmc_ParseBio_sparse/') - meta <- readRDS('../data/ParseBio_PBMC_meta.rds') -}) -``` - -## sketch object -```{r,warning=F, message=F} - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -time1_normalize <- system.time({ - object <- CreateSeuratObject(counts = mat, meta.data = meta) - object <- NormalizeData(object) -}) - - -time2_split.mat <- system.time({ - options(Seurat.object.assay.calcn = FALSE) - object[['RNA']] <- split(object[['RNA']], f = meta$sample) -}) - - -time3_FindVariable <- system.time({ - object <- FindVariableFeatures(object, layer = 'counts') -} -) - -time4_LeverageScoreSampling <- system.time({ - object <- LeverageScore(object) - object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix') -}) - -``` - -## integrate sketched assay -```{r} - -time5_SketchIntegration <- system.time({ - DefaultAssay(object) <- 'sketch' - object <- SCTransform(object) - object <- RunPCA(object, verbose = F) - features <- rownames(object[['pca']][]) - DefaultAssay(object) <- 'SCT' - options(future.globals.maxSize = 8e9) - object <- IntegrateLayers(object, - method = RPCAIntegration, - orig = 'pca', - new.reduction = 'integrated.rpca', - normalization.method = "SCT", - dims = 1:30, - k.anchor = 20, - features = features, - reference = which(Layers(object[['sketch']], search = 'data') %in% c( 'data.H_3060')), - verbose = F) - -}) -object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T, verbose = F) -plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() -plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend() -``` -```{r} -plot.s1 + plot.s2 - -``` - - -## proporgate embeddings to full data -```{r} -time6_UnSketch <- system.time({ - object <- IntegrateSketchEmbeddings(object = object, - atoms = 'sketch', - orig = 'RNA', - reduction = 'integrated.rpca' , - layers = Layers(object = object[['RNA']], search = 'data'), - features = features ) - -}) - - -``` - -```{r} - object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') -``` - -## pseudo-bulk -```{r} -time8_bulk <- system.time( bulk <- AverageExpression(object, - return.seurat = T, - slot = 'counts', - assays = 'RNA', - group.by = c("celltype.weight","sample") - ) -) -marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) -marker %>% - group_by(cluster) %>% - top_n(n = -5, wt = p_val) -> top5 -bulk <- ScaleData(bulk, features = top5$gene) - -``` - -## computing time summary -```{r} -all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 - - -for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 1), 'mins' )) -``` - - -```{r} -object$disease <- sapply(strsplit(x = object$sample, split = "_"), '[', 1) - -``` -```{r} -p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend() -p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend() -p3<- DimPlot(object, reduction = 'umap.orig', group.by = 'disease', label = T, alpha = 0.1) + NoLegend() - -p1+p2 + p3 - -``` -```{r,fig.height = 20, fig.width = 15} -DoHeatmap(bulk, features = top5$gene) + NoLegend() -``` diff --git a/vignettes/BPCells_COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd similarity index 61% rename from vignettes/BPCells_COVID_SCTMapping.Rmd rename to vignettes/COVID_SCTMapping.Rmd index cb4e2572d..89afe42f3 100755 --- a/vignettes/BPCells_COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -1,31 +1,8 @@ --- -title: "COVID SCTransform Mapping" +title: "R Notebook" output: html_notebook --- -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - ## load package ```{r, warning=F, message=F} @@ -38,16 +15,28 @@ library(dplyr) ```{r, warning=F, message=F} time0_loadMatrix <- system.time({ -meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list') - -file.dir <- "../data/PBMCVignette/" + +file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/" files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") +meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list') +names(meta.list) <- gsub('_processed.BPCells','',files.set) + input.list <- list() for (i in 1:length(files.set)) { input.list[[i]] <- open_matrix_dir(dir = paste0(file.dir, files.set[i]) ) + colnames(input.list[[i]]) <- paste0(names(meta.list)[i], "_", colnames(input.list[[i]])) + rownames(meta.list[[i]]) <-paste0(names(meta.list)[i], "_", rownames(meta.list[[i]])) + meta.list[[i]]$batch <- names(meta.list)[i] + meta.list[[i]]$celltype <- meta.list[[i]]$predicted.celltype.l2 + } names(input.list) <- paste0('counts.',gsub('_processed.BPCells','',files.set)) + meta_data <- lapply(meta.list, function(x) { + x <- x[,c('batch', 'celltype', 'patient', 'disease_status_standard' )] + return(x) + }) + meta_data <- Reduce(rbind, meta_data) }) ``` @@ -56,8 +45,8 @@ for (i in 1:length(files.set)) { options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) time1_normalize <- system.time({ - i = 4 - object <- CreateSeuratObject(counts = input.list[[i]], meta.data = meta.list[[i]] ) + + object <- CreateSeuratObject(counts = input.list[1:2], meta.data = meta_data) object <- NormalizeData(object, verbose = FALSE) }) @@ -66,25 +55,28 @@ time1_normalize <- system.time({ ## load reference ```{r} -obj.ref <- readRDS("../data/pbmc_multimodal_2023.rds") - +obj.ref <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") +obj.ref ``` ## mapping ```{r} + time2_anchoring <- system.time({ anchor <- FindTransferAnchors(reference = obj.ref, query = object, - reference.reduction = 'pca', + reference.reduction = 'spca', normalization.method = 'SCT', + dims = 1:50, k.filter = NA, k.anchor = 5, - features = rownames(obj.ref[['pca']]@feature.loadings)) + features = rownames(obj.ref[['spca']]@feature.loadings)) }) time3_MapQuery <- system.time({ -object <- MapQuery( + + object <- MapQuery( anchorset = anchor, query = object, reference = obj.ref, @@ -93,7 +85,8 @@ object <- MapQuery( l2.s5 = "celltype.l2" ), reduction.model = "wnn.umap" -)}) +) +}) ``` ```{r} @@ -103,9 +96,14 @@ anchor ```{r} p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.8, label = T) + NoLegend() - - p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'cell.type.fine',alpha = 0.8, label = T) + NoLegend() + p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T) + NoLegend() + p1+p2 + +``` +```{r, fig.width=10, fig.height=10} +p3 <-DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() +p3 ``` ## pseudo-bulk @@ -116,25 +114,25 @@ time4_bulk <- system.time( bulk <- AverageExpression(object, return.seurat = T, slot = 'counts', assays = 'RNA', - group.by = c("predicted.l2.s5","Donor","Status") + group.by = c("predicted.l2.s5","patient","disease_status_standard") ) ) bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) - +bulk <- subset(bulk, subset = disease != 'other') ``` ## computing time summary ```{r} all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 for (i in 1:length(all_T)) { - T_i <- as.numeric(get(all_T[i])['elapsed']) + T_i <- get(all_T[i])['elapsed'] if (T_i > 60) { print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) } else { @@ -152,7 +150,7 @@ for (i in seq_along(celltype.set)) { if (any(table(bulk.i$disease) < 3)) { marker.list[[i]] <- EmptyDF(n = 0) } else { - marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'COVID',ident.2 = 'Healthy', slot = 'counts', test.use = 'DESeq2', verbose = F ) + marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'COVID-19',ident.2 = 'healthy', slot = 'counts', test.use = 'DESeq2', verbose = F ) } } @@ -171,21 +169,12 @@ marker.list.filter <- lapply(marker.list, function(x) { ``` -```{r} -object$Status <- factor(object$Status, levels = c('Healthy', 'COVID')) -bulk$disease <- factor(bulk$disease, levels = c('Healthy', 'COVID')) +```{r} +bulk$disease <- factor(bulk$disease, levels = c('healthy', 'COVID-19')) ``` -```{r} -VlnPlot(bulk, features = 'IFI44L', group.by = 'celltype', split.by = 'disease') - -VlnPlot(object, features = 'IFI44L', group.by = 'predicted.celltype.l2', split.by = 'Status') -``` - -```{r print_times} -as.data.frame(all_times) -``` - -```{r session_info} -sessionInfo() +```{r, fig.width=10, fig.height=5} +VlnPlot(bulk, features = 'MX1', group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) + ``` + \ No newline at end of file diff --git a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd b/vignettes/MouseBrain_sketch_clustering.Rmd similarity index 58% rename from vignettes/BPCells_sketch_clustering_mouse_brain.Rmd rename to vignettes/MouseBrain_sketch_clustering.Rmd index d2a0dce3b..d2755b6a7 100755 --- a/vignettes/BPCells_sketch_clustering_mouse_brain.Rmd +++ b/vignettes/MouseBrain_sketch_clustering.Rmd @@ -1,31 +1,8 @@ --- -title: "Sketch clustering in mouse brain" +title: "R Notebook" output: html_notebook --- -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - ## load library ```{r, warning=FALSE, message=FALSE} library(Seurat) @@ -35,10 +12,15 @@ library(BPCells) ## load data from h5ad ```{r, warning=FALSE, message=FALSE} t0_CreateObject <- system.time({ - mat <- open_matrix_dir("../data/mouse_1M_neurons_counts") - mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse') - options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) - obj <- CreateSeuratObject(counts = mat) + +mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts")[,1:1e5] + +devtools::load_all("/brahms/haoy//package/MetricPatch/") +mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') + +options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) +obj <- CreateSeuratObject(counts = mat ) + }) ``` @@ -48,8 +30,7 @@ t0_CreateObject <- system.time({ t1_CreateSketchAssay <- system.time({ obj <- NormalizeData(obj) obj <- FindVariableFeatures(obj, layer = 'counts') -obj <- LeverageScore(obj) -obj <- LeverageScoreSampling(object = obj, ncells = 50000, cast = 'dgCMatrix') +obj <- LeverageScoreSampling(object = obj, ncells = 5000, cast = 'dgCMatrix') }) @@ -70,20 +51,11 @@ obj <- RunUMAP(obj, dims = 1:50, return.model = T) ```{r} DimPlot(obj, label = T, reduction = 'umap') + NoLegend() ``` -## Azimuth mapping to annotate clusters -```{r, warning=F, message=F} -# add some fake annotations to the object -obj$predicted.subclass <- sample(c("celltype1", "celltype2"), length(Cells(obj)), replace = TRUE) -obj$predicted.cluster <- sample(c("celltype1", "celltype2", "celltype3", "celltype4"), length(Cells(obj)), replace = TRUE) -``` - ```{r} DimPlot(obj, reduction = 'umap', label = T) + NoLegend() -DimPlot(obj, reduction = 'umap', group.by = 'predicted.subclass',label = T) + NoLegend() -DimPlot(obj, reduction = 'umap', group.by = 'predicted.cluster',label = T) + NoLegend() - + ``` ```{r,fig.height = 20, fig.width = 15} features.set <- c('Aqp4', 'Sox10', 'Slc17a7', 'Aif1', 'Foxj1', 'Pax6', 'Slc17a6', 'Lum', 'Nanog', 'Gad2', 'Foxj1', 'Cldn5','Alas2') @@ -118,9 +90,7 @@ t4_transferLabel <- system.time({ atoms = 'sketch', reduction = 'pca.orig', dims = 1:50, - refdata = list(cluster_full = 'sketch_snn_res.0.8', - subclass_full ='predicted.subclass', - cluster_anno_full = 'predicted.cluster'), + refdata = list(cluster_full = 'sketch_snn_res.0.8'), reduction.model = 'umap' ) }) @@ -130,19 +100,17 @@ t4_transferLabel <- system.time({ ```{r} library(ggplot2) -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() - -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() ``` ```{r} all_T <- ls(pattern = '^t') -overall <- sum(sapply(all_T, function(x) round(as.numeric(get(x)['elapsed']), digits = 3)))/60 +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 for (i in 1:length(all_T)) { - time.i <- as.numeric(get(all_T[i])['elapsed']) + time.i <- get(all_T[i])['elapsed'] if (time.i > 60) { print(paste(all_T[i], round(time.i/60, digits = 1), 'mins')) } else { @@ -158,22 +126,32 @@ print(paste('Total time ', round(overall, digits = 3), 'mins' )) obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], k = 30, - method = "hnsw", - metric = "cosine", + method = "hnsw", + metric = "cosine", n_threads = 10) obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') ``` ```{r} -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.subclass_full', alpha = 0.1) + NoLegend() -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_anno_full', alpha = 0.1) + NoLegend() -``` +DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() -```{r print_times} -as.data.frame(all_times) ``` -```{r session_info} -sessionInfo() +## sub type clustering +```{r} +obj.sub <- subset(obj, subset = predicted.cluster_full %in% c(5, 12)) +obj.sub[['sketch']] <- NULL +obj.sub[['RNA']] <- CastAssay(object = obj.sub[['RNA']], to = "dgCMatrix" ) +obj.sub <- FindVariableFeatures(obj.sub, layer = 'counts') +obj.sub <- ScaleData(obj.sub) +obj.sub <- RunPCA(obj.sub) +obj.sub <- RunUMAP(obj.sub, dims = 1:30) +obj.sub <- FindNeighbors(obj.sub, dims = 1:30) +obj.sub <- FindClusters(obj.sub) ``` + +```{r} +DimPlot(obj.sub, label = T) + NoLegend() + +``` \ No newline at end of file diff --git a/vignettes/BPCells_sketch_inte_1M.Rmd b/vignettes/ParseBio_sketch_integration.Rmd similarity index 66% rename from vignettes/BPCells_sketch_inte_1M.Rmd rename to vignettes/ParseBio_sketch_integration.Rmd index 2d284b54e..d16a5ce0c 100755 --- a/vignettes/BPCells_sketch_inte_1M.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -1,31 +1,8 @@ --- -title: "Sketch integration" +title: "R Notebook" output: html_notebook --- -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - ## load package ```{r, warning=F, message=F} @@ -36,10 +13,14 @@ library(dplyr) ## load matrix ```{r, warning=F, message=F} - + time0_loadMatrix <- system.time({ - mat <- open_matrix_dir('../data/pbmc_ParseBio_sparse//') - meta <- readRDS('../data/ParseBio_PBMC_meta.rds') + mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') + meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') + meta$disease <- sample(c('H','D'), nrow(meta), replace = T) + #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') + #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') + }) ``` @@ -50,6 +31,12 @@ options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) time1_normalize <- system.time({ object <- CreateSeuratObject(counts = mat, meta.data = meta) object <- NormalizeData(object, verbose = FALSE) + + # object[['RNA']]$data <- write_matrix_dir( + # mat = object[['RNA']]$data, + # dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data' + # ) + #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data') }) @@ -65,7 +52,6 @@ time3_FindVariable <- system.time({ ) time4_LeverageScoreSampling <- system.time({ - object <- LeverageScore(object, verbose = FALSE) object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix', verbose = FALSE) }) @@ -82,7 +68,7 @@ time5_SketchIntegration <- system.time({ object <- RunPCA(object, features = features, verbose = F) DefaultAssay(object) <- 'sketch' options(future.globals.maxSize = 3e9) - object <- IntegrateLayers(object, + object <- IntegrateLayers(object, method = RPCAIntegration, orig = 'pca', new.reduction = 'integrated.rpca', @@ -110,7 +96,7 @@ time6_UnSketch <- system.time({ orig = 'RNA', reduction = 'integrated.rpca' , layers = Layers(object = object[['RNA']], search = 'data'), - features = features ) + features = features) }) @@ -120,22 +106,60 @@ time6_UnSketch <- system.time({ ```{r} object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') ``` +```{r} +p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.5) + NoLegend() +p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend() +p1+p2 + +``` +## save object +```{r} +#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat.rds")) +``` ## pseudo-bulk ```{r} -time8_bulk <- system.time( bulk <- AverageExpression(object, + +time8_bulk <- system.time( bulk <- AggregateExpression(object, return.seurat = T, slot = 'counts', assays = 'RNA', - group.by = c("celltype.weight","sample") + group.by = c("celltype.weight","sample","disease") ) ) -marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) -marker %>% - group_by(cluster) %>% - top_n(n = -5, wt = p_val) -> top5 -bulk <- ScaleData(bulk, features = top5$gene) +bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) +bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) +bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) + +marker.list <- list() +celltype.set <- unique(bulk$celltype ) +for (i in seq_along(celltype.set)) { + bulk.i <- subset(bulk, subset = celltype == celltype.set[i]) + Idents(bulk.i) <- 'disease' + if (any(table(bulk.i$disease) < 3)) { + marker.list[[i]] <- EmptyDF(n = 0) + } else { + marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'D',ident.2 = 'H', slot = 'counts', test.use = 'DESeq2', verbose = F ) + } +} +names(marker.list) <- celltype.set +``` + +```{r} +marker.list.filter <- lapply(marker.list, function(x) { + if(nrow(x) > 0) { + x <- x[x$p_val_adj < 0.01 & !is.na(x$p_val_adj ),] + } + if (nrow(x) > 0) { + return(x) + } +}) + +``` + +```{r} +VlnPlot(bulk, features = 'FOXO3', group.by = 'celltype', split.by = 'disease') ``` ## computing time summary @@ -155,12 +179,17 @@ for (i in 1:length(all_T)) { print(paste('Total time ', round(overall, digits = 1), 'mins' )) ``` -```{r} -p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend() -p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend() -p1+p2 -``` + + ```{r,fig.height = 20, fig.width = 15} + +Idents(bulk) <- 'celltype' +marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) +marker %>% + group_by(cluster) %>% + top_n(n = -5, wt = p_val) -> top5 +bulk <- ScaleData(bulk, features = top5$gene) DoHeatmap(bulk, features = top5$gene) + NoLegend() + ``` From 7118ac902889e019be21e9f8adabd14e090ff2fd Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 24 Feb 2023 16:44:01 -0500 Subject: [PATCH 492/979] trycatch for removing layers in diet seurat --- R/objects.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index 5a396d98d..c0d07bd01 100644 --- a/R/objects.R +++ b/R/objects.R @@ -567,7 +567,15 @@ DietSeurat <- function( abort(message = "Cannot remove both 'counts' and 'data' from v3 Assays") } for (lyr in layers.rm) { - object[[assay]][[lyr]] <- NULL + object <- tryCatch(expr = { + object[[assay]][[lyr]] <- NULL + object + }, error = function(e) { + slot(object = object[[assay]], name = lyr) <- new(Class = "dgCMatrix") + message("Converting layer ", lyr, " in assay ", + assay, " to empty dgCMatrix") + object + }) } } if (!is.null(x = features)) { From 9afe20b582cce4d005350032445ef6bb9e2ed032 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 25 Feb 2023 11:52:49 -0500 Subject: [PATCH 493/979] remove atomic and bridge integration vignettes --- vignettes/atomic_integration.Rmd | 192 ------------ vignettes/bridge_integration_vignette.Rmd | 286 ------------------ vignettes/seurat5_atomic_integration.Rmd | 192 ------------ .../seurat5_bridge_integration_vignette.Rmd | 286 ------------------ 4 files changed, 956 deletions(-) delete mode 100644 vignettes/atomic_integration.Rmd delete mode 100644 vignettes/bridge_integration_vignette.Rmd delete mode 100644 vignettes/seurat5_atomic_integration.Rmd delete mode 100644 vignettes/seurat5_bridge_integration_vignette.Rmd diff --git a/vignettes/atomic_integration.Rmd b/vignettes/atomic_integration.Rmd deleted file mode 100644 index 38e695c01..000000000 --- a/vignettes/atomic_integration.Rmd +++ /dev/null @@ -1,192 +0,0 @@ ---- -title: "Atomic sketch integration for scRNA-seq data" -output: - html_document: - df_print: paged ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - fig.width = 10, - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - - -The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory at once. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. - -In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments from five studies, each profiling of human immune cells (PBMC) from COVID patients. Specifically, we demonstrate how to perform the following steps - -* Sample a representative subset of cells ('atoms') from each dataset -* Integrate the atoms from each dataset -* Reconstruct (integrate) the full datasets, based on the atoms - -First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. - -```{r install, eval=FALSE} -if (!requireNamespace("remotes", quietly = TRUE)) { - install.packages("remotes") -} -remotes::install_github("satijalab/seurat", "feat/dictionary") -``` - -```{r message=FALSE, warning=FALSE} -library(Seurat) -library(SeuratDisk) -library(patchwork) -``` - -## Downloading datasets - -We obtained datasets in h5seurat format from a public [resource compiled by the Gottardo Lab](https://atlas.fredhutch.org/fredhutch/covid/). In this analysis, we use the [Arunachalam](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/arunachalam_2020_processed.HDF5), [Combes](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/combes_2021_processed.HDF5), [Lee](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/lee_2020_processed.HDF5), [Wilk](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/wilk_2020_processed.HDF5), and [Yao](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/yao_2021_processed.HDF5) datasets, but you can download additional data from this resource and include it in the vignette below. - -## Sample representative atoms from each dataset - -Inspired by pioneering work aiming to identify ['sketches'](https://www.sciencedirect.com/science/article/pii/S2405471219301528) of scRNA-seq data, our first step is to sample a representative set of cells from each dataset. We compute a leverage score (estimate of ['statistical leverage'](https://arxiv.org/abs/1109.3843)) for each cell, which helps to identify cells that are likely to be member of rare subpopulations and ensure that these are included in our representative sample. Importantly, the estimation of leverage scores only requires data normalization, can be computed efficiently for sparse datasets, and does not require any intensive computation or dimensional reduction steps. - -We load each object separately, perform basic preprocessing (normalization and variable feature selection), and select and store 5,000 representative cells (which we call 'atoms') from each dataset. We then delete the full dataset from memory, before loading the next one in. - -```{r init, results='hide', message=FALSE, fig.keep='none'} - -file.dir <- '../data/PBMCVignette/' -files.set <- c("arunachalam_2020_processed.HDF5", "combes_2021_processed.HDF5","lee_2020_processed.HDF5","wilk_2020_processed.HDF5","yao_2021_processed.HDF5") - -atoms.list <- list() -for (i in 1:length(files.set)) { - - # load in Seurat object - object <- LoadH5Seurat(file = paste0(file.dir ,files.set[i]), assays = 'RNA') - dataset_name <- gsub("_processed.HDF5", "", files.set[i]) - object$dataset <- dataset_name - - # Rename cells to avoid future conflicts - object <- RenameCells(object = object, add.cell.id = dataset_name) - - # basic preprocessing - object <- NormalizeData(object) - object <- FindVariableFeatures(object) - - # calculate leverage score and sample 5000 cells based on leverage score - atoms.i <- LeverageScoreSampling(object = object, num.cells = 5000) - atoms.list[[i]] <- atoms.i -} - -# delete full object from memory -# note that this is optional, if you can store the full datasets in memory, you dont have to reload them later -rm(object) -``` - - -## Perform integration on the atoms from different datasets - -Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we utilize a new wrapper function that takes a list of Seurat object and runs an optimized version of the [Fast integration using reciprocal PCA](https://satijalab.org/seurat/articles/integration_rpca.html) in Seurat workflow. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated_dr`). We perform SCTransform normalization prior to performing integration, but this step is optional. - -However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. For example, we also demonstrate below how to use [Harmony](https://github.com/immunogenomics/harmony), as an alternative integration approach. - - -```{r fast.integration} -# optional step: SCTransform normalization -for (i in 1:length(atoms.list)) { - atoms.list[[i]] <- SCTransform(atoms.list[[i]], verbose = FALSE) -} - -# perform integration -features <- SelectIntegrationFeatures(object.list = atoms.list) -atoms.merge <- FastRPCAIntegration(object.list = atoms.list, dims = 1:30, normalization.method = 'SCT', anchor.features = features) - -# we can generate a 2D visualization representing the integrated atoms -atom.reduction <- 'integrated_dr' -atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) -DimPlot(atoms.merge, group.by = 'dataset') -``` - -
    - **Alternative: integrate atoms using Harmony** - -As an alternative approach to integrate atoms, and to demonstrate the flexibility of our atomic sketch procedure, we can also use the [Harmony within the Seurat workflow](https://github.com/immunogenomics/harmony) to integrate the atoms. The integration procedure returns a Seurat object with a low-dimensional space (stored as the `harmony` dimensional reduction) that jointly represents atoms from all datasets. - -```{r, eval = FALSE} -library(harmony) -atoms.merge <- merge(atoms.list[[1]], atoms.list[2:length(atoms.list)]) -VariableFeatures(atoms.merge) <- SelectIntegrationFeatures(object.list = atoms.list) -atoms.merge <- ScaleData(atoms.merge) -atoms.merge <- RunPCA(atoms.merge) -atoms.merge <- RunHarmony(atoms.merge, project.dim = FALSE, group.by.vars = 'dataset') -atom.reduction <- 'harmony' - -atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) -DimPlot(atoms.merge, group.by = 'dataset') -``` - -
    - ---- - -## Integrate all cells from all datasets - -Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `IntegrateSketchEmbeddings` function to integrate all cells. After this function is run, each cell in the object has a - -```{r load.full.data} -integrated_objects <- list() -for (i in 1:length(files.set)) { - - # load in Seurat object / basic preprocessing - object <- LoadH5Seurat(file = paste0(file.dir , files.set[i]), assays = 'RNA') - dataset_name <- gsub("_processed.HDF5", "", files.set[i]) - object$dataset <- dataset_name - object <- RenameCells(object = object, add.cell.id = dataset_name) - object <- NormalizeData(object) - - # Integrate all cells into the same space as the atoms - object <- IntegrateSketchEmbeddings(object = object, atom.sketch.object = atoms.merge, atom.sketch.reduction = atom.reduction, features = features) - - # At this point, you can save the results/delete the object - # Since we want to compute a joint visualization of all cells later, - # we save the object with the dimensional reduction and just the top 100 variable features - object <- DietSeurat(object, features = features[1:100], dimreducs = 'integrated_dr') - integrated_objects[[i]] <- object - rm(object) -} -``` - -We perform UMAP visualization on the integrated embeddings. -```{r } -obj.merge <- merge(integrated_objects[[1]], integrated_objects[2:length(integrated_objects)], merge.dr = 'integrated_dr') -obj.merge <- RunUMAP(obj.merge, reduction = 'integrated_dr', dims = 1:30) -``` - -Now we can visualize the results, plotting the scRNA-seq cells based on dataset batches and pre-annotated labels annotations on the UMAP embedding. We also add pre-computed cell annotations to this object (you can download the cell annotation metadata at [this link](https://seurat.nygenome.org/vignette_data/atomic_integration/pbmc_annotations.txt)). - -```{r split.dim} -annotation_data <- read.table("../data/PBMCVignette/pbmc_annotations.txt") -obj.merge <- AddMetaData(obj.merge, metadata = annotation_data) -DimPlot(obj.merge, reduction = "umap", group.by = "dataset", shuffle = TRUE, raster = FALSE) -DimPlot(obj.merge, reduction = "umap", group.by = "celltype.l2", raster = FALSE) -``` - -Note that Neutrophils are present primarily in a single dataset (Combes), present at very low frequency in two others (Wilk and Lee), and absent in the remaining datasets. Despite the fact that this population is not present in all samples, it is correctly integrated by our atomic sketch procedure. - - -
    - **Session Info** -```{r} -sessionInfo() -``` -
    diff --git a/vignettes/bridge_integration_vignette.Rmd b/vignettes/bridge_integration_vignette.Rmd deleted file mode 100644 index bdbdf94a8..000000000 --- a/vignettes/bridge_integration_vignette.Rmd +++ /dev/null @@ -1,286 +0,0 @@ ---- -title: "Dictionary Learning for cross-modality integration" -output: - html_document: - theme: united - df_print: kable - pdf_document: default -date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now) - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - - -In the same way that read mapping tools have transformed genome sequence analysis, the ability to map new datasets to established references represents an exciting opportunity for the field of single-cell genomics. Along with others in the community, we have developed [tools to map and interpret query datasets](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html), and have also constructed a [set of scRNA-seq datasets for diverse mammalian tissues](http://azimuth.hubmapconsortium.org). - -A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, bioRxiv 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. - -In this vignette we demonstrate: - -* Loading in and pre-processing the scATAC-seq, multiome, and scRNA-seq reference datasets -* Mapping the scATAC-seq dataset via bridge integration -* Exploring and assessing the resulting annotations - -First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. - -```{r, message=FALSE, warning=FALSE} -library(remotes) -remotes::install_github("satijalab/seurat", "feat/dictionary", quiet = TRUE) -library(Seurat) -library(SeuratDisk) -library(Signac) -library(EnsDb.Hsapiens.v86) -library(dplyr) -library(ggplot2) -``` - -## Load the bridge, query, and reference datasets - -We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a healthy donor. The dataset measures RNA-seq and ATAC-seq in the same cell, and is available for download from 10x Genomics [here](https://www.10xgenomics.com/resources/datasets/pbmc-from-a-healthy-donor-granulocytes-removed-through-cell-sorting-10-k-1-standard-2-0-0). We follow the loading instructions from the [Signac package vignettes](https://satijalab.org/signac/articles/pbmc_multiomic.html). Note that when using Signac, please make sure you are using the [latest version of Bioconductor]([http://www.bioconductor.org/news/bioc_3_14_release/]), as [users have reported errors](https://github.com/timoast/signac/issues/687) when using older BioC versions. - -
    - **Load and setup the 10x multiome object** - -```{r} -# the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") -# extract RNA and ATAC data -rna_counts <- inputdata.10x$`Gene Expression` -atac_counts <- inputdata.10x$Peaks -# Create Seurat object -obj.multi <- CreateSeuratObject(counts = rna_counts) -# Get % of mitochondrial genes -obj.multi[["percent.mt"]] <- PercentageFeatureSet(obj.multi, pattern = "^MT-") - -# add the ATAC-seq assay -grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) -grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) -atac_counts <- atac_counts[as.vector(grange.use), ] - -# Get gene annotations -annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) -# Change style to UCSC -seqlevelsStyle(annotations) <- 'UCSC' -genome(annotations) <- "hg38" - -# File with ATAC per fragment information file -frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" - -# Add in ATAC-seq data as ChromatinAssay object -chrom_assay <- CreateChromatinAssay( - counts = atac_counts, - sep = c(":", "-"), - genome = 'hg38', - fragments = frag.file, - min.cells = 10, - annotation = annotations -) - -# Add the ATAC assay to the multiome object -obj.multi[["ATAC"]] <- chrom_assay - -# Filter ATAC data based on QC metrics -obj.multi <- subset( - x = obj.multi, - subset = nCount_ATAC < 7e4 & - nCount_ATAC > 5e3 & - nCount_RNA < 25000 & - nCount_RNA > 1000 & - percent.mt < 20 -) - - -``` -
    - ---- - -The scATAC-seq query dataset represents ~10,000 PBMC from a healthy donor, and is available for download [here](https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-atac-v-1-1-chromium-x-1-1-standard-2-0-0). We load in the peak/cell matrix, store the path to the fragments file, and add gene annotations to the object, following the steps as with the ATAC data in the multiome experiment. - -We note that it is important to quantify the same set of genomic features in the query dataset as are quantified in the multi-omic bridge. We therefore requantify the set of scATAC-seq peaks using the `FeatureMatrix` command. This is also described in the [Signac vignettes](https://satijalab.org/signac/articles/integrate_atac.html) and shown below. - -
    - **Load and setup the 10x scATAC-seq query** - -```{r, message=FALSE, warning=FALSE} -# Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" - -# Get gene annotations -annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) - -# Change to UCSC style -seqlevelsStyle(annotation) <- 'UCSC' - -# Create ChromatinAssay for ATAC data -atac_pbmc_assay <- CreateChromatinAssay( - counts = atac_pbmc_data, - sep = c(":", "-"), - fragments = fragpath, - annotation = annotation -) - -# Requantify query ATAC to have same features as multiome ATAC dataset -requant_multiome_ATAC <- FeatureMatrix( - fragments = Fragments(atac_pbmc_assay), - features = granges(obj.multi[['ATAC']]), - cells = Cells(atac_pbmc_assay) -) - -# Create assay with requantified ATAC data -ATAC_assay <- CreateChromatinAssay( - counts = requant_multiome_ATAC, - fragments = fragpath, - annotation = annotation -) - -# Create Seurat sbject -obj.atac <- CreateSeuratObject(counts = ATAC_assay,assay = 'ATAC') -obj.atac[['peak.orig']] <- atac_pbmc_assay -obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) -``` -
    - ---- - -We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). - -```{r pbmc.ref} -obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") -``` -
    - **What if I want to use my own reference dataset?** - -As an alternative to using a pre-built reference, you can also use your own reference. To demonstrate, you can download a scRNA-seq dataset of 23,837 human PBMC [here](https://www.dropbox.com/s/x8mu9ye2w3a63hf/20k_PBMC_scRNA.rds?dl=0), which we have already annotated. -```{r, message=FALSE, warning=FALSE, eval=FALSE} -obj.rna = readRDS("/path/to/reference.rds") -obj.rna = SCTransform(object = obj.rna) %>% RunPCA() %>% RunUMAP(dims = 1:50, return.model = TRUE) -``` -When using your own reference, set `reference.reduction = "pca"` in the `PrepareBridgeReference` function. - -
    - ---- - -# Preprocessing/normalization for all datasets - -Prior to performing bridge integration, we normalize and pre-process each of the datasets (note that the reference has already been normalized). We normalize gene expression data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), and ATAC data using TF-IDF. - -```{r, message=FALSE, warning=FALSE} -# normalize multiome RNA -DefaultAssay(obj.multi) <- "RNA" -obj.multi <- SCTransform(obj.multi, verbose = FALSE) - -# normalize multiome ATAC -DefaultAssay(obj.multi) <- "ATAC" -obj.multi <- RunTFIDF(obj.multi) -obj.multi <- FindTopFeatures(obj.multi, min.cutoff = "q0") - -# normalize query -obj.atac <- RunTFIDF(obj.atac) - -``` - -## Map scATAC-seq dataset using bridge integration - -Now that we have the reference, query, and bridge datasets set up, we can begin integration. The bridge dataset enables translation between the scRNA-seq reference and the scATAC-seq query, effectively augmenting the reference so that it can map a new data type. We call this an extended reference, and first set it up. Note that you can save the results of this function and map multiple scATAC-seq datasets without having to rerun. - - -```{r, message=FALSE, warning=FALSE} -# Drop first dimension for ATAC reduction -dims.atac <- 2:50 -dims.rna <- 1:50 -DefaultAssay(obj.multi) <- "RNA" -DefaultAssay(obj.rna) <- "SCT" -obj.rna.ext <- PrepareBridgeReference(reference = obj.rna, - bridge = obj.multi, - reference.reduction = "spca", - reference.dims = dims.rna, - normalization.method = "SCT" -) -``` - -Now, we can directly find anchors between the extended reference and query objects. We use the `FindBridgeTransferAnchors` function, which translates the query dataset using the same dictionary as was used to translate the reference, and then identifies anchors in this space. The function is meant to mimic our `FindTransferAnchors` function, but to identify correspondences across modalities. - -```{r, message=FALSE, warning=FALSE} -bridge.anchor <- FindBridgeTransferAnchors(extended.reference = obj.rna.ext, - query = obj.atac, - reduction = "lsiproject", - dims = dims.atac -) -``` - - -Once we have identified anchors, we can map the query dataset onto the reference. The `MapQuery` function is the same as we have [previously introduced for reference mapping](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html) . It transfers cell annotations from the reference dataset, and also visualizes the query dataset on a previously computed UMAP embedding. Since our reference dataset contains cell type annotations at three levels of resolution (l1 - l3), we can transfer each level to the query dataset. - - -```{r, message=FALSE, warning=FALSE} - -obj.atac <- MapQuery(anchorset = bridge.anchor, - reference = obj.rna, - query = obj.atac, - refdata = list( - l1 = "celltype.l1", - l2 = "celltype.l2", - l3 = "celltype.l3"), - reduction.model = "wnn.umap" -) -``` - -Now we can visualize the results, plotting the scATAC-seq cells based on their predicted annotations, on the reference UMAP embedding. You can see that each scATAC-seq cell has been assigned a cell name based on the scRNA-seq defined cell ontology. - -```{r, message=FALSE, warning=FALSE} -DimPlot(obj.atac, group.by = "predicted.l2", reduction = "ref.umap", label = TRUE) + ggtitle("ATAC") + NoLegend() -``` - -## Assessing the mapping - -To assess the mapping and cell type predictions, we will first see if the predicted cell type labels are concordant with an unsupervised analysis of the scATAC-seq dataset. We follow the standard unsupervised processing workflow for scATAC-seq data: - -```{r, message=FALSE, warning=FALSE} -obj.atac <- FindTopFeatures(obj.atac, min.cutoff = "q0") -obj.atac <- RunSVD(obj.atac) -obj.atac <- RunUMAP(obj.atac, reduction = "lsi", dims = 2:50) -``` - -Now, we visualize the predicted cluster labels on the unsupervised UMAP emebdding. We can see that predicted cluster labels (from the scRNA-seq reference) are concordant with the structure of the scATAC-seq data. However, there are some cell types (i.e. Treg), that do not appear to separate in unsupervised analysis. These may be prediction errors, or cases where the reference mapping provides additional resolution. - -```{r, pbmcdimplots, message=FALSE, warning=FALSE} -DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) -``` - -Lastly, we validate the predicted cell types for the scATAC-seq data by examining their chromatin accessibility profiles at canonical loci. We use the `CoveragePlot` function to visualize accessibility patterns at the CD8A, FOXP3, and RORC, after grouping cells by their predicted labels. We see expected patterns in each case. For example, the PAX5 locus exhibits peaks that are accessible exclusively in B cells, and the CD8A locus shows the same in CD8 T cell subsets. Similarly, the accessibility of FOXP3, a canonical marker of regulatory T cells (Tregs), in predicted Tregs provides strong support for the accuracy of our prediction. - -```{r, message=FALSE, warning=FALSE} -CoveragePlot(obj.atac, region = "PAX5", group.by = "predicted.l1", idents = c("B", "CD4 T", "Mono", "NK"), window = 200, extend.upstream = -150000) -CoveragePlot(obj.atac, region = "CD8A", group.by = "predicted.l2", idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), extend.downstream = 5000, extend.upstream = 5000) -CoveragePlot(obj.atac, region = "FOXP3", group.by = "predicted.l2", idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), extend.downstream = 0, extend.upstream = 0) -CoveragePlot(obj.atac, region = "RORC", group.by = "predicted.l2", idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), extend.downstream = 5000, extend.upstream = 5000) -``` - -
    - **Session Info** -```{r} -sessionInfo() -``` -
    diff --git a/vignettes/seurat5_atomic_integration.Rmd b/vignettes/seurat5_atomic_integration.Rmd deleted file mode 100644 index 38e695c01..000000000 --- a/vignettes/seurat5_atomic_integration.Rmd +++ /dev/null @@ -1,192 +0,0 @@ ---- -title: "Atomic sketch integration for scRNA-seq data" -output: - html_document: - df_print: paged ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - fig.width = 10, - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - - -The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory at once. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. - -In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments from five studies, each profiling of human immune cells (PBMC) from COVID patients. Specifically, we demonstrate how to perform the following steps - -* Sample a representative subset of cells ('atoms') from each dataset -* Integrate the atoms from each dataset -* Reconstruct (integrate) the full datasets, based on the atoms - -First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. - -```{r install, eval=FALSE} -if (!requireNamespace("remotes", quietly = TRUE)) { - install.packages("remotes") -} -remotes::install_github("satijalab/seurat", "feat/dictionary") -``` - -```{r message=FALSE, warning=FALSE} -library(Seurat) -library(SeuratDisk) -library(patchwork) -``` - -## Downloading datasets - -We obtained datasets in h5seurat format from a public [resource compiled by the Gottardo Lab](https://atlas.fredhutch.org/fredhutch/covid/). In this analysis, we use the [Arunachalam](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/arunachalam_2020_processed.HDF5), [Combes](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/combes_2021_processed.HDF5), [Lee](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/lee_2020_processed.HDF5), [Wilk](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/wilk_2020_processed.HDF5), and [Yao](https://s3.us-west-2.amazonaws.com/atlas.fredhutch.org/data/hutch/covid19/downloads/yao_2021_processed.HDF5) datasets, but you can download additional data from this resource and include it in the vignette below. - -## Sample representative atoms from each dataset - -Inspired by pioneering work aiming to identify ['sketches'](https://www.sciencedirect.com/science/article/pii/S2405471219301528) of scRNA-seq data, our first step is to sample a representative set of cells from each dataset. We compute a leverage score (estimate of ['statistical leverage'](https://arxiv.org/abs/1109.3843)) for each cell, which helps to identify cells that are likely to be member of rare subpopulations and ensure that these are included in our representative sample. Importantly, the estimation of leverage scores only requires data normalization, can be computed efficiently for sparse datasets, and does not require any intensive computation or dimensional reduction steps. - -We load each object separately, perform basic preprocessing (normalization and variable feature selection), and select and store 5,000 representative cells (which we call 'atoms') from each dataset. We then delete the full dataset from memory, before loading the next one in. - -```{r init, results='hide', message=FALSE, fig.keep='none'} - -file.dir <- '../data/PBMCVignette/' -files.set <- c("arunachalam_2020_processed.HDF5", "combes_2021_processed.HDF5","lee_2020_processed.HDF5","wilk_2020_processed.HDF5","yao_2021_processed.HDF5") - -atoms.list <- list() -for (i in 1:length(files.set)) { - - # load in Seurat object - object <- LoadH5Seurat(file = paste0(file.dir ,files.set[i]), assays = 'RNA') - dataset_name <- gsub("_processed.HDF5", "", files.set[i]) - object$dataset <- dataset_name - - # Rename cells to avoid future conflicts - object <- RenameCells(object = object, add.cell.id = dataset_name) - - # basic preprocessing - object <- NormalizeData(object) - object <- FindVariableFeatures(object) - - # calculate leverage score and sample 5000 cells based on leverage score - atoms.i <- LeverageScoreSampling(object = object, num.cells = 5000) - atoms.list[[i]] <- atoms.i -} - -# delete full object from memory -# note that this is optional, if you can store the full datasets in memory, you dont have to reload them later -rm(object) -``` - - -## Perform integration on the atoms from different datasets - -Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we utilize a new wrapper function that takes a list of Seurat object and runs an optimized version of the [Fast integration using reciprocal PCA](https://satijalab.org/seurat/articles/integration_rpca.html) in Seurat workflow. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated_dr`). We perform SCTransform normalization prior to performing integration, but this step is optional. - -However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. For example, we also demonstrate below how to use [Harmony](https://github.com/immunogenomics/harmony), as an alternative integration approach. - - -```{r fast.integration} -# optional step: SCTransform normalization -for (i in 1:length(atoms.list)) { - atoms.list[[i]] <- SCTransform(atoms.list[[i]], verbose = FALSE) -} - -# perform integration -features <- SelectIntegrationFeatures(object.list = atoms.list) -atoms.merge <- FastRPCAIntegration(object.list = atoms.list, dims = 1:30, normalization.method = 'SCT', anchor.features = features) - -# we can generate a 2D visualization representing the integrated atoms -atom.reduction <- 'integrated_dr' -atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) -DimPlot(atoms.merge, group.by = 'dataset') -``` - -
    - **Alternative: integrate atoms using Harmony** - -As an alternative approach to integrate atoms, and to demonstrate the flexibility of our atomic sketch procedure, we can also use the [Harmony within the Seurat workflow](https://github.com/immunogenomics/harmony) to integrate the atoms. The integration procedure returns a Seurat object with a low-dimensional space (stored as the `harmony` dimensional reduction) that jointly represents atoms from all datasets. - -```{r, eval = FALSE} -library(harmony) -atoms.merge <- merge(atoms.list[[1]], atoms.list[2:length(atoms.list)]) -VariableFeatures(atoms.merge) <- SelectIntegrationFeatures(object.list = atoms.list) -atoms.merge <- ScaleData(atoms.merge) -atoms.merge <- RunPCA(atoms.merge) -atoms.merge <- RunHarmony(atoms.merge, project.dim = FALSE, group.by.vars = 'dataset') -atom.reduction <- 'harmony' - -atoms.merge <- RunUMAP(atoms.merge, reduction = atom.reduction, dims = 1:30, return.model = TRUE) -DimPlot(atoms.merge, group.by = 'dataset') -``` - -
    - ---- - -## Integrate all cells from all datasets - -Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `IntegrateSketchEmbeddings` function to integrate all cells. After this function is run, each cell in the object has a - -```{r load.full.data} -integrated_objects <- list() -for (i in 1:length(files.set)) { - - # load in Seurat object / basic preprocessing - object <- LoadH5Seurat(file = paste0(file.dir , files.set[i]), assays = 'RNA') - dataset_name <- gsub("_processed.HDF5", "", files.set[i]) - object$dataset <- dataset_name - object <- RenameCells(object = object, add.cell.id = dataset_name) - object <- NormalizeData(object) - - # Integrate all cells into the same space as the atoms - object <- IntegrateSketchEmbeddings(object = object, atom.sketch.object = atoms.merge, atom.sketch.reduction = atom.reduction, features = features) - - # At this point, you can save the results/delete the object - # Since we want to compute a joint visualization of all cells later, - # we save the object with the dimensional reduction and just the top 100 variable features - object <- DietSeurat(object, features = features[1:100], dimreducs = 'integrated_dr') - integrated_objects[[i]] <- object - rm(object) -} -``` - -We perform UMAP visualization on the integrated embeddings. -```{r } -obj.merge <- merge(integrated_objects[[1]], integrated_objects[2:length(integrated_objects)], merge.dr = 'integrated_dr') -obj.merge <- RunUMAP(obj.merge, reduction = 'integrated_dr', dims = 1:30) -``` - -Now we can visualize the results, plotting the scRNA-seq cells based on dataset batches and pre-annotated labels annotations on the UMAP embedding. We also add pre-computed cell annotations to this object (you can download the cell annotation metadata at [this link](https://seurat.nygenome.org/vignette_data/atomic_integration/pbmc_annotations.txt)). - -```{r split.dim} -annotation_data <- read.table("../data/PBMCVignette/pbmc_annotations.txt") -obj.merge <- AddMetaData(obj.merge, metadata = annotation_data) -DimPlot(obj.merge, reduction = "umap", group.by = "dataset", shuffle = TRUE, raster = FALSE) -DimPlot(obj.merge, reduction = "umap", group.by = "celltype.l2", raster = FALSE) -``` - -Note that Neutrophils are present primarily in a single dataset (Combes), present at very low frequency in two others (Wilk and Lee), and absent in the remaining datasets. Despite the fact that this population is not present in all samples, it is correctly integrated by our atomic sketch procedure. - - -
    - **Session Info** -```{r} -sessionInfo() -``` -
    diff --git a/vignettes/seurat5_bridge_integration_vignette.Rmd b/vignettes/seurat5_bridge_integration_vignette.Rmd deleted file mode 100644 index bdbdf94a8..000000000 --- a/vignettes/seurat5_bridge_integration_vignette.Rmd +++ /dev/null @@ -1,286 +0,0 @@ ---- -title: "Dictionary Learning for cross-modality integration" -output: - html_document: - theme: united - df_print: kable - pdf_document: default -date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now) - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - - -In the same way that read mapping tools have transformed genome sequence analysis, the ability to map new datasets to established references represents an exciting opportunity for the field of single-cell genomics. Along with others in the community, we have developed [tools to map and interpret query datasets](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html), and have also constructed a [set of scRNA-seq datasets for diverse mammalian tissues](http://azimuth.hubmapconsortium.org). - -A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, bioRxiv 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. - -In this vignette we demonstrate: - -* Loading in and pre-processing the scATAC-seq, multiome, and scRNA-seq reference datasets -* Mapping the scATAC-seq dataset via bridge integration -* Exploring and assessing the resulting annotations - -First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. - -```{r, message=FALSE, warning=FALSE} -library(remotes) -remotes::install_github("satijalab/seurat", "feat/dictionary", quiet = TRUE) -library(Seurat) -library(SeuratDisk) -library(Signac) -library(EnsDb.Hsapiens.v86) -library(dplyr) -library(ggplot2) -``` - -## Load the bridge, query, and reference datasets - -We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a healthy donor. The dataset measures RNA-seq and ATAC-seq in the same cell, and is available for download from 10x Genomics [here](https://www.10xgenomics.com/resources/datasets/pbmc-from-a-healthy-donor-granulocytes-removed-through-cell-sorting-10-k-1-standard-2-0-0). We follow the loading instructions from the [Signac package vignettes](https://satijalab.org/signac/articles/pbmc_multiomic.html). Note that when using Signac, please make sure you are using the [latest version of Bioconductor]([http://www.bioconductor.org/news/bioc_3_14_release/]), as [users have reported errors](https://github.com/timoast/signac/issues/687) when using older BioC versions. - -
    - **Load and setup the 10x multiome object** - -```{r} -# the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") -# extract RNA and ATAC data -rna_counts <- inputdata.10x$`Gene Expression` -atac_counts <- inputdata.10x$Peaks -# Create Seurat object -obj.multi <- CreateSeuratObject(counts = rna_counts) -# Get % of mitochondrial genes -obj.multi[["percent.mt"]] <- PercentageFeatureSet(obj.multi, pattern = "^MT-") - -# add the ATAC-seq assay -grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) -grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) -atac_counts <- atac_counts[as.vector(grange.use), ] - -# Get gene annotations -annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) -# Change style to UCSC -seqlevelsStyle(annotations) <- 'UCSC' -genome(annotations) <- "hg38" - -# File with ATAC per fragment information file -frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" - -# Add in ATAC-seq data as ChromatinAssay object -chrom_assay <- CreateChromatinAssay( - counts = atac_counts, - sep = c(":", "-"), - genome = 'hg38', - fragments = frag.file, - min.cells = 10, - annotation = annotations -) - -# Add the ATAC assay to the multiome object -obj.multi[["ATAC"]] <- chrom_assay - -# Filter ATAC data based on QC metrics -obj.multi <- subset( - x = obj.multi, - subset = nCount_ATAC < 7e4 & - nCount_ATAC > 5e3 & - nCount_RNA < 25000 & - nCount_RNA > 1000 & - percent.mt < 20 -) - - -``` -
    - ---- - -The scATAC-seq query dataset represents ~10,000 PBMC from a healthy donor, and is available for download [here](https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-atac-v-1-1-chromium-x-1-1-standard-2-0-0). We load in the peak/cell matrix, store the path to the fragments file, and add gene annotations to the object, following the steps as with the ATAC data in the multiome experiment. - -We note that it is important to quantify the same set of genomic features in the query dataset as are quantified in the multi-omic bridge. We therefore requantify the set of scATAC-seq peaks using the `FeatureMatrix` command. This is also described in the [Signac vignettes](https://satijalab.org/signac/articles/integrate_atac.html) and shown below. - -
    - **Load and setup the 10x scATAC-seq query** - -```{r, message=FALSE, warning=FALSE} -# Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" - -# Get gene annotations -annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) - -# Change to UCSC style -seqlevelsStyle(annotation) <- 'UCSC' - -# Create ChromatinAssay for ATAC data -atac_pbmc_assay <- CreateChromatinAssay( - counts = atac_pbmc_data, - sep = c(":", "-"), - fragments = fragpath, - annotation = annotation -) - -# Requantify query ATAC to have same features as multiome ATAC dataset -requant_multiome_ATAC <- FeatureMatrix( - fragments = Fragments(atac_pbmc_assay), - features = granges(obj.multi[['ATAC']]), - cells = Cells(atac_pbmc_assay) -) - -# Create assay with requantified ATAC data -ATAC_assay <- CreateChromatinAssay( - counts = requant_multiome_ATAC, - fragments = fragpath, - annotation = annotation -) - -# Create Seurat sbject -obj.atac <- CreateSeuratObject(counts = ATAC_assay,assay = 'ATAC') -obj.atac[['peak.orig']] <- atac_pbmc_assay -obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) -``` -
    - ---- - -We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). - -```{r pbmc.ref} -obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") -``` -
    - **What if I want to use my own reference dataset?** - -As an alternative to using a pre-built reference, you can also use your own reference. To demonstrate, you can download a scRNA-seq dataset of 23,837 human PBMC [here](https://www.dropbox.com/s/x8mu9ye2w3a63hf/20k_PBMC_scRNA.rds?dl=0), which we have already annotated. -```{r, message=FALSE, warning=FALSE, eval=FALSE} -obj.rna = readRDS("/path/to/reference.rds") -obj.rna = SCTransform(object = obj.rna) %>% RunPCA() %>% RunUMAP(dims = 1:50, return.model = TRUE) -``` -When using your own reference, set `reference.reduction = "pca"` in the `PrepareBridgeReference` function. - -
    - ---- - -# Preprocessing/normalization for all datasets - -Prior to performing bridge integration, we normalize and pre-process each of the datasets (note that the reference has already been normalized). We normalize gene expression data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), and ATAC data using TF-IDF. - -```{r, message=FALSE, warning=FALSE} -# normalize multiome RNA -DefaultAssay(obj.multi) <- "RNA" -obj.multi <- SCTransform(obj.multi, verbose = FALSE) - -# normalize multiome ATAC -DefaultAssay(obj.multi) <- "ATAC" -obj.multi <- RunTFIDF(obj.multi) -obj.multi <- FindTopFeatures(obj.multi, min.cutoff = "q0") - -# normalize query -obj.atac <- RunTFIDF(obj.atac) - -``` - -## Map scATAC-seq dataset using bridge integration - -Now that we have the reference, query, and bridge datasets set up, we can begin integration. The bridge dataset enables translation between the scRNA-seq reference and the scATAC-seq query, effectively augmenting the reference so that it can map a new data type. We call this an extended reference, and first set it up. Note that you can save the results of this function and map multiple scATAC-seq datasets without having to rerun. - - -```{r, message=FALSE, warning=FALSE} -# Drop first dimension for ATAC reduction -dims.atac <- 2:50 -dims.rna <- 1:50 -DefaultAssay(obj.multi) <- "RNA" -DefaultAssay(obj.rna) <- "SCT" -obj.rna.ext <- PrepareBridgeReference(reference = obj.rna, - bridge = obj.multi, - reference.reduction = "spca", - reference.dims = dims.rna, - normalization.method = "SCT" -) -``` - -Now, we can directly find anchors between the extended reference and query objects. We use the `FindBridgeTransferAnchors` function, which translates the query dataset using the same dictionary as was used to translate the reference, and then identifies anchors in this space. The function is meant to mimic our `FindTransferAnchors` function, but to identify correspondences across modalities. - -```{r, message=FALSE, warning=FALSE} -bridge.anchor <- FindBridgeTransferAnchors(extended.reference = obj.rna.ext, - query = obj.atac, - reduction = "lsiproject", - dims = dims.atac -) -``` - - -Once we have identified anchors, we can map the query dataset onto the reference. The `MapQuery` function is the same as we have [previously introduced for reference mapping](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html) . It transfers cell annotations from the reference dataset, and also visualizes the query dataset on a previously computed UMAP embedding. Since our reference dataset contains cell type annotations at three levels of resolution (l1 - l3), we can transfer each level to the query dataset. - - -```{r, message=FALSE, warning=FALSE} - -obj.atac <- MapQuery(anchorset = bridge.anchor, - reference = obj.rna, - query = obj.atac, - refdata = list( - l1 = "celltype.l1", - l2 = "celltype.l2", - l3 = "celltype.l3"), - reduction.model = "wnn.umap" -) -``` - -Now we can visualize the results, plotting the scATAC-seq cells based on their predicted annotations, on the reference UMAP embedding. You can see that each scATAC-seq cell has been assigned a cell name based on the scRNA-seq defined cell ontology. - -```{r, message=FALSE, warning=FALSE} -DimPlot(obj.atac, group.by = "predicted.l2", reduction = "ref.umap", label = TRUE) + ggtitle("ATAC") + NoLegend() -``` - -## Assessing the mapping - -To assess the mapping and cell type predictions, we will first see if the predicted cell type labels are concordant with an unsupervised analysis of the scATAC-seq dataset. We follow the standard unsupervised processing workflow for scATAC-seq data: - -```{r, message=FALSE, warning=FALSE} -obj.atac <- FindTopFeatures(obj.atac, min.cutoff = "q0") -obj.atac <- RunSVD(obj.atac) -obj.atac <- RunUMAP(obj.atac, reduction = "lsi", dims = 2:50) -``` - -Now, we visualize the predicted cluster labels on the unsupervised UMAP emebdding. We can see that predicted cluster labels (from the scRNA-seq reference) are concordant with the structure of the scATAC-seq data. However, there are some cell types (i.e. Treg), that do not appear to separate in unsupervised analysis. These may be prediction errors, or cases where the reference mapping provides additional resolution. - -```{r, pbmcdimplots, message=FALSE, warning=FALSE} -DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) -``` - -Lastly, we validate the predicted cell types for the scATAC-seq data by examining their chromatin accessibility profiles at canonical loci. We use the `CoveragePlot` function to visualize accessibility patterns at the CD8A, FOXP3, and RORC, after grouping cells by their predicted labels. We see expected patterns in each case. For example, the PAX5 locus exhibits peaks that are accessible exclusively in B cells, and the CD8A locus shows the same in CD8 T cell subsets. Similarly, the accessibility of FOXP3, a canonical marker of regulatory T cells (Tregs), in predicted Tregs provides strong support for the accuracy of our prediction. - -```{r, message=FALSE, warning=FALSE} -CoveragePlot(obj.atac, region = "PAX5", group.by = "predicted.l1", idents = c("B", "CD4 T", "Mono", "NK"), window = 200, extend.upstream = -150000) -CoveragePlot(obj.atac, region = "CD8A", group.by = "predicted.l2", idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), extend.downstream = 5000, extend.upstream = 5000) -CoveragePlot(obj.atac, region = "FOXP3", group.by = "predicted.l2", idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), extend.downstream = 0, extend.upstream = 0) -CoveragePlot(obj.atac, region = "RORC", group.by = "predicted.l2", idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), extend.downstream = 5000, extend.upstream = 5000) -``` - -
    - **Session Info** -```{r} -sessionInfo() -``` -
    From 983edac8a0a789b165b15a50b4ea16df1988ba0a Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 25 Feb 2023 12:28:14 -0500 Subject: [PATCH 494/979] update BPCells vignettes; drop old sketch and atomic inte refs --- vignettes/COVID_SCTMapping.Rmd | 60 +++++++++++++++--- vignettes/MouseBrain_sketch_clustering.Rmd | 59 ++++++++++++++--- vignettes/ParseBio_sketch_integration.Rmd | 73 +++++++++++++++------- vignettes/vignettes.yaml | 12 ---- vignettes/vignettes_v5.yaml | 40 ++++-------- 5 files changed, 165 insertions(+), 79 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 89afe42f3..a710bd87a 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -1,8 +1,37 @@ --- -title: "R Notebook" -output: html_notebook +title: "Map COVID PBMC datasets to a healthy reference" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' --- +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + fig.width = 10, + time_it = TRUE, + error = TRUE +) +``` + ## load package ```{r, warning=F, message=F} @@ -13,13 +42,12 @@ library(dplyr) ## load matrix ```{r, warning=F, message=F} - time0_loadMatrix <- system.time({ -file.dir <- "/brahms/haoy/vignette_data/PBMCVignette/" +file.dir <- "../data/PBMCVignette/" files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") -meta.list <- readRDS('/brahms/haoy/vignette_data/PBMCVignette/PBMC_meta.list') +meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list') names(meta.list) <- gsub('_processed.BPCells','',files.set) input.list <- list() @@ -54,9 +82,7 @@ time1_normalize <- system.time({ ## load reference ```{r} - -obj.ref <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") - +obj.ref <- readRDS("../data/pbmc_multimodal_2023.rds") obj.ref ``` ## mapping @@ -173,8 +199,24 @@ marker.list.filter <- lapply(marker.list, function(x) { bulk$disease <- factor(bulk$disease, levels = c('healthy', 'COVID-19')) ``` + ```{r, fig.width=10, fig.height=5} VlnPlot(bulk, features = 'MX1', group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +``` +```{r save.img, include=TRUE} +library(ggplot2) +ggsave(filename = "../output/images/COVID_SCTMapping.jpg", height = 10, width = 7, plot = p3, quality = 50) +``` + +```{r save.times, include=TRUE} +print(as.data.frame(all_times)) +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/COVID_SCTMapping.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() ``` - \ No newline at end of file +
    diff --git a/vignettes/MouseBrain_sketch_clustering.Rmd b/vignettes/MouseBrain_sketch_clustering.Rmd index d2755b6a7..6d090f030 100755 --- a/vignettes/MouseBrain_sketch_clustering.Rmd +++ b/vignettes/MouseBrain_sketch_clustering.Rmd @@ -1,7 +1,36 @@ --- -title: "R Notebook" -output: html_notebook +title: "Mouse brain sketch clustering" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' --- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + fig.width = 10, + time_it = TRUE, + error = TRUE +) +``` ## load library ```{r, warning=FALSE, message=FALSE} @@ -13,10 +42,9 @@ library(BPCells) ```{r, warning=FALSE, message=FALSE} t0_CreateObject <- system.time({ -mat <- open_matrix_dir("/brahms/haoy/test/mouse_1M_neurons_counts")[,1:1e5] +mat <- open_matrix_dir("../data/mouse_1M_neurons_counts")[,1:1e5] -devtools::load_all("/brahms/haoy//package/MetricPatch/") -mat <- ConvertEnsembleToSymbol(mat = mat, species = 'mouse') +mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse') options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) obj <- CreateSeuratObject(counts = mat ) @@ -152,6 +180,23 @@ obj.sub <- FindClusters(obj.sub) ``` ```{r} -DimPlot(obj.sub, label = T) + NoLegend() +p <- DimPlot(obj.sub, label = T) + NoLegend() +p +``` -``` \ No newline at end of file +```{r save.img, include=TRUE} +library(ggplot2) +ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 10, width = 7, plot = p, quality = 50) +``` + +```{r save.times, include=TRUE} +print(as.data.frame(all_times)) +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/MouseBrain_sketch_clustering.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index d16a5ce0c..e93e33647 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -1,8 +1,37 @@ --- -title: "R Notebook" -output: html_notebook +title: "Sketch integration using a 1 million cell dataset from Parse Biosciences" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' --- +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + fig.width = 10, + time_it = TRUE, + error = TRUE +) +``` + ## load package ```{r, warning=F, message=F} @@ -12,15 +41,12 @@ library(dplyr) ``` ## load matrix + ```{r, warning=F, message=F} - time0_loadMatrix <- system.time({ - mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/') - meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds') + mat <- open_matrix_dir('../data/pbmc_150k_sparse/') + meta <- readRDS('../data/ParseBio_PBMC_meta_100K.rds') meta$disease <- sample(c('H','D'), nrow(meta), replace = T) - #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//') - #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds') - }) ``` @@ -31,12 +57,6 @@ options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) time1_normalize <- system.time({ object <- CreateSeuratObject(counts = mat, meta.data = meta) object <- NormalizeData(object, verbose = FALSE) - - # object[['RNA']]$data <- write_matrix_dir( - # mat = object[['RNA']]$data, - # dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data' - # ) - #object[['RNA']]$data <- open_matrix_dir(dir = '/brahms/haoy/test/pbmc_ParseBio_sparse_data') }) @@ -111,10 +131,6 @@ p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.5) + p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend() p1+p2 -``` -## save object -```{r} -#time7_saveRDS <- system.time(saveRDS(object, "/brahms/haoy/test/pbmc_ParseBio_seurat.rds")) ``` ## pseudo-bulk @@ -179,11 +195,7 @@ for (i in 1:length(all_T)) { print(paste('Total time ', round(overall, digits = 1), 'mins' )) ``` - - - ```{r,fig.height = 20, fig.width = 15} - Idents(bulk) <- 'celltype' marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) marker %>% @@ -191,5 +203,22 @@ marker %>% top_n(n = -5, wt = p_val) -> top5 bulk <- ScaleData(bulk, features = top5$gene) DoHeatmap(bulk, features = top5$gene) + NoLegend() +``` +```{r save.img, include=TRUE} +library(ggplot2) +plot <- DoHeatmap(bulk, features = top5$gene) + NoLegend() +ggsave(filename = "../output/images/ParseBio_sketch_integration.jpg", height = 10, width = 7, plot = plot, quality = 50) +``` + +```{r save.times, include=TRUE} +print(as.data.frame(all_times)) +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/ParseBio_sketch_integration.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() ``` +
    diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml index 74d441f11..47dae1c57 100644 --- a/vignettes/vignettes.yaml +++ b/vignettes/vignettes.yaml @@ -88,18 +88,6 @@ Examples of how to perform normalization, feature selection, integration, and differential expression with an updated version of sctransform. image: assets/sctransform_v2.png - - title: Cross-modality Bridge Integration - name: bridge_integration_vignette - summary: | - Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. - image: bridge_integration.png - - - title: Atomic sketch integration for scRNA-seq - name: atomic_integration - summary: | - Perform community-scale integration of scRNA-seq datasets by atomic sketch integration. - image: atomic_integration.png - - category: Other vignettes: - title: Visualization diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 67e1bbccc..db8c87143 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -42,38 +42,20 @@ Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes. image: cell_cycle_vignette.jpg - - title: BPCells Sketch Clustering (Log) - name: BPCells_sketch_clustering_mouse_brain + - title: Sketch Clustering (BPCells) + name: MouseBrain_sketch_clustering summary: | - Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5. - image: BPCells_sketch_clustering_mouse_brain.png + Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5. + image: MouseBrain_sketch_clustering.jpg - - title: BPCells Sketch Clustering (SCTransform) - name: BPCells_sketch_clustering_mouse_brain_SCT + - title: COVID Mapping (BPCells) + name: COVID_SCTMapping summary: | - Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat5. - image: BPCells_sketch_clustering_mouse_brain.png + Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. + image: COVID_SCTMapping.jpg - - title: BPCells Sketch integration (Log) - name: BPCells_sketch_integration_1M + - title: Sketch Integration (BPCells) + name: ParseBio_sketch_integration summary: | Perform sketch integration on a large dataset from Parse Biosciences. - image: BPCells_sketch_inte_1M.png - - - title: BPCells Sketch integration (SCTransform) - name: BPCells_sketch_integration_1M_SCT - summary: | - Perform sketch integration on a large dataset from Parse Biosciences. - image: BPCells_sketch_inte_1M.png - - - title: Chunked mapping (Log) - name: BPCells_COVID_logMapping - summary: | - Iteratively map large COVID datasets onto a reference for cell type annotation. - image: BPCells_COVID.png - - - title: Chunked mapping (SCTransform) - name: BPCells_COVID_SCTMapping - summary: | - Iteratively map large COVID datasets onto a reference for cell type annotation. - image: BPCells_COVID.png + image: ParseBio_sketch_integration.jpg From 6e3bea563a75425b465fef8d50dd12cc26b837e6 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 27 Feb 2023 13:19:11 -0500 Subject: [PATCH 495/979] run on any self-hosted runner --- .github/workflows/R_CMD_check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R_CMD_check.yaml b/.github/workflows/R_CMD_check.yaml index 8c005ab38..b7b64ab33 100644 --- a/.github/workflows/R_CMD_check.yaml +++ b/.github/workflows/R_CMD_check.yaml @@ -16,7 +16,7 @@ jobs: name: R CMD check container: image: satijalab/seurat:develop - runs-on: [self-hosted, satijalab05] + runs-on: [ self-hosted ] steps: - uses: actions/checkout@v3 From e527f3a3df9593dac1b1eb39330a303da5135dde Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 27 Feb 2023 14:59:41 -0500 Subject: [PATCH 496/979] dealing w the fact you cant set 'data' layer to 0x0 matrix --- R/objects.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index c0d07bd01..427ff2366 100644 --- a/R/objects.R +++ b/R/objects.R @@ -571,7 +571,13 @@ DietSeurat <- function( object[[assay]][[lyr]] <- NULL object }, error = function(e) { - slot(object = object[[assay]], name = lyr) <- new(Class = "dgCMatrix") + if (lyr == "data"){ + object[[assay]][[lyr]] <- sparseMatrix(i = 1, j = 1, x = 1, + dims = dim(object[[assay]][[lyr]]), + dimnames = dimnames(object[[assay]][[lyr]])) + } else{ + slot(object = object[[assay]], name = lyr) <- new(Class = "dgCMatrix") + } message("Converting layer ", lyr, " in assay ", assay, " to empty dgCMatrix") object From c0a7183c6c67833fca8100edb03a27af4d89a4d7 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 27 Feb 2023 15:14:32 -0500 Subject: [PATCH 497/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 79e94e513..945e74f55 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9034 +Version: 4.9.9.9035 Date: 2023-02-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 900b02768058d649a7b382e64c20d7c3a28f5b0b Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Feb 2023 11:10:20 -0500 Subject: [PATCH 498/979] add xenium to spatial vignette --- vignettes/seurat5_spatial_vignette_2.Rmd | 81 ++++++++++++++++++++++++ vignettes/spatial_vignette_2.Rmd | 80 +++++++++++++++++++++++ vignettes/vignettes.yaml | 4 +- vignettes/vignettes_v5.yaml | 4 +- 4 files changed, 165 insertions(+), 4 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 5c759c60c..7c79f731e 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -199,6 +199,87 @@ We can visualize individual molecules plotted at higher resolution after zooming ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], cols = "polychrome", mols.size = 1, alpha = 0.5, mols.cols = c("red", "blue", "yellow", "green")) ``` +# Mouse Brain: 10x Genomics Xenium In Situ + +In this section we'll analyze data produced by the Xenium platform. The vignette demonstrates how to load the per-transcript location data, cell x gene matrix, cell segmentation, and cell centroid information available in the Xenium outputs. The resulting Seurat object will contain the gene expression profile of each cell, the centroid and boundary of each cell, and the location of each individual detected transcript. The per-cell gene expression profiles are similar to standard single-cell RNA-seq and can be analyzed using the same tools. + +This uses the `Tiny subset` dataset from 10x Genomics provided in the [Fresh Frozen Mouse Brain for Xenium Explorer Demo](https://www.10xgenomics.com/resources/datasets/fresh-frozen-mouse-brain-for-xenium-explorer-demo-1-standard) which can be downloaded as described below. These analysis steps are also compatible with the larger `Full coronal section`, but will take longer to execute. + +```{bash, eval=FALSE} +wget https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP/Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip +unzip Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip +``` + +First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute. + +```{r load.xenium, results='hide'} +path <- "../data/xenium_tiny_subset" +# Load the Xenium data +xenium.obj <- LoadXenium(path, fov = "fov") +# remove cells with 0 counts +xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0) +``` + +Spatial information is loaded into slots of the Seurat object, labelled by the name of "field of view" (FOV) being loaded. Initially all the data is loaded into the FOV named `fov`. Later, we will make a cropped FOV that zooms into a region of interest. + +Standard QC plots provided by Seurat are available via the `Xenium` assay. Here are violin plots of genes per cell (`nFeature_Xenium`) and transcript counts per cell (`nCount_Xenium`) +```{r vlnplot.xenium} +VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2, pt.size = 0) +``` + +Next, we plot the positions of the pan-inhibitory neuron marker Gad1, inhibitory neuron sub-type markers Pvalb, and Sst, and astrocyte marker Gfap on the tissue using `ImageDimPlot()`. +```{r p2.xenium, fig.width=10, fig.height=8} +ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Sst", "Pvalb", "Gfap"), nmols = 20000) +``` + +```{r save.img, include=FALSE} +library(ggplot2) +plot <- ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Gfap"), nmols = 40000, alpha=0.01, dark.background = F, mols.alpha = 0.6) + coord_flip() + scale_x_reverse() + NoLegend() +ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +Here we visualize the expression level of some key layer marker genes at the per-cell level using `ImageFeaturePlot()` which is analogous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. We manually adjust the `max.cutoff` for each gene to roughly the 90th percentile (which can be specified with `max.cutoff='q90'`) of it's count distribution to improve contrast. +```{r mat.xenium, message=FALSE, warning=FALSE, fig.width=12, fig.height=12} +ImageFeaturePlot(xenium.obj, features = c("Cux2", "Rorb", "Bcl11b", "Foxp2"), max.cutoff = c(25, 35, 12, 10), size = 0.75, cols = c("white", "red")) +``` + +We can zoom in on a chosen area with the `Crop()` function. Once zoomed-in, we can visualize cell segmentation boundaries along with individual molecules. +```{r cropping.xenium, message=FALSE, warning=FALSE, fig.width=10, fig.height=8} +cropped.coords <- Crop(xenium.obj[["fov"]], x = c(1200, 2900), y = c(3750, 4550), coords = "plot") +xenium.obj[["zoom"]] <- cropped.coords +# visualize cropped area with cell segmentations & selected molecules +DefaultBoundary(xenium.obj[["zoom"]]) <- "segmentation" +ImageDimPlot(xenium.obj, fov = "zoom", + axes = TRUE, border.color = "white", border.size = 0.1, + cols = "polychrome", coord.fixed = FALSE, + molecules = c("Gad1", "Sst", "Npy2r", "Pvalb", "Nrn1"), nmols = 10000) +``` + +Next, we use SCTransform for normalization followed by standard dimensionality reduction and clustering. This step takes about 5 minutes from start to finish. +```{r unsupervised.xenium, results='hide'} +xenium.obj <- SCTransform(xenium.obj, assay = "Xenium") +xenium.obj <- RunPCA(xenium.obj, npcs = 30, features = rownames(xenium.obj)) +xenium.obj <- RunUMAP(xenium.obj, dims = 1:30) +xenium.obj <- FindNeighbors(xenium.obj, reduction = "pca", dims = 1:30) +xenium.obj <- FindClusters(xenium.obj, resolution = 0.3) +``` + +We can then visualize the results of the clustering by coloring each cell according to its cluster either in UMAP space with `DimPlot()` or overlaid on the image with `ImageDimPlot()`. +```{r umap.xenium, fig.width=10, fig.height=7} +DimPlot(xenium.obj) +``` + +We can visualize the expression level of the markers we looked at earlier on the UMAP coordinates. +```{r features.xenium, fig.width=8, fig.height=10} +FeaturePlot(xenium.obj, features = c("Cux2", "Bcl11b", "Foxp2", "Gad1", "Sst", "Gfap")) +``` + +We can now use `ImageDimPlot()` to color the cell positions colored by the cluster labels determined in the previous step. +```{r clusters.xenium, fig.width=13, fig.height=13} +ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75) +``` + + # Human Lung: Nanostring CosMx Spatial Molecular Imager This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 0ff3428e6..eb8c76c01 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -198,6 +198,86 @@ We can visualize individual molecules plotted at higher resolution after zooming ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], cols = "polychrome", mols.size = 1, alpha = 0.5, mols.cols = c("red", "blue", "yellow", "green")) ``` +# Mouse Brain: 10x Genomics Xenium In Situ + +In this section we'll analyze data produced by the Xenium platform. The vignette demonstrates how to load the per-transcript location data, cell x gene matrix, cell segmentation, and cell centroid information available in the Xenium outputs. The resulting Seurat object will contain the gene expression profile of each cell, the centroid and boundary of each cell, and the location of each individual detected transcript. The per-cell gene expression profiles are similar to standard single-cell RNA-seq and can be analyzed using the same tools. + +This uses the `Tiny subset` dataset from 10x Genomics provided in the [Fresh Frozen Mouse Brain for Xenium Explorer Demo](https://www.10xgenomics.com/resources/datasets/fresh-frozen-mouse-brain-for-xenium-explorer-demo-1-standard) which can be downloaded as described below. These analysis steps are also compatible with the larger `Full coronal section`, but will take longer to execute. + +```{bash, eval=FALSE} +wget https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP/Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip +unzip Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip +``` + +First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute. + +```{r load.xenium, results='hide'} +path <- "../data/xenium_tiny_subset" +# Load the Xenium data +xenium.obj <- LoadXenium(path, fov = "fov") +# remove cells with 0 counts +xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0) +``` + +Spatial information is loaded into slots of the Seurat object, labelled by the name of "field of view" (FOV) being loaded. Initially all the data is loaded into the FOV named `fov`. Later, we will make a cropped FOV that zooms into a region of interest. + +Standard QC plots provided by Seurat are available via the `Xenium` assay. Here are violin plots of genes per cell (`nFeature_Xenium`) and transcript counts per cell (`nCount_Xenium`) +```{r vlnplot.xenium} +VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2, pt.size = 0) +``` + +Next, we plot the positions of the pan-inhibitory neuron marker Gad1, inhibitory neuron sub-type markers Pvalb, and Sst, and astrocyte marker Gfap on the tissue using `ImageDimPlot()`. +```{r p2.xenium, fig.width=10, fig.height=8} +ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Sst", "Pvalb", "Gfap"), nmols = 20000) +``` + +```{r save.img, include=FALSE} +library(ggplot2) +plot <- ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Gfap"), nmols = 40000, alpha=0.01, dark.background = F, mols.alpha = 0.6) + coord_flip() + scale_x_reverse() + NoLegend() +ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 7, width = 12, plot = plot, quality = 50) +``` + +Here we visualize the expression level of some key layer marker genes at the per-cell level using `ImageFeaturePlot()` which is analogous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. We manually adjust the `max.cutoff` for each gene to roughly the 90th percentile (which can be specified with `max.cutoff='q90'`) of it's count distribution to improve contrast. +```{r mat.xenium, message=FALSE, warning=FALSE, fig.width=12, fig.height=12} +ImageFeaturePlot(xenium.obj, features = c("Cux2", "Rorb", "Bcl11b", "Foxp2"), max.cutoff = c(25, 35, 12, 10), size = 0.75, cols = c("white", "red")) +``` + +We can zoom in on a chosen area with the `Crop()` function. Once zoomed-in, we can visualize cell segmentation boundaries along with individual molecules. +```{r cropping.xenium, message=FALSE, warning=FALSE, fig.width=10, fig.height=8} +cropped.coords <- Crop(xenium.obj[["fov"]], x = c(1200, 2900), y = c(3750, 4550), coords = "plot") +xenium.obj[["zoom"]] <- cropped.coords +# visualize cropped area with cell segmentations & selected molecules +DefaultBoundary(xenium.obj[["zoom"]]) <- "segmentation" +ImageDimPlot(xenium.obj, fov = "zoom", + axes = TRUE, border.color = "white", border.size = 0.1, + cols = "polychrome", coord.fixed = FALSE, + molecules = c("Gad1", "Sst", "Npy2r", "Pvalb", "Nrn1"), nmols = 10000) +``` + +Next, we use SCTransform for normalization followed by standard dimensionality reduction and clustering. This step takes about 5 minutes from start to finish. +```{r unsupervised.xenium, results='hide'} +xenium.obj <- SCTransform(xenium.obj, assay = "Xenium") +xenium.obj <- RunPCA(xenium.obj, npcs = 30, features = rownames(xenium.obj)) +xenium.obj <- RunUMAP(xenium.obj, dims = 1:30) +xenium.obj <- FindNeighbors(xenium.obj, reduction = "pca", dims = 1:30) +xenium.obj <- FindClusters(xenium.obj, resolution = 0.3) +``` + +We can then visualize the results of the clustering by coloring each cell according to its cluster either in UMAP space with `DimPlot()` or overlaid on the image with `ImageDimPlot()`. +```{r umap.xenium, fig.width=10, fig.height=7} +DimPlot(xenium.obj) +``` + +We can visualize the expression level of the markers we looked at earlier on the UMAP coordinates. +```{r features.xenium, fig.width=8, fig.height=10} +FeaturePlot(xenium.obj, features = c("Cux2", "Bcl11b", "Foxp2", "Gad1", "Sst", "Gfap")) +``` + +We can now use `ImageDimPlot()` to color the cell positions colored by the cluster labels determined in the previous step. +```{r clusters.xenium, fig.width=13, fig.height=13} +ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75) +``` + # Human Lung: Nanostring CosMx Spatial Molecular Imager This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml index 47dae1c57..81a60a7c6 100644 --- a/vignettes/vignettes.yaml +++ b/vignettes/vignettes.yaml @@ -21,8 +21,8 @@ - title: Analysis of spatial datasets (Imaging-based) name: spatial_vignette_2 summary: | - Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Nanostring SMI, and CODEX. - image: spatial_vignette_2.png + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Xenium, CosMx SMI, and CODEX. + image: spatial_vignette_2.jpg - category: Data Integration vignettes: diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index db8c87143..ff9ae40dd 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -21,8 +21,8 @@ - title: Analysis of spatial datasets (Imaging-based) name: seurat5_spatial_vignette_2 summary: | - Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Nanostring SMI, and CODEX. - image: spatial_vignette_2.png + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Xenium, CosMx SMI, and CODEX. + image: spatial_vignette_2.jpg - title: SCTransform name: seurat5_sctransform_vignette From 8eaa5c044214fb6ecffa89bce254ef40f681df6a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 28 Feb 2023 13:14:06 -0500 Subject: [PATCH 499/979] k.filter not for v5 --- R/integration.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/integration.R b/R/integration.R index 6ec43ded9..9e72767ec 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5943,6 +5943,11 @@ ValidateParams_FindTransferAnchors <- function( if (reduction == "lsiproject") { ModifyParam(param = "k.filter", value = NA) } + if (inherits(x = reference[[reference.assay]], what = 'Assay5') || + inherits(x = query[[query.assay]], what = 'Assay5')) { + # current filter anchors not support for v5 assay + ModifyParam(param = "k.filter", value = NA) + } if (!is.na(x = k.filter) && k.filter > ncol(x = query)) { warning("k.filter is larger than the number of cells present in the query.\n", "Continuing without anchor filtering.", From 453e1f06babbbb833e1d427c8c52a6de1fb4da09 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Feb 2023 13:19:01 -0500 Subject: [PATCH 500/979] remove timing from bpcells vignettes --- vignettes/COVID_SCTMapping.Rmd | 16 ---------------- vignettes/MouseBrain_sketch_clustering.Rmd | 18 ------------------ vignettes/ParseBio_sketch_integration.Rmd | 17 ----------------- 3 files changed, 51 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index a710bd87a..a79e19249 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -151,22 +151,6 @@ bulk <- subset(bulk, subset = disease != 'other') ``` -## computing time summary -```{r} -all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 - - -for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 1), 'mins' )) -``` ```{R} marker.list <- list() celltype.set <- unique(bulk$celltype ) diff --git a/vignettes/MouseBrain_sketch_clustering.Rmd b/vignettes/MouseBrain_sketch_clustering.Rmd index 6d090f030..40d007cd4 100755 --- a/vignettes/MouseBrain_sketch_clustering.Rmd +++ b/vignettes/MouseBrain_sketch_clustering.Rmd @@ -132,24 +132,6 @@ DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_fu ``` -```{r} -all_T <- ls(pattern = '^t') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 - - -for (i in 1:length(all_T)) { - time.i <- get(all_T[i])['elapsed'] - if (time.i > 60) { - print(paste(all_T[i], round(time.i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(time.i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 3), 'mins' )) - -``` - - ```{r} obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index e93e33647..7be50c29f 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -178,23 +178,6 @@ marker.list.filter <- lapply(marker.list, function(x) { VlnPlot(bulk, features = 'FOXO3', group.by = 'celltype', split.by = 'disease') ``` -## computing time summary -```{r} -all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 - - -for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 1), 'mins' )) -``` - ```{r,fig.height = 20, fig.width = 15} Idents(bulk) <- 'celltype' marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) From c30cf67009c5d8f7603ecb5147c51789ed305954 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 6 Mar 2023 09:40:05 -0500 Subject: [PATCH 501/979] update vignettes --- vignettes/COVID_SCTMapping.Rmd | 2 +- vignettes/MouseBrain_sketch_clustering.Rmd | 3 ++- vignettes/ParseBio_sketch_integration.Rmd | 4 ++-- vignettes/seurat5_spatial_vignette_2.Rmd | 12 ++++++------ vignettes/spatial_vignette_2.Rmd | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index a79e19249..4d7585e23 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -190,7 +190,7 @@ VlnPlot(bulk, features = 'MX1', group.by = 'celltype', split.by = 'disease', col ```{r save.img, include=TRUE} library(ggplot2) -ggsave(filename = "../output/images/COVID_SCTMapping.jpg", height = 10, width = 7, plot = p3, quality = 50) +ggsave(filename = "../output/images/COVID_SCTMapping.jpg", height = 7, width = 8, plot = p3, quality = 50) ``` ```{r save.times, include=TRUE} diff --git a/vignettes/MouseBrain_sketch_clustering.Rmd b/vignettes/MouseBrain_sketch_clustering.Rmd index 40d007cd4..c736003b3 100755 --- a/vignettes/MouseBrain_sketch_clustering.Rmd +++ b/vignettes/MouseBrain_sketch_clustering.Rmd @@ -168,7 +168,8 @@ p ```{r save.img, include=TRUE} library(ggplot2) -ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 10, width = 7, plot = p, quality = 50) +p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "predicted.cluster_full", alpha = 0.1) + NoLegend() +ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) ``` ```{r save.times, include=TRUE} diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 7be50c29f..2ec61acfd 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -190,8 +190,8 @@ DoHeatmap(bulk, features = top5$gene) + NoLegend() ```{r save.img, include=TRUE} library(ggplot2) -plot <- DoHeatmap(bulk, features = top5$gene) + NoLegend() -ggsave(filename = "../output/images/ParseBio_sketch_integration.jpg", height = 10, width = 7, plot = plot, quality = 50) +plot <- DoHeatmap(bulk, features = top5$gene) + NoLegend() + theme(axis.text.y = element_blank()) +ggsave(filename = "../output/images/ParseBio_sketch_integration.jpg", height = 7, width = 7, plot = plot, quality = 50) ``` ```{r save.times, include=TRUE} diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 7c79f731e..db9ca53b2 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -62,7 +62,7 @@ We use the `LoadVizgen()` function, which we have written to read in the output ```{r, message=FALSE, warning=FALSE} # Loading segmentations is a slow process and multi processing with the future pacakge is recommended -vizgen.obj <- LoadVizgen(data.dir = "../data/vizgen/s2r1/", fov = "s2r1") +vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/vignette_data/vizgen/s2r1/", fov = "s2r1") ``` The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: @@ -213,7 +213,7 @@ unzip Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute. ```{r load.xenium, results='hide'} -path <- "../data/xenium_tiny_subset" +path <- "/brahms/hartmana/vignette_data/xenium_tiny_subset" # Load the Xenium data xenium.obj <- LoadXenium(path, fov = "fov") # remove cells with 0 counts @@ -235,7 +235,7 @@ ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Sst", "Pvalb", "Gfa ```{r save.img, include=FALSE} library(ggplot2) plot <- ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Gfap"), nmols = 40000, alpha=0.01, dark.background = F, mols.alpha = 0.6) + coord_flip() + scale_x_reverse() + NoLegend() -ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 7, width = 12, plot = plot, quality = 50) +ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 5, width = 9, plot = plot) ``` Here we visualize the expression level of some key layer marker genes at the per-cell level using `ImageFeaturePlot()` which is analogous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. We manually adjust the `max.cutoff` for each gene to roughly the 90th percentile (which can be specified with `max.cutoff='q90'`) of it's count distribution to improve contrast. @@ -289,12 +289,12 @@ In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `Lo For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. ```{r load} -nano.obj <- LoadNanostring(data.dir = "../data/nanostring/lung5_rep1", fov="lung5.rep1") +nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") ``` ```{r integration} # add in precomputed Azimuth annotations -azimuth.data <- readRDS("../data/nanostring_data.Rds") +azimuth.data <- readRDS("/brahms/hartmana/vignette_data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 @@ -376,7 +376,7 @@ First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function ```{r} codex.obj <- LoadAkoya( - filename = "../data/LN7910_20_008_11022020_reg001_compensated.csv", + filename = "/brahms/hartmana/vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", type = "processor", fov = "HBM754.WKLP.262" ) diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index eb8c76c01..b891d1cf0 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -234,7 +234,7 @@ ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Sst", "Pvalb", "Gfa ```{r save.img, include=FALSE} library(ggplot2) plot <- ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Gfap"), nmols = 40000, alpha=0.01, dark.background = F, mols.alpha = 0.6) + coord_flip() + scale_x_reverse() + NoLegend() -ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 7, width = 12, plot = plot, quality = 50) +ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 5, width = 9, plot = plot) ``` Here we visualize the expression level of some key layer marker genes at the per-cell level using `ImageFeaturePlot()` which is analogous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. We manually adjust the `max.cutoff` for each gene to roughly the 90th percentile (which can be specified with `max.cutoff='q90'`) of it's count distribution to improve contrast. From cca9a1eca94394fb06f5a986f5aec8488f0882a5 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 7 Mar 2023 15:53:32 -0500 Subject: [PATCH 502/979] bridge integration vignette --- vignettes/seurat5_integration_bridge.Rmd | 272 +++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 vignettes/seurat5_integration_bridge.Rmd diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd new file mode 100644 index 000000000..f4295a36f --- /dev/null +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -0,0 +1,272 @@ +--- +title: "Dictionary Learning for cross-modality integration" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + warning = FALSE, + error = TRUE, + message = FALSE, + fig.width = 8, + time_it = TRUE +) +``` + +In the same way that read mapping tools have transformed genome sequence analysis, the ability to map new datasets to established references represents an exciting opportunity for the field of single-cell genomics. Along with others in the community, we have developed [tools to map and interpret query datasets](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html), and have also constructed a [set of scRNA-seq datasets for diverse mammalian tissues](http://azimuth.hubmapconsortium.org). + +A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, bioRxiv 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. + +In this vignette we demonstrate: + +* Loading in and pre-processing the scATAC-seq, multiome, and scRNA-seq reference datasets +* Mapping the scATAC-seq dataset via bridge integration +* Exploring and assessing the resulting annotations + +First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. + +```{r, message=FALSE, warning=FALSE} +library(remotes) +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratDisk) +library(Signac) +library(EnsDb.Hsapiens.v86) +library(dplyr) +library(ggplot2) +``` + +## Load the bridge, query, and reference datasets + +We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a healthy donor. The dataset measures RNA-seq and ATAC-seq in the same cell, and is available for download from 10x Genomics [here](https://www.10xgenomics.com/resources/datasets/pbmc-from-a-healthy-donor-granulocytes-removed-through-cell-sorting-10-k-1-standard-2-0-0). We follow the loading instructions from the [Signac package vignettes](https://satijalab.org/signac/articles/pbmc_multiomic.html). Note that when using Signac, please make sure you are using the [latest version of Bioconductor]([http://www.bioconductor.org/news/bioc_3_14_release/]), as [users have reported errors](https://github.com/timoast/signac/issues/687) when using older BioC versions. + +
    + **Load and setup the 10x multiome object** + +```{r} +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +# extract RNA and ATAC data +rna_counts <- inputdata.10x$`Gene Expression` +atac_counts <- inputdata.10x$Peaks +# Create Seurat object +obj.multi <- CreateSeuratObject(counts = rna_counts) +# Get % of mitochondrial genes +obj.multi[["percent.mt"]] <- PercentageFeatureSet(obj.multi, pattern = "^MT-") + +# add the ATAC-seq assay +options(Seurat.object.assay.version = "v3") +grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) +grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) +atac_counts <- atac_counts[as.vector(grange.use), ] +# Get gene annotations +annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) +# Change style to UCSC +seqlevelsStyle(annotations) <- 'UCSC' +genome(annotations) <- "hg38" +# File with ATAC per fragment information file +frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +# Add in ATAC-seq data as ChromatinAssay object +chrom_assay <- CreateChromatinAssay( + counts = atac_counts, + sep = c(":", "-"), + genome = 'hg38', + fragments = frag.file, + min.cells = 10, + annotation = annotations +) +# Add the ATAC assay to the multiome object +obj.multi[["ATAC"]] <- chrom_assay +# Filter ATAC data based on QC metrics +obj.multi <- subset( + x = obj.multi, + subset = nCount_ATAC < 7e4 & + nCount_ATAC > 5e3 & + nCount_RNA < 25000 & + nCount_RNA > 1000 & + percent.mt < 20 +) +``` +
    + +--- + +The scATAC-seq query dataset represents ~10,000 PBMC from a healthy donor, and is available for download [here](https://www.10xgenomics.com/resources/datasets/10-k-human-pbm-cs-atac-v-1-1-chromium-x-1-1-standard-2-0-0). We load in the peak/cell matrix, store the path to the fragments file, and add gene annotations to the object, following the steps as with the ATAC data in the multiome experiment. + +We note that it is important to quantify the same set of genomic features in the query dataset as are quantified in the multi-omic bridge. We therefore requantify the set of scATAC-seq peaks using the `FeatureMatrix` command. This is also described in the [Signac vignettes](https://satijalab.org/signac/articles/integrate_atac.html) and shown below. + +
    + **Load and setup the 10x scATAC-seq query** + +```{r, message=FALSE, warning=FALSE} +# Load ATAC dataset +atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +# Get gene annotations +annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) +# Change to UCSC style +seqlevelsStyle(annotation) <- 'UCSC' +# Create ChromatinAssay for ATAC data +atac_pbmc_assay <- CreateChromatinAssay( + counts = atac_pbmc_data, + sep = c(":", "-"), + fragments = fragpath, + annotation = annotation +) +# Requantify query ATAC to have same features as multiome ATAC dataset +requant_multiome_ATAC <- FeatureMatrix( + fragments = Fragments(atac_pbmc_assay), + features = granges(obj.multi[['ATAC']]), + cells = Cells(atac_pbmc_assay) +) +# Create assay with requantified ATAC data +ATAC_assay <- CreateChromatinAssay( + counts = requant_multiome_ATAC, + fragments = fragpath, + annotation = annotation +) +# Create Seurat sbject +obj.atac <- CreateSeuratObject(counts = ATAC_assay,assay = 'ATAC') +obj.atac[['peak.orig']] <- atac_pbmc_assay +obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) +``` +
    + +--- + +We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). + +```{r pbmc.ref} +obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +``` +
    + **What if I want to use my own reference dataset?** + +As an alternative to using a pre-built reference, you can also use your own reference. To demonstrate, you can download a scRNA-seq dataset of 23,837 human PBMC [here](https://www.dropbox.com/s/x8mu9ye2w3a63hf/20k_PBMC_scRNA.rds?dl=0), which we have already annotated. +```{r, message=FALSE, warning=FALSE, eval=FALSE} +obj.rna = readRDS("/path/to/reference.rds") +obj.rna = SCTransform(object = obj.rna) %>% RunPCA() %>% RunUMAP(dims = 1:50, return.model = TRUE) +``` +When using your own reference, set `reference.reduction = "pca"` in the `PrepareBridgeReference` function. + +
    + +--- + +# Preprocessing/normalization for all datasets + +Prior to performing bridge integration, we normalize and pre-process each of the datasets (note that the reference has already been normalized). We normalize gene expression data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1), and ATAC data using TF-IDF. + +```{r, message=FALSE, warning=FALSE} +# normalize multiome RNA +DefaultAssay(obj.multi) <- "RNA" +obj.multi <- SCTransform(obj.multi, verbose = FALSE) +# normalize multiome ATAC +DefaultAssay(obj.multi) <- "ATAC" +obj.multi <- RunTFIDF(obj.multi) +obj.multi <- FindTopFeatures(obj.multi, min.cutoff = "q0") +# normalize query +obj.atac <- RunTFIDF(obj.atac) +``` + +## Map scATAC-seq dataset using bridge integration + +Now that we have the reference, query, and bridge datasets set up, we can begin integration. The bridge dataset enables translation between the scRNA-seq reference and the scATAC-seq query, effectively augmenting the reference so that it can map a new data type. We call this an extended reference, and first set it up. Note that you can save the results of this function and map multiple scATAC-seq datasets without having to rerun. + + +```{r, message=FALSE, warning=FALSE} +# Drop first dimension for ATAC reduction +dims.atac <- 2:50 +dims.rna <- 1:50 +DefaultAssay(obj.multi) <- "RNA" +DefaultAssay(obj.rna) <- "SCT" +obj.rna.ext <- PrepareBridgeReference(reference = obj.rna, + bridge = obj.multi, + reference.reduction = "spca", + reference.dims = dims.rna, + normalization.method = "SCT" +) +``` + +Now, we can directly find anchors between the extended reference and query objects. We use the `FindBridgeTransferAnchors` function, which translates the query dataset using the same dictionary as was used to translate the reference, and then identifies anchors in this space. The function is meant to mimic our `FindTransferAnchors` function, but to identify correspondences across modalities. + +```{r, message=FALSE, warning=FALSE} +bridge.anchor <- FindBridgeTransferAnchors(extended.reference = obj.rna.ext, + query = obj.atac, + reduction = "lsiproject", + dims = dims.atac +) +``` + + +Once we have identified anchors, we can map the query dataset onto the reference. The `MapQuery` function is the same as we have [previously introduced for reference mapping](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html) . It transfers cell annotations from the reference dataset, and also visualizes the query dataset on a previously computed UMAP embedding. Since our reference dataset contains cell type annotations at three levels of resolution (l1 - l3), we can transfer each level to the query dataset. + + +```{r, message=FALSE, warning=FALSE} +obj.atac <- MapQuery(anchorset = bridge.anchor, + reference = obj.rna.ext, + query = obj.atac, + refdata = list( + l1 = "celltype.l1", + l2 = "celltype.l2", + l3 = "celltype.l3"), + reduction.model = "wnn.umap" +) +``` + +Now we can visualize the results, plotting the scATAC-seq cells based on their predicted annotations, on the reference UMAP embedding. You can see that each scATAC-seq cell has been assigned a cell name based on the scRNA-seq defined cell ontology. + +```{r, message=FALSE, warning=FALSE} +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "ref.umap", label = TRUE) + ggtitle("ATAC") + NoLegend() +``` + +## Assessing the mapping + +To assess the mapping and cell type predictions, we will first see if the predicted cell type labels are concordant with an unsupervised analysis of the scATAC-seq dataset. We follow the standard unsupervised processing workflow for scATAC-seq data: + +```{r, message=FALSE, warning=FALSE} +obj.atac <- FindTopFeatures(obj.atac, min.cutoff = "q0") +obj.atac <- RunSVD(obj.atac) +obj.atac <- RunUMAP(obj.atac, reduction = "lsi", dims = 2:50) +``` + +Now, we visualize the predicted cluster labels on the unsupervised UMAP emebdding. We can see that predicted cluster labels (from the scRNA-seq reference) are concordant with the structure of the scATAC-seq data. However, there are some cell types (i.e. Treg), that do not appear to separate in unsupervised analysis. These may be prediction errors, or cases where the reference mapping provides additional resolution. + +```{r, pbmcdimplots, message=FALSE, warning=FALSE} +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) +``` + +Lastly, we validate the predicted cell types for the scATAC-seq data by examining their chromatin accessibility profiles at canonical loci. We use the `CoveragePlot` function to visualize accessibility patterns at the CD8A, FOXP3, and RORC, after grouping cells by their predicted labels. We see expected patterns in each case. For example, the PAX5 locus exhibits peaks that are accessible exclusively in B cells, and the CD8A locus shows the same in CD8 T cell subsets. Similarly, the accessibility of FOXP3, a canonical marker of regulatory T cells (Tregs), in predicted Tregs provides strong support for the accuracy of our prediction. + +```{r, message=FALSE, warning=FALSE} +CoveragePlot(obj.atac, region = "PAX5", group.by = "predicted.l1", idents = c("B", "CD4 T", "Mono", "NK"), window = 200, extend.upstream = -150000) +CoveragePlot(obj.atac, region = "CD8A", group.by = "predicted.l2", idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), extend.downstream = 5000, extend.upstream = 5000) +CoveragePlot(obj.atac, region = "FOXP3", group.by = "predicted.l2", idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), extend.downstream = 0, extend.upstream = 0) +CoveragePlot(obj.atac, region = "RORC", group.by = "predicted.l2", idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), extend.downstream = 5000, extend.upstream = 5000) +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    From 231489577accbbaea0037152bab73970ff949b51 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 8 Mar 2023 12:12:54 -0500 Subject: [PATCH 503/979] update bridge integration vignette for website --- _pkgdown.yaml | 8 ++++++++ vignettes/seurat5_integration_bridge.Rmd | 16 ++++++++-------- vignettes/vignettes_v5.yaml | 6 ++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/_pkgdown.yaml b/_pkgdown.yaml index bec6a63bd..8841d4c53 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -133,6 +133,14 @@ navbar: href: articles/seurat5_interaction_vignette.html - text: "Merging Seurat objects" href: articles/seurat5_merge_vignette.html + - text: "Bridge Integration" + href: articles/seurat5_integration_bridge.html + - text: "Sketch Clustering (BPCells)" + href: articles/MouseBrain_sketch_clustering.html + - text: "COVID Mapping (BPCells)" + href: articles/COVID_SCTMapping.html + - text: "Sketch Integration (BPCells)" + href: articles/ParseBio_sketch_integration.html - text: Extensions href: articles/extensions.html - text: FAQ diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index f4295a36f..f4562efa1 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -63,8 +63,8 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he **Load and setup the 10x multiome object** ```{r} -# the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -80,11 +80,11 @@ grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) atac_counts <- atac_counts[as.vector(grange.use), ] # Get gene annotations annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) -# Change style to UCSC +# Change style to UCSC seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" -# File with ATAC per fragment information file -frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +# File with ATAC per fragment information file +frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( counts = atac_counts, @@ -119,8 +119,8 @@ We note that it is important to quantify the same set of genomic features in the ```{r, message=FALSE, warning=FALSE} # Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" # Get gene annotations annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) # Change to UCSC style @@ -156,7 +156,7 @@ obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") ```
    **What if I want to use my own reference dataset?** diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index ff9ae40dd..4cf722647 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -42,6 +42,12 @@ Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes. image: cell_cycle_vignette.jpg + - title: Bridge Integration + name: seurat5_integration_bridge + summary: | + Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. + image: bridge_integration.png + - title: Sketch Clustering (BPCells) name: MouseBrain_sketch_clustering summary: | From 161e6979c0222286f492616faa2e2cf12ba1f938 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 9 Mar 2023 21:09:13 -0500 Subject: [PATCH 504/979] rctd --- vignettes/seurat5_spatial_vignette.Rmd | 21 +++++++++++++-------- vignettes/spatial_vignette.Rmd | 20 ++++++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index b804304bf..3d3c2d22f 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -272,7 +272,7 @@ We consistently found superior performance using integration methods (as opposed We first load the data (download available [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)), pre-process the scRNA-seq reference, and then perform label transfer. The procedure outputs, for each spot, a probabilistic classification for each of the scRNA-seq derived classes. We add these predictions as a new assay in the Seurat object. ```{r sc.data} -allen_reference <- readRDS("../data/allen_cortex.rds") +allen_reference <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") allen_reference <- UpdateSeuratObject(allen_reference) ``` @@ -395,7 +395,7 @@ We then normalize the data using [sctransform](https://genomebiology.biomedcentr ```{r preprocess.ss} slide.seq <- SCTransform(slide.seq, assay = "Spatial", ncells = 3000, verbose = FALSE) -slide.seq <- RunPCA(slide.seq) +slide.seq <- RunPCA(slide.seq, assay = "SCT") slide.seq <- RunUMAP(slide.seq, dims = 1:30) slide.seq <- FindNeighbors(slide.seq, dims = 1:30) slide.seq <- FindClusters(slide.seq, resolution = 0.3, verbose = FALSE) @@ -415,7 +415,7 @@ SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq To facilitate cell-type annotation of the Slide-seq dataset, we are leveraging an existing mouse single-cell RNA-seq hippocampus dataset, produced in [Saunders\*, Macosko\*, et al. 2018](https://doi.org/10.1016/j.cell.2018.07.028). The data is available for download as a processed Seurat object [here](https://www.dropbox.com/s/cs6pii5my4p3ke3/mouse_hippocampus_reference.rds?dl=0), with the raw count matrices available on the [DropViz website](http://dropviz.org/). ```{r ref.saunders} -ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) ``` @@ -467,11 +467,11 @@ devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) Annotation using RCTD -```{r rctd, eval=FALSE} +```{r rctd, warning=FALSE, results=FALSE} library(spacexr) # set up reference -ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) Idents(ref) <- "celltype" counts <- LayerData(ref, assay = "RNA", layer = "counts") @@ -482,20 +482,25 @@ names(nUMI) <- colnames(ref) reference <- Reference(counts, cluster, nUMI) # set up query -slide.seq <- LodaData("ssHippo") +slide.seq <- SeuratData::LoadData("ssHippo") +slide.seq[['Spatial']] <- as(slide.seq[['Spatial']], Class = 'Assay5') counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL query <- SpatialRNA(coords, counts, colSums(counts)) -RCTD <- create.RCTD(query, reference, max_cores = 1) +RCTD <- create.RCTD(query, reference, max_cores = 8) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') +``` +Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot + +```{r rctd_results, fig.height=8, fig.width=14} slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") p2 <- SpatialDimPlot(slide.seq, group.by = "second_type") -p1 + p2 +p1 | p2 ``` ```{r save.times, include=TRUE} diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index 76f74163d..0df34dd12 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -270,7 +270,7 @@ We consistently found superior performance using integration methods (as opposed We first load the data (download available [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)), pre-process the scRNA-seq reference, and then perform label transfer. The procedure outputs, for each spot, a probabilistic classification for each of the scRNA-seq derived classes. We add these predictions as a new assay in the Seurat object. ```{r sc.data} -allen_reference <- readRDS("../data/allen_cortex.rds") +allen_reference <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") ``` ```{r sc.data2} @@ -410,7 +410,7 @@ SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq To facilitate cell-type annotation of the Slide-seq dataset, we are leveraging an existing mouse single-cell RNA-seq hippocampus dataset, produced in [Saunders\*, Macosko\*, et al. 2018](https://doi.org/10.1016/j.cell.2018.07.028). The data is available for download as a processed Seurat object [here](https://www.dropbox.com/s/cs6pii5my4p3ke3/mouse_hippocampus_reference.rds?dl=0), with the raw count matrices available on the [DropViz website](http://dropviz.org/). ```{r ref.saunders} -ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) ``` @@ -462,11 +462,11 @@ devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) Annotation using RCTD -```{r rctd, eval=FALSE} +```{r rctd, warning=FALSE, results=FALSE} library(spacexr) # set up reference -ref <- readRDS("../data/mouse_hippocampus_reference.rds") +ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) Idents(ref) <- "celltype" counts <- LayerData(ref, assay = "RNA", layer = "counts") @@ -477,23 +477,27 @@ names(nUMI) <- colnames(ref) reference <- Reference(counts, cluster, nUMI) # set up query -slide.seq <- LodaData("ssHippo") +slide.seq <- SeuratData::LoadData("ssHippo") counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL query <- SpatialRNA(coords, counts, colSums(counts)) -RCTD <- create.RCTD(query, reference, max_cores = 1) +RCTD <- create.RCTD(query, reference, max_cores = 8) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') +``` + +Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot +```{r rctd_results, fig.height=8, fig.width=14} slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") p2 <- SpatialDimPlot(slide.seq, group.by = "second_type") -p1 + p2 +p1 | p2 ``` -```{r save.times, include = FALSE} +```{r save.times, include = TRUE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/spatial_vignette_times.csv") ``` From 53e361c325077525da0fbf42ec770f649aaf1c04 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 9 Mar 2023 21:27:02 -0500 Subject: [PATCH 505/979] use cellranger arc 2.0.0 outs --- vignettes/seurat5_integration_bridge.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index f4562efa1..6f07c095e 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -64,7 +64,7 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he ```{r} # the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +inputdata.10x <- Read10X_h5("../data/pbmc_cellranger_arc_2/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -84,7 +84,7 @@ annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" # File with ATAC per fragment information file -frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +frag.file <- "../data/pbmc_cellranger_arc_2/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( counts = atac_counts, @@ -94,7 +94,7 @@ chrom_assay <- CreateChromatinAssay( min.cells = 10, annotation = annotations ) -# Add the ATAC assay to the multiome object +# Add the ATAC assay to the multiome object obj.multi[["ATAC"]] <- chrom_assay # Filter ATAC data based on QC metrics obj.multi <- subset( From ec6c3887024c5a74b27fc8b8dd7bcef684bd0d38 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 9 Mar 2023 23:54:36 -0500 Subject: [PATCH 506/979] Fix SCT for single cell grids --- R/preprocessing5.R | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index bc9cd8004..ebd3cca23 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -540,7 +540,7 @@ NormalizeData.default <- function( ... ) } - }, + }, 'CLR' = { if (!inherits(x = object, what = 'dgCMatrix') && !inherits(x = object, what = 'matrix')) { @@ -560,7 +560,7 @@ NormalizeData.default <- function( !inherits(x = object, what = 'matrix')) { stop('RC normalization only supports for dense and dgCMatrix') } - RelativeCounts(data = object, + RelativeCounts(data = object, scale.factor = scale.factor, verbose = verbose) } @@ -1013,7 +1013,7 @@ CalcDispersion <- function( } feature.mean <- mean.function(object, verbose) feature.dispersion <- dispersion.function(object, verbose) - + names(x = feature.mean) <- names(x = feature.dispersion) <- rownames(x = object) feature.dispersion[is.na(x = feature.dispersion)] <- 0 feature.mean[is.na(x = feature.mean)] <- 0 @@ -1061,7 +1061,7 @@ CalcN <- function(object) { } #' Find variable features based on dispersion -#' +#' DISP <- function( data, nselect = 2000L, @@ -1452,7 +1452,6 @@ SCTransform.StdAssay <- function( seed.use = seed.use, verbose = FALSE, ...) - residual.type <- vst.out[['residual_type']] %||% 'pearson' sct.method <- vst.out[['sct.method']] # create output assay and put (corrected) umi counts in count slot @@ -1499,7 +1498,13 @@ SCTransform.StdAssay <- function( message("Using block ", selected.block, " from ", dataset.names[[dataset.index]], " to learn model.") } vp <- cells.grid[[selected.block]] - assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = FALSE) + + do.correct.umi.chunk <- FALSE + # correct umi if only single chunk + if (length(x = cells.grid) == 1) { + do.correct.umi.chunk <- TRUE + } + assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = do.correct.umi.chunk) local.reference.SCT.model <- assay.out@SCTModel.list[[1]] variable.features <- VariableFeatures(assay.out) # once we have the model, just calculate residuals for all @@ -1518,7 +1523,7 @@ SCTransform.StdAssay <- function( residuals <- list() corrected_counts <- list() cell_attrs <- list() - if (length(cells.grid)==1){ + if (length(x = cells.grid) == 1){ merged.assay <- assay.out corrected_counts[[1]] <- GetAssayData(object = assay.out, slot="data") residuals[[1]] <- GetAssayData(object = assay.out, slot="scale.data") @@ -2244,4 +2249,4 @@ MVP <- function( hvf.info[hvf.info$variable,'rank'] <- rank(x = hvf.info[hvf.info$variable,'rank']) hvf.info[!hvf.info$variable,'rank'] <- NA return(hvf.info) -} \ No newline at end of file +} From 3f3e4da58013ed4edac6171862b2a09eed85b80b Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 10 Mar 2023 09:18:52 -0500 Subject: [PATCH 507/979] fix paths --- vignettes/seurat5_spatial_vignette.Rmd | 6 +++--- vignettes/spatial_vignette.Rmd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 3d3c2d22f..0fa623e74 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -272,7 +272,7 @@ We consistently found superior performance using integration methods (as opposed We first load the data (download available [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)), pre-process the scRNA-seq reference, and then perform label transfer. The procedure outputs, for each spot, a probabilistic classification for each of the scRNA-seq derived classes. We add these predictions as a new assay in the Seurat object. ```{r sc.data} -allen_reference <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") +allen_reference <- readRDS("../data/allen_cortex.rds") allen_reference <- UpdateSeuratObject(allen_reference) ``` @@ -415,7 +415,7 @@ SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq To facilitate cell-type annotation of the Slide-seq dataset, we are leveraging an existing mouse single-cell RNA-seq hippocampus dataset, produced in [Saunders\*, Macosko\*, et al. 2018](https://doi.org/10.1016/j.cell.2018.07.028). The data is available for download as a processed Seurat object [here](https://www.dropbox.com/s/cs6pii5my4p3ke3/mouse_hippocampus_reference.rds?dl=0), with the raw count matrices available on the [DropViz website](http://dropviz.org/). ```{r ref.saunders} -ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") +ref <- readRDS("../data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) ``` @@ -471,7 +471,7 @@ Annotation using RCTD library(spacexr) # set up reference -ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") +ref <- readRDS("../data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) Idents(ref) <- "celltype" counts <- LayerData(ref, assay = "RNA", layer = "counts") diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index 0df34dd12..00f9d7a50 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -270,7 +270,7 @@ We consistently found superior performance using integration methods (as opposed We first load the data (download available [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)), pre-process the scRNA-seq reference, and then perform label transfer. The procedure outputs, for each spot, a probabilistic classification for each of the scRNA-seq derived classes. We add these predictions as a new assay in the Seurat object. ```{r sc.data} -allen_reference <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") +allen_reference <- readRDS("../data/allen_cortex.rds") ``` ```{r sc.data2} @@ -410,7 +410,7 @@ SpatialDimPlot(slide.seq, cells.highlight = CellsByIdentities(object = slide.seq To facilitate cell-type annotation of the Slide-seq dataset, we are leveraging an existing mouse single-cell RNA-seq hippocampus dataset, produced in [Saunders\*, Macosko\*, et al. 2018](https://doi.org/10.1016/j.cell.2018.07.028). The data is available for download as a processed Seurat object [here](https://www.dropbox.com/s/cs6pii5my4p3ke3/mouse_hippocampus_reference.rds?dl=0), with the raw count matrices available on the [DropViz website](http://dropviz.org/). ```{r ref.saunders} -ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") +ref <- readRDS("../data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) ``` @@ -466,7 +466,7 @@ Annotation using RCTD library(spacexr) # set up reference -ref <- readRDS("/home/hartmana/github/seurat-private/data/mouse_hippocampus_reference.rds") +ref <- readRDS("../data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) Idents(ref) <- "celltype" counts <- LayerData(ref, assay = "RNA", layer = "counts") From f695dafc6af3b70b87d169cb7e5bda5b2dea48da Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 11:56:46 -0400 Subject: [PATCH 508/979] replace leverageScoreSampling by sketchData --- R/sketching.R | 60 +++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 44104ee42..09d357475 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -76,46 +76,56 @@ DelayedLeverageScore <- function( #' #' @export #' -LeverageScoreSampling <- function( +#' +#' +SketchData <- function( object, assay = NULL, ncells = 5000L, - save = 'sketch', + sketched.assay = 'sketch', + method = c('LeverageScore', 'Uniform'), var.name = "leverage.score", over.write = FALSE, - default = TRUE, seed = 123L, - cast = NULL, - layers = c('counts', 'data'), + cast = 'dgCMatrix', verbose = TRUE, ... ) { assay <- assay[1L] %||% DefaultAssay(object = object) assay <- match.arg(arg = assay, choices = Assays(object = object)) - if (save == assay) { + method <- match.arg(arg = method) + if (sketched.assay == assay) { abort(message = "Cannot overwrite existing assays") } - if (save %in% Assays(object = object)) { - if (save == DefaultAssay(object = object)) { + if (sketched.assay %in% Assays(object = object)) { + if (sketched.assay == DefaultAssay(object = object)) { DefaultAssay(object = object) <- assay } - object[[save]] <- NULL + object[[sketched.assay]] <- NULL } if (!over.write) { var.name <- CheckMetaVarName(object = object, var.name = var.name) } - if (verbose) { - message("Calcuating Leverage Score") + + if (method == 'LeverageScore') { + if (verbose) { + message("Calcuating Leverage Score") + } + object <- LeverageScore( + object = object, + assay = assay, + var.name = var.name, + over.write = over.write, + seed = seed, + verbose = verbose, + ... + ) + } else if (method == 'Uniform') { + if (verbose) { + message("Uniformly sampling") + } + object[[var.name]] <- 1 } - object <- LeverageScore( - object = object, - assay = assay, - var.name = var.name, - over.write = over.write, - seed = seed, - verbose = verbose, - ... - ) leverage.score <- object[[var.name]] layers.data <- Layers(object = object[[assay]], search = 'data') cells <- lapply( @@ -137,7 +147,7 @@ LeverageScoreSampling <- function( sketched <- suppressWarnings(expr = subset( x = object[[assay]], cells = unlist(cells), - layers = Layers(object = object[[assay]], search = layers) + layers = Layers(object = object[[assay]], search = c('counts', 'data')) )) for (lyr in layers.data) { try( @@ -149,11 +159,9 @@ LeverageScoreSampling <- function( if (!is.null(x = cast)) { sketched <- CastAssay(object = sketched, to = cast, ...) } - Key(object = sketched) <- Key(object = save, quiet = TRUE) - object[[save]] <- sketched - if (isTRUE(x = default)) { - DefaultAssay(object = object) <- save - } + Key(object = sketched) <- Key(object = sketched.assay, quiet = TRUE) + object[[sketched.assay]] <- sketched + DefaultAssay(object = object) <- sketched.assay return(object) } From 283c607a19e38fe536d292c4ee3ab0e3d0abe5cd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 16:07:38 -0400 Subject: [PATCH 509/979] update docu --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 51fcee1c2..418ad7bb0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -300,7 +300,6 @@ export(L2Dim) export(LabelClusters) export(LabelPoints) export(LeverageScore) -export(LeverageScoreSampling) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) @@ -414,6 +413,7 @@ export(SingleImageMap) export(SingleImagePlot) export(SingleRasterMap) export(SingleSpatialPlot) +export(SketchData) export(SpatialDimPlot) export(SpatialFeaturePlot) export(SpatialPlot) From ba20efb46a2683fdb4b83c6ae584feb763fd993b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 16:11:50 -0400 Subject: [PATCH 510/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 945e74f55..57b999bda 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9035 -Date: 2023-02-18 +Version: 4.9.9.9036 +Date: 2023-03-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From a137a1fdebdc619b48b8a3f49197a4cf156ed45a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 17:28:08 -0400 Subject: [PATCH 511/979] add projectdata --- R/integration.R | 110 ----------------------- R/sketching.R | 231 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 172 insertions(+), 169 deletions(-) diff --git a/R/integration.R b/R/integration.R index 9e72767ec..4f8a3dc4e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3516,116 +3516,6 @@ TransferData <- function( } - - -#' Transfer data from sketch data to full data -#' @export -#' -TransferSketchLabels <- function( - object, - atoms = 'sketch', - reduction, - dims, - refdata, - k = 50, - reduction.model = NULL, - neighbors = NULL, - recompute.neighbors = FALSE, - recompute.weights = FALSE, - verbose = TRUE -){ - - full_sketch.nn <- neighbors %||% Tool( - object = object, - slot = 'TransferSketchLabels' - )$full_sketch.nn - full_sketch.weight <- Tool( - object = object, - slot = 'TransferSketchLabels' - )$full_sketch.weight - - compute.neighbors <- is.null(x = full_sketch.nn) || - !all(Cells(full_sketch.nn) == Cells(object[[reduction]])) || - max(Indices(full_sketch.nn)) > ncol(object[[atoms]]) || - recompute.neighbors - compute.weights <- is.null(x = full_sketch.weight) || - !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || - !all(rownames(full_sketch.weight) == colnames(object[[atoms]])) || - recompute.weights - - if (compute.neighbors) { - if (verbose) { - message("Finding sketch neighbors") - } - full_sketch.nn <- Seurat:::NNHelper( - query = Embeddings(object[[reduction]])[, dims], - data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], - k = k, - method = "annoy" - ) - } - if (compute.weights) { - if (verbose) { - message("Finding sketch weight matrix") - } - full_sketch.weight <- FindWeightsNN(nn.obj = full_sketch.nn, - query.cells = Cells(object[[reduction]]), - reference = colnames(object[[atoms]]), - verbose = verbose) - rownames(full_sketch.weight) <- colnames(object[[atoms]]) - colnames(full_sketch.weight) <- Cells(object[[reduction]]) - } - object@tools$TransferSketchLabels$full_sketch.nn <- full_sketch.nn - object@tools$TransferSketchLabels$full_sketch.weight <- full_sketch.weight - - if (length(refdata) == 1 & is.character(refdata)) { - refdata <- list(refdata) - names(refdata) <- unlist(refdata) - } - if (verbose) { - message("Transfering refdata from sketch") - } - for (rd in 1:length(x = refdata)) { - if (isFALSE(x = refdata[[rd]])) { - transfer.results[[rd]] <- NULL - next - } - rd.name <- names(x = refdata)[rd] - label.rd <- refdata[[rd]] - ## FetchData not work - reference.labels <- object@meta.data[colnames(object[[atoms]]), label.rd] - predicted.labels.list <- TransferLablesNN( - reference.labels = reference.labels, - weight.matrix = full_sketch.weight) - object[[paste0('predicted.', rd.name)]] <- predicted.labels.list$labels - object[[paste0('predicted.', rd.name, '.score')]] <- predicted.labels.list$scores - } - if (!is.null(reduction.model)) { - umap.model <- Misc(object = object[[reduction.model]], slot = 'model') - if (is.null(umap.model)) { - warning(reduction.model, ' does not have a stored umap model') - return(object) - } - if (verbose) { - message("Projection to sketch umap") - } - if (ncol(full_sketch.nn) > umap.model$n_neighbors) { - full_sketch.nn@nn.idx <- full_sketch.nn@nn.idx[, 1:umap.model$n_neighbors] - full_sketch.nn@nn.dist <- full_sketch.nn@nn.dist[, 1:umap.model$n_neighbors] - } - proj.umap <- RunUMAP( - object = full_sketch.nn, - reduction.model = object[[reduction.model]], - verbose = verbose, - assay = slot(object = object[[reduction]], name = 'assay.used') - ) - Key(proj.umap) <- paste0('ref', Key(proj.umap)) - object[[paste0('ref.',reduction.model )]] <- proj.umap - } - return(object) -} - - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/sketching.R b/R/sketching.R index 09d357475..a70f6683e 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -12,65 +12,6 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -DelayedLeverageScore <- function( - object, - assay = NULL, - nsketch = 5000L, - ncells = 5000L, - layer = 'data', - save = 'sketch', - method = CountSketch, - eps = 0.5, - default = TRUE, - seed = NA_integer_, - # cast = NULL, - verbose = TRUE, - ... -) { - .NotYetImplemented() - check_installed( - pkg = 'DelayedArray', - reason = 'for working with delayed matrices' - ) - assay <- assay[1L] %||% DefaultAssay(object = object) - assay <- match.arg(arg = assay, choices = Assays(object = object)) - # TODO: fix this in [[<-,Seurat5 - if (save == assay) { - abort(message = "Cannot overwrite existing assays") - } - if (save %in% Assays(object = object)) { - if (save == DefaultAssay(object = object)) { - DefaultAssay(object = object) <- assay - } - object[[save]] <- NULL - } - layer <- unique(x = layer) %||% DefaultLayer(object = object) - layer <- Layers(object = object, assay = assay, search = layer) - scores <- SeuratObject:::EmptyDF(n = ncol(x = object)) - row.names(x = scores) <- colnames(x = object) - scores[, layer] <- NA_real_ - for (i in seq_along(along.with = layer)) { - l <- layer[i] - if (isTRUE(x = verbose)) { - message("Running LeverageScore for layer ", l) - } - # scores[Cells(x = object, layer = l), l] <- LeverageScore( - # object = LayerData( - # object = object, - # layer = l, - # features = features %||% VariableFeatures(object = object, layer = l), - # fast = TRUE - # ), - # nsketch = nsketch, - # ndims = ndims %||% ncol(x = object), - # method = method, - # eps = eps, - # seed = seed, - # verbose = verbose, - # ... - # ) - } -} #' @importFrom SeuratObject CastAssay Key Key<- Layers #' @@ -165,6 +106,178 @@ SketchData <- function( return(object) } + +#' Project full data to the sketch assay +#' +#' @export +#' +ProjectData <- function( + object, + assay = 'RNA', + sketched.assay = 'sketch', + sketched.reduction, + full.reduction, + dims, + normalization.method = c("LogNormalize", "SCT"), + refdata = NULL, + k.weight = 50, + umap.model = NULL, + recompute.neighbors = FALSE, + recompute.weights = FALSE, + verbose = TRUE +) { + if (!full.reduction %in% Reductions(object)) { + if (verbose) { + message(full.reduction, ' is not in the object.' + ,' Data from all cells will be projected to ', sketched.reduction) + } + proj.emb <- ProjectCellEmbeddings(query = object, + reference = object, + query.assay = assay, + dims = dims, + normalization.method = normalization.method, + reference.assay = sketched.assay, + reduction = sketched.reduction, + verbose = verbose) + object[[full.reduction]] <- CreateDimReducObject( + embeddings = proj.emb, + assay = assay, + key = Key(object = full.reduction, quiet = TRUE) + ) + } + + object <- TransferSketchLabels(object = object, + atoms = sketched.assay, + reduction = full.reduction, + dims = dims, + k = k.weight, + refdata = refdata, + reduction.model = umap.model, + recompute.neighbors = recompute.neighbors, + recompute.weights = recompute.weights, + verbose = verbose + ) + return(object) +} + + +#' Transfer data from sketch data to full data +#' @export +#' +TransferSketchLabels <- function( + object, + atoms = 'sketch', + reduction, + dims, + refdata = NULL, + k = 50, + reduction.model = NULL, + neighbors = NULL, + recompute.neighbors = FALSE, + recompute.weights = FALSE, + verbose = TRUE +){ + full_sketch.nn <- neighbors %||% Tool( + object = object, + slot = 'TransferSketchLabels' + )$full_sketch.nn + full_sketch.weight <- Tool( + object = object, + slot = 'TransferSketchLabels' + )$full_sketch.weight + + compute.neighbors <- is.null(x = full_sketch.nn) || + !all(Cells(full_sketch.nn) == Cells(object[[reduction]])) || + max(Indices(full_sketch.nn)) > ncol(object[[atoms]]) || + !identical(x = full_sketch.nn@alg.info$dims, y = dims) || + !identical(x = full_sketch.nn@alg.info$reduction, y = reduction) || + recompute.neighbors + + compute.weights <- is.null(x = full_sketch.weight) || + !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || + !all(rownames(full_sketch.weight) == colnames(object[[atoms]])) || + recompute.weights || + recompute.neighbors + + if (compute.neighbors) { + if (verbose) { + message("Finding sketch neighbors") + } + full_sketch.nn <- NNHelper( + query = Embeddings(object[[reduction]])[, dims], + data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], + k = k, + method = "annoy" + ) + slot(object = full_sketch.nn, name = 'alg.info')$dims <- dims + slot(object = full_sketch.nn, name = 'alg.info')$reduction <- reduction + } + if (compute.weights) { + if (verbose) { + message("Finding sketch weight matrix") + } + full_sketch.weight <- FindWeightsNN(nn.obj = full_sketch.nn, + query.cells = Cells(object[[reduction]]), + reference = colnames(object[[atoms]]), + verbose = verbose) + rownames(full_sketch.weight) <- colnames(object[[atoms]]) + colnames(full_sketch.weight) <- Cells(object[[reduction]]) + } + slot(object = object, name = 'tools')$TransferSketchLabels$full_sketch.nn <- full_sketch.nn + slot(object = object, name = 'tools')$TransferSketchLabels$full_sketch.weight <- full_sketch.weight + + if (!is.null(refdata)) { + if (length(refdata) == 1 & is.character(refdata)) { + refdata <- list(refdata) + names(refdata) <- unlist(refdata) + } + if (verbose) { + message("Transfering refdata from sketch") + } + for (rd in 1:length(x = refdata)) { + if (isFALSE(x = refdata[[rd]])) { + transfer.results[[rd]] <- NULL + next + } + rd.name <- names(x = refdata)[rd] + label.rd <- refdata[[rd]] + ## FetchData not work + if (!label.rd %in% colnames( object[[]])) { + stop(label.rd, ' is not in the meta.data') + } + reference.labels <- object[[]][colnames(object[[atoms]]), label.rd] + predicted.labels.list <- TransferLablesNN( + reference.labels = reference.labels, + weight.matrix = full_sketch.weight) + object[[paste0('predicted.', rd.name)]] <- predicted.labels.list$labels + object[[paste0('predicted.', rd.name, '.score')]] <- predicted.labels.list$scores + } + } + if (!is.null(reduction.model)) { + umap.model <- Misc(object = object[[reduction.model]], slot = 'model') + if (is.null(umap.model)) { + warning(reduction.model, ' does not have a stored umap model') + return(object) + } + if (verbose) { + message("Projection to sketch umap") + } + if (ncol(full_sketch.nn) > umap.model$n_neighbors) { + full_sketch.nn@nn.idx <- full_sketch.nn@nn.idx[, 1:umap.model$n_neighbors] + full_sketch.nn@nn.dist <- full_sketch.nn@nn.dist[, 1:umap.model$n_neighbors] + } + proj.umap <- RunUMAP( + object = full_sketch.nn, + reduction.model = object[[reduction.model]], + verbose = verbose, + assay = slot(object = object[[reduction]], name = 'assay.used') + ) + Key(proj.umap) <- paste0('ref', Key(proj.umap)) + object[[paste0('ref.',reduction.model )]] <- proj.umap + } + return(object) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From 8b039c54223f8657c6a5b4d285e663968dac14bc Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 17:29:24 -0400 Subject: [PATCH 512/979] update docu --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 418ad7bb0..695cf3719 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -348,6 +348,7 @@ export(PrepSCTIntegration) export(PrepareBridgeReference) export(Project) export(ProjectCellEmbeddings) +export(ProjectData) export(ProjectDim) export(ProjectDimReduc) export(ProjectUMAP) From 710c74d8b5e4ed6d886c35f3f071790780ce7283 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 18:06:27 -0400 Subject: [PATCH 513/979] add BPCells --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index ebd3cca23..47ab4baa3 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -798,7 +798,7 @@ VST.IterableMatrix <-function( ) { nfeatures <- nrow(x = data) hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) - hvf.stats <- matrix_stats( + hvf.stats <- BPCells::matrix_stats( matrix = data, row_stats = 'variance')$row_stats # Calculate feature means From a65201e9ad8be7369fe1ef85b9ce1e62dcd76ed0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 18:07:06 -0400 Subject: [PATCH 514/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 57b999bda..f74b799ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9036 +Version: 4.9.9.9037 Date: 2023-03-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 735d610555ead0b7524a65b5527daf6708dcf7a0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 18:13:29 -0400 Subject: [PATCH 515/979] fix BPCells scale data --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 2671a168d..498913185 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -4444,10 +4444,10 @@ ScaleData.IterableMatrix <- function( } else { features.sd <- 1 } - scaled.data <- (object - features.mean) / features.sd if (scale.max != Inf) { - scaled.data <- BPCells::min_scalar(mat = scaled.data, val = scale.max) + scaled.data <- BPCells::min_by_row(mat = scaled.data, vals = scale.max*feature.sd + feature.mean) } + scaled.data <- (object - features.mean) / features.sd return(scaled.data) } From d72a70b5996c212c86bd7362ee2cdfdc06cceb8f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 14 Mar 2023 18:16:42 -0400 Subject: [PATCH 516/979] modify style --- R/integration.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/R/integration.R b/R/integration.R index 4f8a3dc4e..9b38c3328 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5206,7 +5206,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( block.size = 10000 ) { features <- features %||% rownames(x = Loadings(object = reference[[reduction]])) - features <- intersect(features, rownames(query)) + features <- intersect(x = features, y = rownames(query)) if (normalization.method == 'SCT') { reference.SCT.model <- slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]] cells.grid <- split( @@ -5214,7 +5214,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( f = ceiling(seq_along(along.with = 1:ncol(query))/block.size) ) proj.list <- list() - for (i in seq_along(cells.grid)) { + for (i in seq_along(along.with = cells.grid)) { query.i <- FetchResiduals_reference( object = as.sparse(query[,cells.grid[[i]]]), reference.SCT.model = reference.SCT.model, @@ -5223,12 +5223,12 @@ ProjectCellEmbeddings.IterableMatrix <- function( proj.list[[i]] <- t(Loadings(object = reference[[reduction]])[features,dims]) %*% query.i } proj.pca <- t(matrix( - data = unlist(proj.list), - nrow = length(dims), - ncol = ncol(query), + data = unlist(x = proj.list), + nrow = length(x = dims), + ncol = ncol(x = query), dimnames = list( - colnames(Embeddings(object = reference[[reduction]]))[dims], - colnames(query)) + colnames(x = Embeddings(object = reference[[reduction]]))[dims], + colnames(x = query)) )) } else { query <- query[features,] @@ -5245,11 +5245,11 @@ ProjectCellEmbeddings.IterableMatrix <- function( } if (scale) { if (inherits(x = reference.data, what = "IterableMatrix")) { - feature.sd <- sqrt(bp.stats$row_stats["variance",]) + feature.sd <- sqrt(x = bp.stats$row_stats["variance",]) } else { feature.sd <- sqrt( x = RowVarSparse( - mat = as.sparse(reference.data) + mat = as.sparse(x = reference.data) ) ) } @@ -5259,11 +5259,11 @@ ProjectCellEmbeddings.IterableMatrix <- function( } feature.mean[is.na(x = feature.mean)] <- 1 } - query.scale <- BPCells::min_by_row(query, 10*feature.sd + feature.mean) + query.scale <- BPCells::min_by_row(mat = query, vals = 10*feature.sd + feature.mean) query.scale <- (query.scale - feature.mean)/feature.sd proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] - rownames(proj.pca) <- colnames(query) - colnames(proj.pca) <- colnames(Embeddings(object = reference[[reduction]]))[dims] + rownames(x = proj.pca) <- colnames(x = query) + colnames(x = proj.pca) <- colnames(x = Embeddings(object = reference[[reduction]]))[dims] } return(proj.pca) } From 2d33ee367a1a6be03ff374df76509246c07a3454 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 16 Mar 2023 12:54:46 -0400 Subject: [PATCH 517/979] rewrite project integration --- R/integration.R | 95 ++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/R/integration.R b/R/integration.R index 9b38c3328..da998a59b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1829,7 +1829,7 @@ IntegrateEmbeddings.TransferAnchorSet <- function( } -#' Integrate embeddings from the integrated atoms +#' Integrate embeddings from the integrated sketched.assay #' #' The main steps of this procedure are outlined below. For a more detailed #' description of the methodology, please see Hao, et al Biorxiv 2022: @@ -1840,8 +1840,8 @@ IntegrateEmbeddings.TransferAnchorSet <- function( #' reconstruct the embeddings of each cell from the integrated atoms. #' #' @param object A Seurat object with all cells for one dataset -#' @param atoms Assay name for sketched-cell expression (default is 'sketch') -#' @param orig Assay name for original expression (default is 'RNA') +#' @param sketched.assay Assay name for sketched-cell expression (default is 'sketch') +#' @param assay Assay name for original expression (default is 'RNA') #' @param features Features used for atomic sketch integration #' @param reduction Dimensional reduction name for batch-corrected embeddings #' in the sketched object (default is 'integrated_dr') @@ -1867,25 +1867,27 @@ IntegrateEmbeddings.TransferAnchorSet <- function( #' #' @export #' -IntegrateSketchEmbeddings <- function( +ProjectIntegration <- function( object, - atoms = 'sketch', # DefaultAssay(object) - atoms.layers = NULL, - orig = 'RNA', - features = NULL, # VF from object[[atom.assay]] + sketched.assay = 'sketch', # DefaultAssay(object) + assay = 'RNA', reduction = 'integrated_dr', # harmony; rerun UMAP on this - method = c('sketch', 'data'), - ratio = 0.8, + features = NULL, # VF from object[[atom.assay]] + layers = 'data', reduction.name = NULL, reduction.key = NULL, - layers = NULL, + method = c('sketch', 'data'), + ratio = 0.8, + sketched.layers = NULL, seed = 123, verbose = TRUE ) { + + layers <- Layers(object = object[[assay]], search = layers) # Check input and output dimensional reductions - atoms.layers <- atoms.layers %||% layers + sketched.layers <- sketched.layers %||% layers reduction <- match.arg(arg = reduction, choices = Reductions(object = object)) - reduction.name <- reduction.name %||% paste0(reduction, '.orig') + reduction.name <- reduction.name %||% paste0(reduction, '.full') reduction.key <- reduction.key %||% Key(object = reduction.name, quiet = TRUE) if (reduction.name %in% Reductions(object = object)) { warning( @@ -1900,44 +1902,41 @@ IntegrateSketchEmbeddings <- function( method <- method[1L] method <- match.arg(arg = method) # Check our layers - atoms <- match.arg(arg = atoms, choices = Assays(object = object)) - orig <- match.arg(arg = orig, choices = Assays(object = object)) - layer.orig <- layers + sketched.assay <- match.arg(arg = sketched.assay, choices = Assays(object = object)) + assay <- match.arg(arg = assay, choices = Assays(object = object)) + layer.full <- layers layers <- layers %||% intersect( - x = DefaultLayer(object[[atoms]]), - y = Layers(object[[orig]]) - ) - if (is.null(x = layer.orig)) { - atoms.missing <- setdiff(x = layers, DefaultLayer(object = object[[atoms]])) - if (length(x = atoms.missing) == length(x = layers)) { - stop("None of the requested layers are present in the atoms") - } else if (length(x = atoms.missing)) { + x = DefaultLayer(object[[sketched.assay]]), + y = Layers(object[[assay]]) + ) + if (is.null(x = layer.full)) { + sketched.assay.missing <- setdiff(x = layers, DefaultLayer(object = object[[sketched.assay]])) + if (length(x = sketched.assay.missing) == length(x = layers)) { + stop("None of the requested layers are present in the sketched.assay") + } else if (length(x = sketched.assay.missing)) { warning( - length(x = atoms.missing), - " layers missing from the atoms", + length(x = sketched.assay.missing), + " layers missing from the sketched.assay", call. = FALSE, immediate. = TRUE ) - layers <- intersect(x = layers, y = DefaultLayer(object = object[[atoms]])) + layers <- intersect(x = layers, y = DefaultLayer(object = object[[sketched.assay]])) } } # check layers - layers.missing <- setdiff(layers, Layers(object = object[[orig]])) + layers.missing <- setdiff(layers, Layers(object = object[[assay]])) if (length(x = layers.missing)) { - stop('layer ', layers.missing[1L], ' are not present in ', orig, " assay") + stop('layer ', layers.missing[1L], ' are not present in ', assay, " assay") } # check features - features <- features %||% unlist(x = VariableFeatures( - object = object[[atoms]], - layer = layers - )) + features <- features %||% VariableFeatures(object = object[[sketched.assay]]) # TODO: see if we can handle missing features with `union` features.atom <- Reduce( f = intersect, x = lapply( - X = atoms.layers, + X = sketched.layers, FUN = function(lyr) { - return(Features(x = object[[atoms]], layer = lyr)) + return(Features(x = object[[sketched.assay]], layer = lyr)) } ) ) @@ -1947,12 +1946,12 @@ IntegrateSketchEmbeddings <- function( sapply( X = layers, FUN = function(lyr) { - return(length(x = Cells(x = object[[orig]], layer = lyr))) + return(length(x = Cells(x = object[[assay]], layer = lyr))) } ) ) - if (length(atoms.layers) == 1) { - atoms.layers <- rep(atoms.layers, length(layers)) + if (length(sketched.layers) == 1) { + sketched.layers <- rep(sketched.layers, length(layers)) } sketch.matrix <- switch( EXPR = method, @@ -1976,28 +1975,28 @@ IntegrateSketchEmbeddings <- function( emb.list <- list() cells.list <- list() for (i in seq_along(along.with = layers)) { - if (length(unique(atoms.layers)) == length(layers)) { - cells.sketch <- Cells(x = object[[atoms]], layer = atoms.layers[i]) - } else if (length(unique(atoms.layers)) == 1) { - cells.sketch <- intersect(Cells(x = object[[atoms]][[atoms.layers[[1]]]]), - Cells(object[[orig]][[layers[i] ]] )) + if (length(unique(sketched.layers)) == length(layers)) { + cells.sketch <- Cells(x = object[[sketched.assay]], layer = sketched.layers[i]) + } else if (length(unique(sketched.layers)) == 1) { + cells.sketch <- intersect(Cells(x = object[[sketched.assay]][[sketched.layers[[1]]]]), + Cells(object[[assay]][[layers[i] ]] )) } if (isTRUE(x = verbose)) { message( length(x = cells.sketch), - ' atomic cells identified in the atoms' + ' atomic cells identified in the sketched.assay' ) message("Correcting embeddings") } emb <- UnSketchEmbeddings( atom.data = LayerData( - object = object[[atoms]], + object = object[[sketched.assay]], layer = layers[i], features = features ), atom.cells = cells.sketch, orig.data = LayerData( - object = object[[orig]], + object = object[[assay]], layer = layers[i], features = features ), @@ -2012,12 +2011,12 @@ IntegrateSketchEmbeddings <- function( ncol = length(unlist(cells.list)) )) rownames(emb.all) <- unlist(cells.list) - emb.all <- emb.all[colnames(object[[orig]]), ] + emb.all <- emb.all[colnames(object[[assay]]), ] object[[reduction.name]] <- CreateDimReducObject( embeddings = emb.all, loadings = Loadings(object = object[[reduction]]), key = reduction.key, - assay = orig + assay = assay ) CheckGC() return(object) From 5d6261577de76f3052e217d50aa374a69f17507e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 16 Mar 2023 12:55:39 -0400 Subject: [PATCH 518/979] update docu --- NAMESPACE | 2 +- man/IntegrateSketchEmbeddings.Rd | 67 -------------------------------- 2 files changed, 1 insertion(+), 68 deletions(-) delete mode 100644 man/IntegrateSketchEmbeddings.Rd diff --git a/NAMESPACE b/NAMESPACE index 695cf3719..ba5ea6011 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -287,7 +287,6 @@ export(Indices) export(IntegrateData) export(IntegrateEmbeddings) export(IntegrateLayers) -export(IntegrateSketchEmbeddings) export(Intensity) export(IsGlobal) export(JS) @@ -351,6 +350,7 @@ export(ProjectCellEmbeddings) export(ProjectData) export(ProjectDim) export(ProjectDimReduc) +export(ProjectIntegration) export(ProjectUMAP) export(PseudobulkExpression) export(PurpleAndYellow) diff --git a/man/IntegrateSketchEmbeddings.Rd b/man/IntegrateSketchEmbeddings.Rd deleted file mode 100644 index 3fc2e034a..000000000 --- a/man/IntegrateSketchEmbeddings.Rd +++ /dev/null @@ -1,67 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/integration.R -\name{IntegrateSketchEmbeddings} -\alias{IntegrateSketchEmbeddings} -\title{Integrate embeddings from the integrated atoms} -\usage{ -IntegrateSketchEmbeddings( - object, - atoms = "sketch", - atoms.layers = NULL, - orig = "RNA", - features = NULL, - reduction = "integrated_dr", - method = c("sketch", "data"), - ratio = 0.8, - reduction.name = NULL, - reduction.key = NULL, - layers = NULL, - seed = 123, - verbose = TRUE -) -} -\arguments{ -\item{object}{A Seurat object with all cells for one dataset} - -\item{atoms}{Assay name for sketched-cell expression (default is 'sketch')} - -\item{orig}{Assay name for original expression (default is 'RNA')} - -\item{features}{Features used for atomic sketch integration} - -\item{reduction}{Dimensional reduction name for batch-corrected embeddings -in the sketched object (default is 'integrated_dr')} - -\item{method}{Methods to construct sketch-cell representation -for all cells (default is 'sketch'). Can be one of: -\itemize{ - \item \dQuote{\code{sketch}}: Use random sketched data slot - \item \dQuote{\code{data}}: Use data slot -}} - -\item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set -to \dQuote{\code{sketch}}; defaults to 0.8} - -\item{reduction.name}{Name to save new reduction as; defaults to -\code{paste0(reduction, '.orig')}} - -\item{reduction.key}{Key for new dimensional reduction; defaults to creating -one from \code{reduction.name}} - -\item{layers}{Names of layers for correction.} - -\item{verbose}{Print progress and message} -} -\value{ -Returns a Seurat object with an integrated dimensional reduction -} -\description{ -The main steps of this procedure are outlined below. For a more detailed -description of the methodology, please see Hao, et al Biorxiv 2022: -\doi{10.1101/2022.02.24.481684} -} -\details{ -First learn a atom dictionary representation to reconstruct each cell. -Then, using this dictionary representation, -reconstruct the embeddings of each cell from the integrated atoms. -} From c820b9c3e18ba65b3e58de24f6bf9a9b81e31e4c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 16 Mar 2023 17:25:32 -0400 Subject: [PATCH 519/979] remove predicted ProjectData --- R/sketching.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index a70f6683e..e2c807a48 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -249,8 +249,8 @@ TransferSketchLabels <- function( predicted.labels.list <- TransferLablesNN( reference.labels = reference.labels, weight.matrix = full_sketch.weight) - object[[paste0('predicted.', rd.name)]] <- predicted.labels.list$labels - object[[paste0('predicted.', rd.name, '.score')]] <- predicted.labels.list$scores + object[[paste0(rd.name)]] <- predicted.labels.list$labels + object[[paste0(rd.name, '.score')]] <- predicted.labels.list$scores } } if (!is.null(reduction.model)) { From f9645f286c67c9b7c310e9712f8a6f4e7e3b9ba7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 16 Mar 2023 17:26:34 -0400 Subject: [PATCH 520/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f74b799ff..9618da446 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9037 -Date: 2023-03-14 +Version: 4.9.9.9038 +Date: 2023-03-16 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 16ba6a5d893dd2d9de765660c2ac4f14dc1c38ac Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 17 Mar 2023 09:41:30 -0400 Subject: [PATCH 521/979] update rctd text --- vignettes/seurat5_spatial_vignette.Rmd | 15 ++++++++++----- vignettes/spatial_vignette.Rmd | 14 ++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 0fa623e74..8859d77de 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -459,15 +459,17 @@ Now we visualize the expression of the top 6 features identified by Moran's I. SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") ``` -Install RCTD +We can also use RCTD for annotation which is published [here](https://doi.org/10.1038/s41587-021-00830-w). RCTD is able to decompose cell mixtures to accurately annotate spatial datasets. + +First, we install the `spacexr` package from GitHub which implements RCTD. ```{r, eval=FALSE} devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) ``` -Annotation using RCTD +Counts, cluster, and spot information is extracted from the Seurat query and reference objects to construct `Reference` and `SpatialRNA` objects used by RCTD for annotation. -```{r rctd, warning=FALSE, results=FALSE} +```{r rctd.setup, warning=FALSE, results=FALSE} library(spacexr) # set up reference @@ -483,21 +485,24 @@ reference <- Reference(counts, cluster, nUMI) # set up query slide.seq <- SeuratData::LoadData("ssHippo") -slide.seq[['Spatial']] <- as(slide.seq[['Spatial']], Class = 'Assay5') counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL query <- SpatialRNA(coords, counts, colSums(counts)) +``` +Using the `reference` and `query` object, we annotate the dataset and add the cell type labels to the query Seurat object. RCTD parallelizes well, so multiple cores can be specified for faster performance. + +```{r run.rctd, warning=FALSE, results=FALSE} RCTD <- create.RCTD(query, reference, max_cores = 8) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') +slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) ``` Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot ```{r rctd_results, fig.height=8, fig.width=14} -slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") p2 <- SpatialDimPlot(slide.seq, group.by = "second_type") p1 | p2 diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index 00f9d7a50..eda156be1 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -454,15 +454,17 @@ Now we visualize the expression of the top 6 features identified by Moran's I. SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") ``` -Install RCTD +We can also use RCTD for annotation which is published [here](https://doi.org/10.1038/s41587-021-00830-w). RCTD is able to decompose cell mixtures to accurately annotate spatial datasets. + +First, we install the `spacexr` package from GitHub which implements RCTD. ```{r, eval=FALSE} devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) ``` -Annotation using RCTD +Counts, cluster, and spot information is extracted from the Seurat query and reference objects to construct `Reference` and `SpatialRNA` objects used by RCTD for annotation. -```{r rctd, warning=FALSE, results=FALSE} +```{r rctd.setup, warning=FALSE, results=FALSE} library(spacexr) # set up reference @@ -483,15 +485,19 @@ coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL query <- SpatialRNA(coords, counts, colSums(counts)) +``` + +Using the `reference` and `query` object, we annotate the dataset and add the cell type labels to the query Seurat object. RCTD parallelizes well, so multiple cores can be specified for faster performance. +```{r run.rctd, warning=FALSE, results=FALSE} RCTD <- create.RCTD(query, reference, max_cores = 8) RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') +slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) ``` Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot ```{r rctd_results, fig.height=8, fig.width=14} -slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") p2 <- SpatialDimPlot(slide.seq, group.by = "second_type") p1 | p2 From 03e544ab74c8978a432f80f0aa1bda75a7291709 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 17 Mar 2023 11:03:39 -0400 Subject: [PATCH 522/979] add update vignettes to getting started --- vignettes/vignettes_v5.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 4cf722647..a0452d233 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -65,3 +65,15 @@ summary: | Perform sketch integration on a large dataset from Parse Biosciences. image: ParseBio_sketch_integration.jpg + + - title: Sketch Clustering Updated + name: seurat5_sketch_analysis + summary: | + Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5. + image: tbd + + - title: BPCells interation + name: seurat5_bpcells_interaction_vignette + summary: | + Load and save large on-disk matrices + image: tbd \ No newline at end of file From 35f41c7029ea8f5574ec279c0bb6a3bd6dff0402 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 17 Mar 2023 12:45:27 -0400 Subject: [PATCH 523/979] add new vignetetes --- .../seurat5_bpcells_interaction_vignette.Rmd | 180 ++++++++++++++++++ vignettes/seurat5_sketch_analysis.Rmd | 171 +++++++++++++++++ 2 files changed, 351 insertions(+) create mode 100644 vignettes/seurat5_bpcells_interaction_vignette.Rmd create mode 100644 vignettes/seurat5_sketch_analysis.Rmd diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd new file mode 100644 index 000000000..b4e819458 --- /dev/null +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -0,0 +1,180 @@ +--- +title: "Using BPCells with Seurat Objects" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + warning = FALSE, + error = TRUE, + message = FALSE, + fig.width = 8, + time_it = TRUE +) +``` + +BPCells is an [R package](https://github.com/bnprks/BPCells) that allows for computationally efficient single-cell analysis. It utilizes bit-packing compression to store counts matrices on disk and C++ code to cache operations. + +We leverage the high performance capabilities of BPCells to work with Seurat objects in memory while accessing the counts on disk. In this vignette, we show how to use BPCells to load data, work with a Seurat objects in a more memory-efficient way, and write out Seurat objects with BPCells matrices. + +We will show the methods for interacting with both a single dataset in one file or multiple datasets across multiple files using BPCells. BPCells allows us to easily analyze these large datasets in memory, and we encourage users to check out some of our other vignettes [here]() and [here]() to see further applications. + + +```{r install, message = FALSE, warning = FALSE} +devtools::install_github("bnprks/BPCells") +library(BPCells) +library(Seurat) +library(SeuratObject) +library(SeuratDisk) +library(Azimuth) + +options(Seurat.object.assay.version = "v5") +``` + +We use BPCells functionality to both load in our data and write the counts layers to bitpacked compressed binary files on disk to improve computation speeds. BPCells has multiple functions for reading in files. + +# Load Data + +### Load Data from one h5 file +In this section, we will load a dataset of mouse brain cells freely available from 10X Genomics. This includes 1.3 Million single cells that were sequenced on the Illumina NovaSeq 6000. The raw data can be found [here](https://www.10xgenomics.com/resources/datasets/10k-human-brains-3-ht-v3-1-chromium-x-3-1-high). + +To read in the file, we will use open_matrix_10x_hdf5, a BPCells function written to read in feature matrices from 10x. We then write a matrix directory, load the matrix, and create a Seurat object. + +```{r} +brain.data <- open_matrix_10x_hdf5(path = "../data/1M_neurons_filtered_gene_bc_matrices_h5.h5") +# Write the matrix to a directory +write_matrix_dir(mat = brain.data, dir = '../data/brain_counts') +# Now that we have the matrix on disk, we can load it +brain.mat <- open_matrix_dir(dir = "../data/brain_counts") +brain.mat <- Azimuth::ConvertEnsembleToSymbol(mat = brain.mat, species = "mouse") + +# Create Seurat Object +brain <- CreateSeuratObject(counts = brain.mat) +``` + +
    + **What if I already have a Seurat Object?** + +You can use BPCells to convert the matrices in your already created Seurat objects to on-disk matrices. Note, that this is only possible for V5 assays. As an example, if you'd like to convert the counts matrix of your RNA assay to a BPCells matrix, you can use the following: + +```{r, message=FALSE, warning=FALSE, eval=FALSE} +obj <- readRDS("/path/to/reference.rds") + +# Write the counts layer to a directory +write_matrix_dir(mat = obj[["RNA"]]$counts, dir = '../data/brain_counts') +counts.mat <- open_matrix_dir(dir = "../data/brain_counts") + +obj[["RNA"]]$counts <- counts.mat +``` + +
    + +### Example Analsyis + +Once this conversion is done, you can perform typical Seurat functions on the object. For example, we can normalize data and visualize features by automatically accessing the on-disk counts. + +```{r} +VlnPlot(brain, features = c("Sox10", "Slc17a7", "Aif1"), ncol = 3, layer = "counts") + +# We then normalize and visualize again +brain <- NormalizeData(brain, normalization.method = "LogNormalize") +VlnPlot(brain, features = c("Sox10", "Slc17a7", "Aif1"), ncol = 3, layer = "data") +``` + + +### Load data from multiple h5ad files + +You can also download data from multiple matrices. In this section, we create a Seurat object using multiple peripheral blood mononuclear cell (PBMC) samples that are freely available for downlaod from CZI [here](https://cellxgene.cziscience.com/collections). We download data from [Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We use the BPCells function to read h5ad files. + + +```{r, warning=FALSE} +file.dir <- "../data/h5ad_files/" +files.set <- c("ahern_pbmc.h5ad", "jin_pbmc.h5ad", "yoshida_pbmc.h5ad") + +# Loop through h5ad files and output BPCells matrices on-disk +data.list <- c() +metadata.list <- c() + +for (i in 1:length(files.set)) { + name <- gsub(".h5ad", "", files.set[i]) + path <- paste0(file.dir, files.set[i]) + data <- open_matrix_anndata_hdf5(path) + write_matrix_dir(mat = data, dir = paste0(file.dir, name, "_BP")) + # Load in BP matrices + mat <- open_matrix_dir(dir = paste0(file.dir, name, "_BP")) + mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = "human") + # Get metadata + metadata.list[[i]] <- readH5AD_obs(file = path) + data.list[[i]] <- mat + names(data.list[i]) <- name +} + +# Only pull out needed metadata files +#Reduce(intersect,lapply(metadata.list, colnames)) # all in common +metadata.list <- lapply(metadata.list, function(x) { + x <- x[, c("sex", "cell_type", "donor_id", "disease")] + return(x) +}) +metadata <- Reduce(rbind, metadata.list) +``` + +When we create the Seurat object with the list of , we can then see that multiple counts layers exist that represent each dataset. This object contains over a million cells, yet only takes up minimal space in memory! + +```{r} +options(Seurat.object.assay.version = "v5") +merged.object <- CreateSeuratObject(counts = data.list[1:2], meta.data = metadata) +merged.object +``` + +### Parse Biosciences + +Here, we show how to load a 1 million cell data set from Parse Biosciences and create a Seurat Object. The data is available for download [here](https://support.parsebiosciences.com/hc/en-us/articles/7704577188500-How-to-analyze-a-1-million-cell-data-set-using-Scanpy-and-Harmony) + +```{r} +parse.data <- open_matrix_anndata_hdf5("../data/h5ad_files/ParseBio_PBMC.h5ad") +write_matrix_dir(mat = parse.data, dir = "../data/parse_counts") +parse.mat <- open_matrix_dir(dir = "../data/parse_counts") + +metadata <- readRDS("../data/ParseBio_PBMC_meta.rds") +metadata$disease <- sapply(strsplit(x = metadata$sample, split = "_"), "[", 1) + +parse.object <- CreateSeuratObject(counts = parse.mat, meta.data = metadata) +``` + + +## Saving Seurat objects with on-disk layers + +If you save your object and load it in in the future, Seurat will access the on-disk matrices by their path, which is stored in the assay level data. To make it easy to ensure these are saved in the same place, we provide new functionality to the saveRDS function. In this function, you specify your filename and the destination directory. The pointer to the path in the Seurat object will change to the destination directory. + +This also makes it easy to share your Seurat objects with BPCells matrices by sharing a folder that contains both the object and the BPCells directory. + +```{r} +saveRDS(brain, file = "obj.Rds", destdir = "../data/brain_object") +``` + + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd new file mode 100644 index 000000000..021ac9605 --- /dev/null +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -0,0 +1,171 @@ +--- +title: "Sketch-based analysis in Seurat v5" +output: + html_document: + theme: united + df_print: kable + pdf_document: default +date: 'Compiled: `r Sys.Date()`' +--- + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + fig.width = 10, + time_it = TRUE, + error = TRUE +) +``` + +## Intro: Sketch-based analysis in Seurat v5 +As single-cell sequencing technologies continue to improve in scalability in throughput, the generation of datasets spanning a million or more cells is becoming increasingly routine. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore these exciting datasets. +In this vignette, we introduce a sketch-based analysis workflow to analyze a 1.3 million cell dataset of the developing mouse brain, freely available from 10x Genomics. Analyzing datasets of this size with standard workflows can be challenging, slow, and memory-intensive. Here we introduce an alternative workflow that is highly scalable, even to datasets ranging beyond 10 million cells in size. +Our 'sketch-based' workflow involves three new features in Seurat v5 +* Infrastructure for on-disk storage of large single-cell datasets +Storing expression matrices in memory can be challenging for extremely large scRNA-seq datasets. In Seurat v5, we introduce support for multiple on-disk storage formats. +* 'Sketching' methods to subsample cells from large datasets while preserving rare populations +As introduced in [Hie et al, 2019](https://www.sciencedirect.com/science/article/pii/S2405471219301528), cell sketching methods aim to compactly summarize large single-cell datasets in a small number of cells, while preserving the presence of both abundant and rare cell types. In Seurat v5, we leverage this idea to select subsamples ('sketches') of cells from large datasets that are stored on-disk. However, after sketching, the subsampled cells can be stored in-memory, allowing for interactive and rapid visualization and exploration. +We store sketched cells (in-memory) and the full dataset (on-disk) as two assays in the same Seurat object. Users can then easily switch between the two versions, providing the flexibiltiy to perform quick analyses on a subset of cells in-memory, while retaining access to the full dataset on-disk. +* Support for 'bit-packing' compression and infrastructure +We demonstrate the on-disk capabilities in Seurat v5 using the [BPCells package](https://github.com/bnprks/BPCells) developed by Ben Parks in the Greenleaf Lab. This package utilizes bit-packing compression and optimized, streaming-compatible C++ code to substantially improve I/O and computational performance when working with on-disk data. +To run this vignette please install Seurat v5, using the installation instructions found [here](LINK). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](LINK). +```{r, warning=FALSE, message=FALSE} +library(Seurat) +library(BPCells) +library(ggplot2) +# needs to be set for large dataset analysis +options(future.globals.maxSize = 1e9) +``` +## Create a Seurat object with a v5 assay for on-disk storage + +We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_dir` function from `BPCells`. Note that this function does not load the dataset into memory, but instead, creates a connection to the data stored on-disk. We then store this on-disk representation in the Seurat object. Note that in our [Introduction to on-disk storage vignette](link), we demonstrate how to create this on-disk representation. +```{r} +# note to Yuhan- this directory should already contain the ensembl-modified names. we can show how to do the ensembl mods in the interaction vignette +# Connect to the counts matrix stored on-disk +ondisk_matrix <- open_matrix_dir('../data/mouse_1M_neurons_counts') + +# specify that you would like to create a Seurat v5 assay +# note that we require setting this option to ensure that existing pipelines are not affected +options(Seurat.object.assay.version = 'v5') +# Create the Seurat object, which contains 1.3M cells stored on-disk as part of the 'RNA' assay +obj <- CreateSeuratObject(counts = ondisk_matrix) +obj +# Note that since the data is stored on-disk, the object size easily fits in-memory (<1GB) +format(object.size(obj), units = 'Mb') +``` +## 'Sketch' a subset of cells, and load these into memory +We select a subset ('sketch') of 50,000 cells (out of 1.3M). Rather than sampling all cells with uniform probability, we compute and sample based off a 'leverage score' for each cell, which reflects the magnitude of its contribution to the gene-covariance matrix, and its importance to the overall dataset. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1.full), we demonstrate that the leverage score is highest for rare populations in a dataset. Therefore, our sketched set of 50,000 cells will oversample rare populations, retaining the biological complexity of the sample while drastically compressing the dataset. +The function `SketchData` takes a normalized single-cell dataset (stored either on-disk or in-memory), and a set of variable features. It returns a Seurat object with a new assay (`sketch`), consisting of 50,000 cells, but these cells are now stored in-memory. Users can now easily switch between the in-memory and on-desk representation just by changing the default assay. +```{r, warning=FALSE, message=FALSE} +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- SketchData(object = obj, ncells = 50000, method = 'LeverageScore', sketched.assay = 'sketch') +obj +# switch to analyzing the full dataset (on-disk) +DefaultAssay(obj) <- 'RNA' +# switch to analyzing the sketched dataset (in-memory) +DefaultAssay(obj) <- 'sketch' +``` + +## Perform clustering on the sketched dataset +Now that we have compressed the dataset, we can perform standard clustering and visualization of a 50,000 cell dataset. +After clustering, we can see groups of cells that clearly correspond to precursors of distinct lineages, including endothelial cells (Igfbp7), Excitatory (Neurod6) and Inhibitory (Dlx2) neurons, Intermediate Progenitors (Eomes), Radial Glia (Vim), Cajal-Retzius cells (Reln), Oligodendroytes (Olig1), and extremely rare populations of macrophages (C1qa) that were oversampled in our sketched data. +```{r, warning=FALSE, message=FALSE} +DefaultAssay(obj) <- 'sketch' +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) +obj <- FindNeighbors(obj, dims = 1:50) +obj <- FindClusters(obj, resolution = 2) +obj <- RunUMAP(obj, dims = 1:50, return.model = T) +DimPlot(obj, label = T, reduction = 'umap') + NoLegend() +``` + +```{r,fig.height = 15, fig.width = 15} +FeaturePlot(obj, c('Igfbp7', 'Neurod6', 'Dlx2', 'Eomes', 'Vim', 'Reln', 'Olig1', 'C1qa'), ncol = 4) +``` +## Extend results to the full datasets +We can now extend the cluster labels and dimensional reductions learned on the sketched cells to the full dataset. The `ProjectData` function projects the on-disk data, onto the `sketch` assay. It returns a Seurat object that includes a +* Dimensional reduction (PCA): The `pca.full` dimensional reduction extends the `pca` reduction on the sketched cells to all cells in the dataset +* Dimensional reduction (UMAP): The `umap.full` dimensional reduction extends the `full` reduction on the sketched cells to all cells in the dataset +* Cluster labels: The `cluster_full` column in the object metadata now labels all cells in the dataset with one of the cluster labels derived from the sketched cells +NOTE GET RID OF THE predicted_ IN THE METADATA COLUMN NAME +```{r, warning=FALSE, message=FALSE} +obj <- ProjectData(object = obj, + assay = 'RNA', + full.reduction = 'pca.full', + sketched.assay = 'sketch', + sketched.reduction = 'pca', + umap.model = 'umap', + dims = 1:50, + refdata = list(cluster_full = 'seurat_clusters') + ) +# now that we have projected the full dataset, switch back to analyzing all cells +DefaultAssay(obj) <- 'RNA' +``` + +```{r} +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() +# visualize gene expression on the sketched cells (fast) and the full dataset (slower) +DefaultAssay(obj) <- 'sketch' +x1 <- FeaturePlot(obj, 'C1qa') +DefaultAssay(obj) <- 'RNA' +x2 <- FeaturePlot(obj, 'C1qa') +x1 | x2 +``` + +## Perform iterative sub-clustering +NOTE THAT THE CLUSTER IDS WILL CHANGE +Now that we have performed an initial analysis of the dataset, we can iteratively 'zoom-in' on a cell subtype of interest, extract all cells of this type, and perform iterative sub-clustering. For example, we can see that Dlx2+ interneuron precursors are defined by clusters 7, 9, 16, and 29 +```{r} +DefaultAssay(obj) <- 'sketch' +VlnPlot(obj, 'Dlx2') +``` + +We therefore extract all cells from the full on-disk dataset that are present in these clusters. There are XX,XXX of them. Since this is a manageable number, we can convert these data from on-disk storage into in-memory storage. We can then proceed with standard clustering. +```{r} +# subset cells in these clusters. Note that the data remains on-disk after subsetting +obj.sub <- subset(obj, subset = predicted.cluster_full %in% c(7, 9, 15, 29)) +# now convert the RNA assay (previously on-disk) into an in-memory representation (sparse Matrix) +obj.sub[['RNA']] <- CastAssay(object = obj.sub[['RNA']], to = 'dgCMatrix') +# recluster the cells +obj.sub <- FindVariableFeatures(obj.sub) +obj.sub <- ScaleData(obj.sub) +obj.sub <- RunPCA(obj.sub) +obj.sub <- RunUMAP(obj.sub, dims = 1:30) +obj.sub <- FindNeighbors(obj.sub, dims = 1:30) +obj.sub <- FindClusters(obj.sub) +``` + +```{r} +DimPlot(obj.sub, label = T) + NoLegend() +``` +Note that we can start to see distinct interneuron lineages emerging in this dataset. We can see a clear separation of interneuron precursors that originated from the medial ganglionic eminence (Lhx6) or caudal ganglionic eminence (Nr2f2). We can further see the emergence of Sst (Sst) and Pvalb (Mef2c)-committed interneurons, and a CGE-derived Meis2-expressing progenitor population. +These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. + +```{r save.times, include=FALSE} +print(as.data.frame(all_times)) +write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_sketch_analysis.csv") +``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    \ No newline at end of file From 9896fbedeea6f208645854fbd097cc6f471cd23e Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 17 Mar 2023 22:37:29 -0400 Subject: [PATCH 524/979] fix sketch formatting --- vignettes/seurat5_bpcells_interaction_vignette.Rmd | 2 +- vignettes/seurat5_sketch_analysis.Rmd | 2 ++ vignettes/seurat5_spatial_vignette.Rmd | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index b4e819458..42aed0424 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -8,7 +8,7 @@ output: date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' --- *** -```{r setup, include=TRUE} +```{r setup, include=FALSE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 021ac9605..47ac4f377 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -36,12 +36,14 @@ knitr::opts_chunk$set( As single-cell sequencing technologies continue to improve in scalability in throughput, the generation of datasets spanning a million or more cells is becoming increasingly routine. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore these exciting datasets. In this vignette, we introduce a sketch-based analysis workflow to analyze a 1.3 million cell dataset of the developing mouse brain, freely available from 10x Genomics. Analyzing datasets of this size with standard workflows can be challenging, slow, and memory-intensive. Here we introduce an alternative workflow that is highly scalable, even to datasets ranging beyond 10 million cells in size. Our 'sketch-based' workflow involves three new features in Seurat v5 + * Infrastructure for on-disk storage of large single-cell datasets Storing expression matrices in memory can be challenging for extremely large scRNA-seq datasets. In Seurat v5, we introduce support for multiple on-disk storage formats. * 'Sketching' methods to subsample cells from large datasets while preserving rare populations As introduced in [Hie et al, 2019](https://www.sciencedirect.com/science/article/pii/S2405471219301528), cell sketching methods aim to compactly summarize large single-cell datasets in a small number of cells, while preserving the presence of both abundant and rare cell types. In Seurat v5, we leverage this idea to select subsamples ('sketches') of cells from large datasets that are stored on-disk. However, after sketching, the subsampled cells can be stored in-memory, allowing for interactive and rapid visualization and exploration. We store sketched cells (in-memory) and the full dataset (on-disk) as two assays in the same Seurat object. Users can then easily switch between the two versions, providing the flexibiltiy to perform quick analyses on a subset of cells in-memory, while retaining access to the full dataset on-disk. * Support for 'bit-packing' compression and infrastructure + We demonstrate the on-disk capabilities in Seurat v5 using the [BPCells package](https://github.com/bnprks/BPCells) developed by Ben Parks in the Greenleaf Lab. This package utilizes bit-packing compression and optimized, streaming-compatible C++ code to substantially improve I/O and computational performance when working with on-disk data. To run this vignette please install Seurat v5, using the installation instructions found [here](LINK). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](LINK). ```{r, warning=FALSE, message=FALSE} diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 8859d77de..28a1f8e20 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -8,7 +8,7 @@ output: date: 'Compiled: `r Sys.Date()`' --- -```{r setup, include=TRUE} +```{r setup, include=FALSE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL From 8ba79c1679389fec1a6ead1f87d0511fcace16db Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 18 Mar 2023 15:36:57 -0400 Subject: [PATCH 525/979] add niches function --- R/utilities.R | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index b6d07f1a7..86b086d1c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2812,3 +2812,69 @@ CreateCategoryMatrix <- function( rownames(category.matrix) <- cells.name return(category.matrix) } + +#' Construct an assay for spatial niche analysis +#' +#' This function will construct a new assay where each feature is a +#' cell label The values represents the sum of a particular cell label +#' neighboring a given cell. +#' +#' @param object A Seurat object +#' @param fov FOV object to gather cell positions from +#' @param group.by Cell classifications to count in spatial neighborhood +#' @param assay Name for spatial neighborhoods assay +#' @param neighbors.k Number of neighbors to consider for each cell +#' @param niches.k Number of clusters to return based on the niche assay +#' +#' @importFrom stats kmeans +#' @return Seurat object containing a new assay +#' @concept clustering +#' @export +#' +BuildNicheAssay <- function( + object, + fov, + group.by, + assay = "niche", + neighbors.k = 20, + niches.k = 4 +) { + # find neighbors based on tissue position + coords <- GetTissueCoordinates(object[[fov]], which = "centroids") + cells <- coords$cell + rownames(coords) <- cells + coords <- as.matrix(coords[ , c("x", "y")]) + neighbors <- FindNeighbors(coords, k.param = neighbors.k) + neighbors$nn <- neighbors$nn[Cells(object), Cells(object)] + + # build cell x cell type matrix + ct.mtx <- matrix( + data = 0, + nrow = length(cells), + ncol = length(unlist(unique(object[[group.by]]))) + ) + rownames(ct.mtx) <- cells + colnames(ct.mtx) <- unique(unlist(object[[group.by]])) + cts <- object[[group.by]] + for (i in 1:length(cells)) { + ct <- as.character(cts[cells[[i]], ]) + ct.mtx[cells[[i]], ct] <- 1 + } + + # create niche assay + sum.mtx <- as.matrix(neighbors$nn %*% ct.mtx) + niche.assay <- CreateAssayObject(counts = t(sum.mtx)) + object[[assay]] <- niche.assay + DefaultAssay(object) <- assay + + # cluster niches assay + object <- ScaleData(object) + results <- kmeans( + x = t(object[[assay]]@scale.data), + centers = niches.k, + iter.max = 100 + ) + object$niches <- results[["cluster"]] + + return(object) +} From 9fff47ac6832fa6bfd070c9d9a98051c8564b373 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 18 Mar 2023 15:37:33 -0400 Subject: [PATCH 526/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9618da446..a6b226676 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9038 -Date: 2023-03-16 +Version: 4.9.9.9039 +Date: 2023-03-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 10d7988006bff4f50df52c5e84288acc4a9f1499 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 18 Mar 2023 15:57:46 -0400 Subject: [PATCH 527/979] update namespace --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index ba5ea6011..34923ac91 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -198,6 +198,7 @@ export(BlueAndRed) export(BoldTitle) export(BridgeCellsRepresentation) export(BuildClusterTree) +export(BuildNicheAssay) export(CCAIntegration) export(CalcPerturbSig) export(CalculateBarcodeInflections) From 1f652b1f0643a53006c8b842d8d2c684d66f99a1 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 18 Mar 2023 15:59:19 -0400 Subject: [PATCH 528/979] update namespace --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index ba5ea6011..34923ac91 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -198,6 +198,7 @@ export(BlueAndRed) export(BoldTitle) export(BridgeCellsRepresentation) export(BuildClusterTree) +export(BuildNicheAssay) export(CCAIntegration) export(CalcPerturbSig) export(CalculateBarcodeInflections) From c4c2aac3628c6f0367ba901fc1810f067389c291 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 20 Mar 2023 13:23:51 -0400 Subject: [PATCH 529/979] add cluster.name --- R/clustering.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index e4661e6e0..a9d240c23 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -403,6 +403,7 @@ FindClusters.default <- function( #' @importFrom methods is #' #' @param graph.name Name of graph to use for the clustering algorithm +#' @param cluster.name Name of output clusters #' #' @rdname FindClusters #' @export @@ -412,6 +413,7 @@ FindClusters.default <- function( FindClusters.Seurat <- function( object, graph.name = NULL, + cluster.name = NULL, modularity.fxn = 1, initial.membership = NULL, node.sizes = NULL, @@ -452,11 +454,13 @@ FindClusters.Seurat <- function( verbose = verbose, ... ) - names(x = clustering.results) <- paste( - graph.name, - names(x = clustering.results), - sep = '_' - ) + cluster.name <- cluster.name %||% + paste( + graph.name, + names(x = clustering.results), + sep = '_' + ) + names(x = clustering.results) <- cluster.name # object <- AddMetaData(object = object, metadata = clustering.results) # Idents(object = object) <- colnames(x = clustering.results)[ncol(x = clustering.results)] idents.use <- names(x = clustering.results)[ncol(x = clustering.results)] From c78bef14cc3d24d2aff231043bb696bebd97769e Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 20 Mar 2023 14:32:23 -0400 Subject: [PATCH 530/979] v5 vignette text --- vignettes/install_seurat5.Rmd | 19 +++ vignettes/seurat5_integration.Rmd | 91 +++++++++++++++ vignettes/seurat5_spatial_vignette_2.Rmd | 140 ++++++++++++++++++++++- vignettes/spatial_vignette_2.Rmd | 139 ++++++++++++++++++++++ vignettes/vignettes_v5.yaml | 16 ++- 5 files changed, 402 insertions(+), 3 deletions(-) create mode 100644 vignettes/install_seurat5.Rmd create mode 100644 vignettes/seurat5_integration.Rmd diff --git a/vignettes/install_seurat5.Rmd b/vignettes/install_seurat5.Rmd new file mode 100644 index 000000000..0bddac8d1 --- /dev/null +++ b/vignettes/install_seurat5.Rmd @@ -0,0 +1,19 @@ +--- +title: "Install Seurat v5" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** + +Copy the code below to install Seurat v5. + +```{r, eval=FALSE} +remotes::install_github("mojaveazure/seurat-object", "seurat5", quiet = TRUE) +remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE) +remotes::install_github("satijalab/seurat-data", "seurat5", quiet = TRUE) +remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) +remotes::install_github("bnprks/BPCells") # for on-disk capabilities +``` diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd new file mode 100644 index 000000000..e6911e43e --- /dev/null +++ b/vignettes/seurat5_integration.Rmd @@ -0,0 +1,91 @@ +--- +title: "Integrative analysis in Seurat v5" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +```{r setup, include=FALSE} +library(Seurat) +library(SeuratData) +library(Azimuth) +options(future.globals.maxSize = 1e9) +``` + +## Introduction +Integration of single-cell sequencing datasets, for example across experimental batches, donors, or conditions, is often an important step in scRNA-seq workflows. Integrative analysis can help to match shared cell types and states across datasets, which can boost statistical power, and most importantly, facilitate accurate comparative analysis across datasets. In previous versions of Seurat we introduced methods for integrative analysis, including our ‘anchor-based’ integration workflow. Many labs have also published powerful and pioneering methods, including Harmony and scVI, for integrative analysis. +We recognize that while the goal of matching shared cell types across datasets may be important for many problems, users may also be concerned about which method to use, or that integration could result in a loss of biological resolution. In Seurat v5, we introduce more flexible and streamlined infrastructure to run different integration algorithms with a single line of code. This makes it easier to explore the results of different integration methods, and to compare these results to a workflow that excludes integration steps. +For this vignette, we use a [dataset of human PBMC profiled with seven different technologies](https://www.nature.com/articles/s41587-020-0465-8), profiled as part of a systematic comparative analysis (`pbmcsca`). The data is available as part of our [SeuratData](https://github.com/satijalab/seurat-data) package. +## Layers in the Seurat v5 object +Seurat v5 assays store data in layers. These layers can store raw, un-normalized counts (`layer='counts'`), normalized data (`layer='data'`), or z-scored/variance-stabilized data (`layer='scale.data'`). We can load in the data, remove low-quality cells, and obtain predicted cell annotations (which will be useful for assessing integration later), using our [Azimuth pipeline](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html). +```{r} +# load in the pbmc systematic comparative analysis dataset +obj <- LoadData("pbmcsca") +obj <- UpdateSeuratObject(obj) +obj <- subset(obj, nFeature_RNA > 1000) +obj <- RunAzimuth(obj, reference = "pbmcref") +# currently, the object has two layers in the RNA assay: counts, and data +obj +``` +The object contains data from nine different batches (stored in the `Method` column in the object metadata), representing seven different technologies. We will aim to integrate the different batches together. In previous versions of Seurat, we would require the data to be represented as nine different Seurat objects. When using Seurat v5 assays, we can instead keep all the data in one object, but simply split the layers. +After splitting, there are now 18 layers (a `counts` and `data` layer for each batch). We can also run a standard scRNA-seq analysis (i.e. without integration). Note that since the data is split into layers, normalization and variable feature identification is performed for each batch independently (a consensus set of variable features is automatically identified). +```{r} +obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +obj +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) +``` +We can now visualize the results of a standard analysis without integration. Note that cells are grouping both by cell type and by underlying method. While a UMAP analysis is just a visualization of this, clustering this dataset would return predominantly batch-specific clusters. Especially if previous cell-type annotations were not available, this would make downstream analysis extremely challenging. +```{r} +obj <- FindNeighbors(obj, dims=1:30, reduction = 'pca') +obj <- FindNeighbors(obj, resolution = 2, cluster.names="unintegrated_clusters") +obj <- RunUMAP(obj, dims = 1:30, reduction = 'pca', reduction.name = 'umap.unintegrated') +# visualize by batch and cell type annotation +# cell type annotations were previously added by Azimuth +DimPlot(obj, reduction = 'unintegrated.umap', group.by=c('batch','predicted.celltype.l2')) +``` +Seurat v5 enables streamlined integrative analysis using the `IntegrateLayers` function. The method currently supports five integration methods. Each of these methods performs integration in low-dimensional space, and returns a dimensional reduction (i.e. `integrated.rpca`) that aims to co-embed shared cell types across batches: +* Anchor-based CCA integration (`method=CCAIntegration`) +* Anchor-based RPCA integration (`method=RPCAIntegration`) +* Harmony (`method=HarmonyIntegration`) +* mnnCorrect (`method= mnnCorrectIntegration`) +* scVI (`method=scVIIntegration`) +Note that scVI integration requires INSERT DESCRIPTIVE TEXT FOR REQUIREMENTS +```{r} +# add results for Anchor-based (CCA) +obj <- IntegrateLayers(object = obj, method = CCAIntegration, verbose = F, new.reduction = 'integrated.cca') +# add results for Anchor-based (rPCA) +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, verbose = F, new.reduction = 'integrated.rpca') +# add results for Harmony +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, verbose = F, new.reduction = 'harmony') +# add results for Anchor-based (scI) +obj <- IntegrateLayers(object = obj, method = scVIIntegration, verbose = F, new.reduction = 'integrated.scvi', + conda_env = '/home/haoy/miniconda3/envs/scvi-env') + +``` +For any of the methods, we can now visualize and cluster the datasets. We show this for CCA integration and scVI, but you can do this for any method +```{r} +obj <- FindNeighbors(obj, reduction = 'integrated.cca', dims = 1:30) +obj <- FindClusters(obj,resolution = 2, cluster.names='cca_clusters') +obj <- RunUMAP(obj, reduction = "integrated.cca", dims = 1:30, reduction.name = 'umap.cca') +DimPlot(obj, reduction="umap.cca", group.by=c("Method", "predicted.celltype.l2", "cca_clusters")) +obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:30) +obj <- FindClusters(obj,resolution = 2, cluster.names='scvi_clusters') +obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:30, reduction.name = 'umap.scvi') +DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.celltype.l2", "scvi_clusters")) +``` +We hope that by simplifying the process of performing integrative analysis, users can more carefully evaluate the biological information retained in the integrated dataset. For example, users can compare the expression of biological markers based on different clustering solutions, or visualize one method's clustering solution on different UMAP visualizations. +```{r} +p1 <- VlnPlot(obj, "CD8A", group.by = 'unintegrated_clusters') +p2 <- VlnPlot(obj, "CD8A", group.by = 'cca_clusters') +p3 <- VlnPlot(obj, "CD8A", group.by = 'scvi_clusters') +p1 | p2 | p3 +p4 <- DimPlot(obj, reduction="umap.unintegrated", group.by=c("cca_clusters")) +p5 <- DimPlot(obj, reduction="umap.rpca", group.by=c("cca_clusters")) +p6 <- DimPlot(obj, reduction="umap.scvi", group.by=c("cca_clusters")) +p4 | p5 | p6 +``` \ No newline at end of file diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index db9ca53b2..45fe14221 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -48,6 +48,7 @@ library(Seurat) options(Seurat.object.assay.version = "v5") library(future) plan("multisession", workers = 10) +library(ggplot2) ``` # Mouse Brain: Vizgen MERSCOPE @@ -233,7 +234,6 @@ ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Sst", "Pvalb", "Gfa ``` ```{r save.img, include=FALSE} -library(ggplot2) plot <- ImageDimPlot(xenium.obj, fov = "fov", molecules = c("Gad1", "Gfap"), nmols = 40000, alpha=0.01, dark.background = F, mols.alpha = 0.6) + coord_flip() + scale_x_reverse() + NoLegend() ggsave(filename = "../output/images/spatial_vignette_2.jpg", height = 5, width = 9, plot = plot) ``` @@ -279,6 +279,144 @@ We can now use `ImageDimPlot()` to color the cell positions colored by the clust ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75) ``` +Using the positional information of each cell, we compute spatial niches. +We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. +Below, we use Slc17a7 expression to help determine the cortical region. + +```{r, fig.width=5, fig.height=5, warning=FALSE} +xenium.obj <- LoadXenium("../data/xenium_tiny_subset") +p1 <- ImageFeaturePlot(xenium.obj, features = "Slc17a7", axes = TRUE, max.cutoff = "q90") +p1 +``` + +```{r resolve.crop, fig.width=5, fig.height=7, warning=FALSE} +crop <- Crop(xenium.obj[["fov"]], x=c(600, 2100), y=c(900, 4700)) +xenium.obj[["crop"]] <- crop +p2 <- ImageFeaturePlot( + xenium.obj, + fov = "crop", + features = "Slc17a7", + size = 1, + axes = TRUE, + max.cutoff = "q90") +p2 +``` + +Annotation of spatial datasets can be tricky, and single cell methods are not always effective. Here, we use RCTD, which directly accounts for cell type mixing at each spot or segmentation, to annotate cells. For more details on RCTD, please see the [paper](https://doi.org/10.1038/s41587-021-00830-w). + +First, we install the `spacexr` package from GitHub which implements RCTD. + +```{r, rctd.install, eval=FALSE} +devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) +``` + +Counts, cluster, and spot information is extracted from the Seurat query and reference objects to construct `Reference` and `SpatialRNA` objects used by RCTD for annotation. + +```{r rctd.qeury, warning=FALSE} +library(spacexr) + +query.counts <- GetAssayData(xenium.obj, assay = "Xenium", slot = "counts")[, Cells(xenium.obj[["crop"]])] +coords <- GetTissueCoordinates(xenium.obj[["crop"]], which = "centroids") +rownames(coords) <- coords$cell +coords$cell <- NULL +query <- SpatialRNA(coords, query.counts, colSums(query.counts)) +``` + +```{r rctd.reference, eval=FALSE} +allen.cortex.ref <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") +allen.cortex.ref <- UpdateSeuratObject(allen.cortex.ref) + +Idents(allen.cortex.ref) <- "subclass" +# remove CR cells because there aren't enough of them for annotation +allen.cortex.ref <- subset(allen.cortex.ref, subset = subclass != "CR") +counts <- GetAssayData(allen.cortex.ref, assay = "RNA", slot = "counts") +cluster <- as.factor(allen.cortex.ref$subclass) +names(cluster) <- colnames(allen.cortex.ref) +nUMI <- allen.cortex.ref$nCount_RNA +names(nUMI) <- colnames(allen.cortex.ref) +nUMI <- colSums(counts) +levels(cluster) <- gsub("/", "-", levels(cluster)) +reference <- Reference(counts, cluster, nUMI) +``` + +```{r niche.run.rctd, warning=FALSE, results=FALSE, eval=FALSE} +# run RCTD with many cores +RCTD <- create.RCTD(query, reference, max_cores = 8) +RCTD <- run.RCTD(RCTD, doublet_mode = "doublet") +``` + +Many spot annotations contain multiple cell type markers, so we filter only to singlets and add the annotations to the Seurat object for downstream analysis. + +```{r niche.add.annotations, eval=FALSE} +annotations.df <- RCTD@results$results_df +annotations.df <- annotations.df[annotations.df$spot_class == "singlet", ] +annotations <- annotations.df$first_type +names(annotations) <- rownames(annotations.df) +xenium.obj$predicted.celltype <- annotations +keep.cells <- Cells(xenium.obj)[!is.na(xenium.obj$predicted.celltype)] +xenium.obj <- subset(xenium.obj, cells = keep.cells) +``` + +We call the `BuildNicheAssay` function from within Seurat to construct a new assay called `niche` containing the cell type composition spatially neighboring each cell. A metadata column called `niches` is also returned, which contains cluster assignments based on the niche assay. + +```{r build.niche.assay, eval=FALSE} +xenium.obj <- BuildNicheAssay( + object = xenium.obj, + fov = "crop", + group.by = "predicted.celltype", + niches.k = 5, + neighbors.k = 30 +) +``` + +```{r load.niche.results, eval=TRUE, include=FALSE} +xenium.obj <- readRDS("/brahms/hartmana/xenium_niches_presaved.rds") +``` + +After clustering the cell type composition nearby each cell, the neuronal layers in the cortex are visually demarcated. + +```{r, niche.dimplots, fig.width=8, fig.height=6, warning=FALSE} +celltype.plot <- ImageDimPlot( + xenium.obj, + group.by = "predicted.celltype", + size = 1.5, + cols = "polychrome", + dark.background = F) + + ggtitle("Cell type") +niche.plot <- ImageDimPlot( + xenium.obj, + group.by = "niches", + size = 1.5, + dark.background = F) + + ggtitle("Niches") + + scale_fill_manual( + values = c("#442288", "#6CA2EA", "#B5D33D", "#FED23F", "#EB7D5B")) +celltype.plot | niche.plot +``` + +Further, we observe that the composition of each niche is enriched for distinct cell types. + +```{r niche.composition} +table(xenium.obj$predicted.celltype, xenium.obj$niches) +``` + +Next, we perform DE between atrocytes from two of the niches. + +Note: I think this style of analysis is very risky - most of the DEGs comparing a cell type across niches are cell type markers of the other cell types enriched in one of the niches likely due to incorrect molecular assignment to cells. + +```{r niche.de} +xenium.obj$celltype.niches <- paste0(xenium.obj$predicted.celltype, "_", xenium.obj$niches) +Idents(xenium.obj) <- "celltype.niches" +niche.markers <- FindMarkers(xenium.obj, assay = "Xenium", ident.1 = "Astro_1", ident.2 = "Astro_5") +``` + +```{r niche.vln} +VlnPlot( + xenium.obj, + idents = c("Astro_1", "Astro_5"), + assay = "Xenium", + features = rownames(niche.markers)[1:6]) +``` # Human Lung: Nanostring CosMx Spatial Molecular Imager diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index b891d1cf0..953892cff 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -278,6 +278,145 @@ We can now use `ImageDimPlot()` to color the cell positions colored by the clust ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75) ``` +Using the positional information of each cell, we compute spatial niches. +We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. +Below, we use Slc17a7 expression to help determine the cortical region. + +```{r, fig.width=5, fig.height=5, warning=FALSE} +xenium.obj <- LoadXenium("../data/xenium_tiny_subset") +p1 <- ImageFeaturePlot(xenium.obj, features = "Slc17a7", axes = TRUE, max.cutoff = "q90") +p1 +``` + +```{r resolve.crop, fig.width=5, fig.height=7, warning=FALSE} +crop <- Crop(xenium.obj[["fov"]], x=c(600, 2100), y=c(900, 4700)) +xenium.obj[["crop"]] <- crop +p2 <- ImageFeaturePlot( + xenium.obj, + fov = "crop", + features = "Slc17a7", + size = 1, + axes = TRUE, + max.cutoff = "q90") +p2 +``` + +Annotation of spatial datasets can be tricky, and single cell methods are not always effective. Here, we use RCTD, which directly accounts for cell type mixing at each spot or segmentation, to annotate cells. For more details on RCTD, please see the [paper](https://doi.org/10.1038/s41587-021-00830-w). + +First, we install the `spacexr` package from GitHub which implements RCTD. + +```{r, rctd.install, eval=FALSE} +devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) +``` + +Counts, cluster, and spot information is extracted from the Seurat query and reference objects to construct `Reference` and `SpatialRNA` objects used by RCTD for annotation. + +```{r rctd.qeury, warning=FALSE} +library(spacexr) + +query.counts <- GetAssayData(xenium.obj, assay = "Xenium", slot = "counts")[, Cells(xenium.obj[["crop"]])] +coords <- GetTissueCoordinates(xenium.obj[["crop"]], which = "centroids") +rownames(coords) <- coords$cell +coords$cell <- NULL +query <- SpatialRNA(coords, query.counts, colSums(query.counts)) +``` + +```{r rctd.reference, eval=FALSE} +allen.cortex.ref <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") +allen.cortex.ref <- UpdateSeuratObject(allen.cortex.ref) + +Idents(allen.cortex.ref) <- "subclass" +# remove CR cells because there aren't enough of them for annotation +allen.cortex.ref <- subset(allen.cortex.ref, subset = subclass != "CR") +counts <- GetAssayData(allen.cortex.ref, assay = "RNA", slot = "counts") +cluster <- as.factor(allen.cortex.ref$subclass) +names(cluster) <- colnames(allen.cortex.ref) +nUMI <- allen.cortex.ref$nCount_RNA +names(nUMI) <- colnames(allen.cortex.ref) +nUMI <- colSums(counts) +levels(cluster) <- gsub("/", "-", levels(cluster)) +reference <- Reference(counts, cluster, nUMI) +``` + +```{r niche.run.rctd, warning=FALSE, results=FALSE, eval=FALSE} +# run RCTD with many cores +RCTD <- create.RCTD(query, reference, max_cores = 8) +RCTD <- run.RCTD(RCTD, doublet_mode = "doublet") +``` + +Many spot annotations contain multiple cell type markers, so we filter only to singlets and add the annotations to the Seurat object for downstream analysis. + +```{r niche.add.annotations, eval=FALSE} +annotations.df <- RCTD@results$results_df +annotations.df <- annotations.df[annotations.df$spot_class == "singlet", ] +annotations <- annotations.df$first_type +names(annotations) <- rownames(annotations.df) +xenium.obj$predicted.celltype <- annotations +keep.cells <- Cells(xenium.obj)[!is.na(xenium.obj$predicted.celltype)] +xenium.obj <- subset(xenium.obj, cells = keep.cells) +``` + +We call the `BuildNicheAssay` function from within Seurat to construct a new assay called `niche` containing the cell type composition spatially neighboring each cell. A metadata column called `niches` is also returned, which contains cluster assignments based on the niche assay. + +```{r build.niche.assay, eval=FALSE} +xenium.obj <- BuildNicheAssay( + object = xenium.obj, + fov = "crop", + group.by = "predicted.celltype", + niches.k = 5, + neighbors.k = 30 +) +``` + +```{r load.niche.results, eval=TRUE, include=FALSE} +xenium.obj <- readRDS("/brahms/hartmana/xenium_niches_presaved.rds") +``` + +After clustering the cell type composition nearby each cell, the neuronal layers in the cortex are visually demarcated. + +```{r, niche.dimplots, fig.width=8, fig.height=6, warning=FALSE} +celltype.plot <- ImageDimPlot( + xenium.obj, + group.by = "predicted.celltype", + size = 1.5, + cols = "polychrome", + dark.background = F) + + ggtitle("Cell type") +niche.plot <- ImageDimPlot( + xenium.obj, + group.by = "niches", + size = 1.5, + dark.background = F) + + ggtitle("Niches") + + scale_fill_manual( + values = c("#442288", "#6CA2EA", "#B5D33D", "#FED23F", "#EB7D5B")) +celltype.plot | niche.plot +``` + +Further, we observe that the composition of each niche is enriched for distinct cell types. + +```{r niche.composition} +table(xenium.obj$predicted.celltype, xenium.obj$niches) +``` + +Next, we perform DE between atrocytes from two of the niches. + +Note: I think this style of analysis is very risky - most of the DEGs comparing a cell type across niches are cell type markers of the other cell types enriched in one of the niches likely due to incorrect molecular assignment to cells. + +```{r niche.de} +xenium.obj$celltype.niches <- paste0(xenium.obj$predicted.celltype, "_", xenium.obj$niches) +Idents(xenium.obj) <- "celltype.niches" +niche.markers <- FindMarkers(xenium.obj, assay = "Xenium", ident.1 = "Astro_1", ident.2 = "Astro_5") +``` + +```{r niche.vln} +VlnPlot( + xenium.obj, + idents = c("Astro_1", "Astro_5"), + assay = "Xenium", + features = rownames(niche.markers)[1:6]) +``` + # Human Lung: Nanostring CosMx Spatial Molecular Imager This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index a0452d233..8c440d62a 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -70,10 +70,22 @@ name: seurat5_sketch_analysis summary: | Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5. - image: tbd + image: tbd.jpg + + - title: Integration Updated + name: seurat5_integration + summary: | + Integrate datesets in Seurat 5 using a variety of methods + image: tbd.jpg - title: BPCells interation name: seurat5_bpcells_interaction_vignette summary: | Load and save large on-disk matrices - image: tbd \ No newline at end of file + image: tbd.jpg + + - title: Install Seurat v5 + name: install_seurat5 + summary: | + Install Seurat 5 and required dependencies + image: seurat_logo.jpg \ No newline at end of file From 17520a36f712eba6e4737f9794be8f3306d1f28a Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 20 Mar 2023 14:36:10 -0400 Subject: [PATCH 531/979] fix paths --- vignettes/seurat5_spatial_vignette_2.Rmd | 12 ++++++------ vignettes/spatial_vignette_2.Rmd | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 45fe14221..5338db219 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -63,7 +63,7 @@ We use the `LoadVizgen()` function, which we have written to read in the output ```{r, message=FALSE, warning=FALSE} # Loading segmentations is a slow process and multi processing with the future pacakge is recommended -vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/vignette_data/vizgen/s2r1/", fov = "s2r1") +vizgen.obj <- LoadVizgen(data.dir = "../data/vizgen/s2r1/", fov = "s2r1") ``` The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: @@ -214,7 +214,7 @@ unzip Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute. ```{r load.xenium, results='hide'} -path <- "/brahms/hartmana/vignette_data/xenium_tiny_subset" +path <- "../data/xenium_tiny_subset" # Load the Xenium data xenium.obj <- LoadXenium(path, fov = "fov") # remove cells with 0 counts @@ -370,7 +370,7 @@ xenium.obj <- BuildNicheAssay( ``` ```{r load.niche.results, eval=TRUE, include=FALSE} -xenium.obj <- readRDS("/brahms/hartmana/xenium_niches_presaved.rds") +xenium.obj <- readRDS("../data/xenium_niches_presaved.rds") ``` After clustering the cell type composition nearby each cell, the neuronal layers in the cortex are visually demarcated. @@ -427,12 +427,12 @@ In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `Lo For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. ```{r load} -nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") +nano.obj <- LoadNanostring(data.dir = "../data/nanostring/lung5_rep1", fov="lung5.rep1") ``` ```{r integration} # add in precomputed Azimuth annotations -azimuth.data <- readRDS("/brahms/hartmana/vignette_data/nanostring_data.Rds") +azimuth.data <- readRDS("../data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 @@ -514,7 +514,7 @@ First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function ```{r} codex.obj <- LoadAkoya( - filename = "/brahms/hartmana/vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", + filename = "../data/LN7910_20_008_11022020_reg001_compensated.csv", type = "processor", fov = "HBM754.WKLP.262" ) diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 953892cff..edf7a452d 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -369,7 +369,7 @@ xenium.obj <- BuildNicheAssay( ``` ```{r load.niche.results, eval=TRUE, include=FALSE} -xenium.obj <- readRDS("/brahms/hartmana/xenium_niches_presaved.rds") +xenium.obj <- readRDS("../data/xenium_niches_presaved.rds") ``` After clustering the cell type composition nearby each cell, the neuronal layers in the cortex are visually demarcated. From 56cedbe2f19f1bd9cb1298743327f1fbf953e018 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 20 Mar 2023 16:10:50 -0400 Subject: [PATCH 532/979] add knitr options --- vignettes/install_seurat5.Rmd | 27 +++++++++++++++++++++++++-- vignettes/seurat5_integration.Rmd | 30 +++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/vignettes/install_seurat5.Rmd b/vignettes/install_seurat5.Rmd index 0bddac8d1..3182c8cdf 100644 --- a/vignettes/install_seurat5.Rmd +++ b/vignettes/install_seurat5.Rmd @@ -3,10 +3,33 @@ title: "Install Seurat v5" output: html_document: theme: united - df_print: kable + pdf_document: default date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' --- -*** + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` Copy the code below to install Seurat v5. diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index e6911e43e..59f74bdb8 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -1,13 +1,37 @@ --- title: "Integrative analysis in Seurat v5" -output: html_document +output: + html_document: + theme: united + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) ``` -```{r setup, include=FALSE} +```{r setup} library(Seurat) library(SeuratData) library(Azimuth) From 7634feb0fbec55df0c0b5fe513f71671b317b493 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 20 Mar 2023 18:29:23 -0400 Subject: [PATCH 533/979] adding fastmnn to integration vignette --- vignettes/seurat5_integration.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 59f74bdb8..7a0d3b2d6 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -76,7 +76,7 @@ Seurat v5 enables streamlined integrative analysis using the `IntegrateLayers` f * Anchor-based CCA integration (`method=CCAIntegration`) * Anchor-based RPCA integration (`method=RPCAIntegration`) * Harmony (`method=HarmonyIntegration`) -* mnnCorrect (`method= mnnCorrectIntegration`) +* FastMNN (`method= FastMNNIntegration`) * scVI (`method=scVIIntegration`) Note that scVI integration requires INSERT DESCRIPTIVE TEXT FOR REQUIREMENTS ```{r} @@ -87,6 +87,7 @@ obj <- IntegrateLayers(object = obj, method = RPCAIntegration, verbose = F, new. # add results for Harmony obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, verbose = F, new.reduction = 'harmony') # add results for Anchor-based (scI) +obj <- IntegrateLayers(object = obj, method = FastMNNIntegration, verbose = F, new.reduction = 'integrated.mnn') obj <- IntegrateLayers(object = obj, method = scVIIntegration, verbose = F, new.reduction = 'integrated.scvi', conda_env = '/home/haoy/miniconda3/envs/scvi-env') From 48b6f561b84a3fccf71191ca0fdac7f6db2d516d Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 20 Mar 2023 18:32:17 -0400 Subject: [PATCH 534/979] seurat wrappers in integration vignette --- vignettes/seurat5_integration.Rmd | 1 + vignettes/seurat5_integration_bridge.Rmd | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 7a0d3b2d6..400de0108 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -34,6 +34,7 @@ knitr::opts_chunk$set( ```{r setup} library(Seurat) library(SeuratData) +library(SeuratWrappers) library(Azimuth) options(future.globals.maxSize = 1e9) ``` diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 6f07c095e..f4295a36f 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -63,8 +63,8 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he **Load and setup the 10x multiome object** ```{r} -# the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("../data/pbmc_cellranger_arc_2/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -80,11 +80,11 @@ grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) atac_counts <- atac_counts[as.vector(grange.use), ] # Get gene annotations annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) -# Change style to UCSC +# Change style to UCSC seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" -# File with ATAC per fragment information file -frag.file <- "../data/pbmc_cellranger_arc_2/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +# File with ATAC per fragment information file +frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( counts = atac_counts, @@ -94,7 +94,7 @@ chrom_assay <- CreateChromatinAssay( min.cells = 10, annotation = annotations ) -# Add the ATAC assay to the multiome object +# Add the ATAC assay to the multiome object obj.multi[["ATAC"]] <- chrom_assay # Filter ATAC data based on QC metrics obj.multi <- subset( @@ -119,8 +119,8 @@ We note that it is important to quantify the same set of genomic features in the ```{r, message=FALSE, warning=FALSE} # Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" # Get gene annotations annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) # Change to UCSC style @@ -156,7 +156,7 @@ obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") +obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") ```
    **What if I want to use my own reference dataset?** From 5a2b56366670bc2f7dba010660a2a12210228ec2 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 20 Mar 2023 18:39:15 -0400 Subject: [PATCH 535/979] update chunk name --- vignettes/seurat5_integration.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 59f74bdb8..b26ce7386 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -31,7 +31,7 @@ knitr::opts_chunk$set( ) ``` -```{r setup} +```{r init} library(Seurat) library(SeuratData) library(Azimuth) From 40762b0fadbc81e1edbac010959ad89872210187 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Mon, 20 Mar 2023 23:41:41 -0400 Subject: [PATCH 536/979] update site --- _pkgdown.yaml | 67 +----- index.md | 4 - vignettes/get_started_v5.Rmd | 10 +- vignettes/get_started_v5.nb.html | 293 -------------------------- vignettes/seurat5_sketch_analysis.Rmd | 11 +- vignettes/vignettes_v5.yaml | 88 ++------ 6 files changed, 35 insertions(+), 438 deletions(-) delete mode 100644 vignettes/get_started_v5.nb.html diff --git a/_pkgdown.yaml b/_pkgdown.yaml index 8841d4c53..bf124eaf5 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -17,7 +17,7 @@ navbar: href: articles/install.html - text: "Get started" href: articles/get_started.html - - text: "Get started v5" + - text: "Seurat v5" href: articles/get_started_v5.html - text: "Vignettes" menu: @@ -76,71 +76,6 @@ navbar: href: articles/interaction_vignette.html - text: "Merging Seurat objects" href: articles/merge_vignette.html - - text: "Vignettes v5" - menu: - - text: Introductory Vignettes - - text: "PBMC 3K guided tutorial" - href: articles/seurat5_pbmc3k_tutorial.html - - text: "Using Seurat with multi-modal data" - href: articles/seurat5_multimodal_vignette.html - - text: "Analysis of spatial datasets (Sequencing-based)" - href: articles/seurat5_spatial_vignette.html - - text: "Analysis of spatial datasets (Imaging-based)" - href: articles/seurat5_spatial_vignette_2.html - - text: ------- - - text: Data Integration - - text: "Introduction to scRNA-seq integration" - href: articles/seurat5_integration_introduction.html - - text: "Mapping and annotating query datasets" - href: articles/seurat5_integration_mapping.html - - text: "Fast integration using reciprocal PCA (RPCA)" - href: articles/seurat5_integration_rpca.html - - text: "Tips for integrating large datasets" - href: articles/seurat5_integration_large_datasets.html - - text: "Integrating scRNA-seq and scATAC-seq data" - href: articles/seurat5_atacseq_integration_vignette.html - - text: "Multimodal reference mapping" - href: articles/seurat5_multimodal_reference_mapping.html - - text: ------- - - text: New Statistical Methods - - text: "Weighted Nearest Neighbor Analysis" - href: articles/seurat5_weighted_nearest_neighbor_analysis.html - - text: "Mixscape Vignette" - href: articles/seurat5_mixscape_vignette.html - - text: "Using sctransform in Seurat" - href: articles/seurat5_sctransform_vignette.html - - text: "SCTransform, v2 regularization" - href: articles/seurat5_sctransform_v2_vignette.html - - text: ------- - - text: Other - - text: "Data visualization vignette" - href: articles/seurat5_visualization_vignette.html - - text: "Cell-cycle scoring and regression" - href: articles/seurat5_cell_cycle_vignette.html - - text: "Differential expression testing" - href: articles/seurat5_de_vignette.html - - text: "Demultiplexing with hashtag oligos (HTOs)" - href: articles/seurat5_hashing_vignette.html - - text: "Interoperability between single-cell object formats" - href: articles/seurat5_conversion_vignette.html - - text: "Parallelization in Seurat with future" - href: articles/seurat5_future_vignette.html - - text: "Dimensional reduction vignette" - href: articles/seurat5_dim_reduction_vignette.html - - text: "Seurat essential commands list" - href: articles/seurat5_essential_commands.html - - text: "Seurat interaction tips" - href: articles/seurat5_interaction_vignette.html - - text: "Merging Seurat objects" - href: articles/seurat5_merge_vignette.html - - text: "Bridge Integration" - href: articles/seurat5_integration_bridge.html - - text: "Sketch Clustering (BPCells)" - href: articles/MouseBrain_sketch_clustering.html - - text: "COVID Mapping (BPCells)" - href: articles/COVID_SCTMapping.html - - text: "Sketch Integration (BPCells)" - href: articles/ParseBio_sketch_integration.html - text: Extensions href: articles/extensions.html - text: FAQ diff --git a/index.md b/index.md index a72af2569..7925418e9 100644 --- a/index.md +++ b/index.md @@ -1,9 +1,5 @@ ![](articles/assets/seurat_banner.jpg) -# Pre-release of Seurat 5.0 - -This is some text about Seurat v5.0 - # Official release of Seurat 4.0 We are excited to release Seurat v4.0! This update brings the following new features and functionality: diff --git a/vignettes/get_started_v5.Rmd b/vignettes/get_started_v5.Rmd index 0735df1b7..1a7f58ebb 100644 --- a/vignettes/get_started_v5.Rmd +++ b/vignettes/get_started_v5.Rmd @@ -1,5 +1,5 @@ --- -title: "Getting Started with Seurat 5" +title: "Introduction to Seurat 5" output: html_document: theme: united @@ -99,13 +99,7 @@ vdat <- read_yaml(file = "vignettes_v5.yaml") ``` -We provide a series of vignettes, tutorials, and analysis walkthroughs to help users get started with Seurat. You can also check out our [Reference page](../reference/index.html) which contains a full list of functions available to users. - -# Seurat 5 Vignettes - -For new users of Seurat, we suggest starting with a guided walk through of a dataset of 2,700 Peripheral Blood Mononuclear Cells (PBMCs) made publicly available by 10X Genomics. This tutorial implements the major components of a standard unsupervised clustering workflow including QC and data filtration, calculation of high-variance genes, dimensional reduction, graph-based clustering, and the identification of cluster markers. - -We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x multiome kit), or spatial datasets (e.g. 10x Visium or Vizgen MERFISH). +Seurat 5 placeholder text. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 1) diff --git a/vignettes/get_started_v5.nb.html b/vignettes/get_started_v5.nb.html deleted file mode 100644 index 4bb9d6bf6..000000000 --- a/vignettes/get_started_v5.nb.html +++ /dev/null @@ -1,293 +0,0 @@ - - - - - - - - - - - - - -R Notebook - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - - - - - - - -

    This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

    -

    Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

    - - - -
    plot(cars)
    - - - -

    Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

    -

    When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

    -

    The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

    - - -
    LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIAoKYGBge3J9CnBsb3QoY2FycykKYGBgCgpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ3RybCtBbHQrSSouCgpXaGVuIHlvdSBzYXZlIHRoZSBub3RlYm9vaywgYW4gSFRNTCBmaWxlIGNvbnRhaW5pbmcgdGhlIGNvZGUgYW5kIG91dHB1dCB3aWxsIGJlIHNhdmVkIGFsb25nc2lkZSBpdCAoY2xpY2sgdGhlICpQcmV2aWV3KiBidXR0b24gb3IgcHJlc3MgKkN0cmwrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4KClRoZSBwcmV2aWV3IHNob3dzIHlvdSBhIHJlbmRlcmVkIEhUTUwgY29weSBvZiB0aGUgY29udGVudHMgb2YgdGhlIGVkaXRvci4gQ29uc2VxdWVudGx5LCB1bmxpa2UgKktuaXQqLCAqUHJldmlldyogZG9lcyBub3QgcnVuIGFueSBSIGNvZGUgY2h1bmtzLiBJbnN0ZWFkLCB0aGUgb3V0cHV0IG9mIHRoZSBjaHVuayB3aGVuIGl0IHdhcyBsYXN0IHJ1biBpbiB0aGUgZWRpdG9yIGlzIGRpc3BsYXllZC4K
    - - - -
    - - - - - - - - - - - - - - - - diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 47ac4f377..662a4a770 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -59,7 +59,8 @@ We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_di ```{r} # note to Yuhan- this directory should already contain the ensembl-modified names. we can show how to do the ensembl mods in the interaction vignette # Connect to the counts matrix stored on-disk -ondisk_matrix <- open_matrix_dir('../data/mouse_1M_neurons_counts') +ondisk_matrix <- open_matrix_dir('/brahms/hartmana/vignette_data/mouse_1M_neurons_counts') +ondisk_matrix <- Azimuth::ConvertEnsembleToSymbol(mat = ondisk_matrix, species = "mouse") # specify that you would like to create a Seurat v5 assay # note that we require setting this option to ensure that existing pipelines are not affected @@ -121,6 +122,12 @@ obj <- ProjectData(object = obj, DefaultAssay(obj) <- 'RNA' ``` +```{r save.img} +library(ggplot2) +p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "predicted.cluster_full", alpha = 0.1) + NoLegend() +ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) +``` + ```{r} DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() # visualize gene expression on the sketched cells (fast) and the full dataset (slower) @@ -170,4 +177,4 @@ write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_ske ```{r} sessionInfo() ``` -
    \ No newline at end of file +
    diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 8c440d62a..296025a7e 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -1,91 +1,49 @@ - category: Introduction to v5 vignettes: - - title: Guided tutorial --- 2,700 PBMCs - name: seurat5_pbmc3k_tutorial - summary: | - A basic overview of Seurat that includes an introduction to common analytical workflows. - image: pbmc3k_umap.jpg - - - title: Multimodal analysis - name: seurat5_multimodal_vignette - summary: | - An introduction to working with multi-modal datasets in Seurat. - image: citeseq_plot.jpg - - - title: Analysis of spatial datasets (Sequencing-based) - name: seurat5_spatial_vignette + - title: Bridge Integration + name: seurat5_integration_bridge summary: | - Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. - image: spatial_vignette_ttr.jpg + Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. + image: bridge_integration.png - title: Analysis of spatial datasets (Imaging-based) name: seurat5_spatial_vignette_2 summary: | - Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Xenium, CosMx SMI, and CODEX. + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. image: spatial_vignette_2.jpg - - title: SCTransform - name: seurat5_sctransform_vignette - summary: | - Examples of how to use the SCTransform wrapper in Seurat. - image: assets/sctransform.png - - - title: Visualization - name: seurat5_visualization_vignette + - title: Sketch Clustering + name: seurat5_sketch_analysis summary: | - An overview of the major visualization functionality within Seurat. - image: visualization_vignette.jpg + Analyze a 1.3 million cell mouse brain dataset using the on-disk capabilities introduced in Seurat 5. + image: MouseBrain_sketch_clustering.jpg - - title: Cell Cycle Regression - name: seurat5_cell_cycle_vignette + - title: Integration + name: seurat5_integration summary: | - Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes. - image: cell_cycle_vignette.jpg + Integrate datesets in Seurat 5 using a variety of methods. + image: tbd.jpg - - title: Bridge Integration - name: seurat5_integration_bridge + - title: BPCells Interaction + name: seurat5_bpcells_interaction_vignette summary: | - Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. - image: bridge_integration.png + Load and save large on-disk matrices using BPCells. + image: tbd.jpg - - title: Sketch Clustering (BPCells) - name: MouseBrain_sketch_clustering + - title: Install Seurat 5 + name: install_seurat5 summary: | - Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5. - image: MouseBrain_sketch_clustering.jpg + Install Seurat 5 and required dependencies. + image: SeuratV5.png - - title: COVID Mapping (BPCells) + - title: COVID Mapping name: COVID_SCTMapping summary: | Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. image: COVID_SCTMapping.jpg - - title: Sketch Integration (BPCells) + - title: Sketch Integration name: ParseBio_sketch_integration summary: | Perform sketch integration on a large dataset from Parse Biosciences. image: ParseBio_sketch_integration.jpg - - - title: Sketch Clustering Updated - name: seurat5_sketch_analysis - summary: | - Analyze a large mouse brain dataset using the on-disk capabilities introduced in Seurat v5. - image: tbd.jpg - - - title: Integration Updated - name: seurat5_integration - summary: | - Integrate datesets in Seurat 5 using a variety of methods - image: tbd.jpg - - - title: BPCells interation - name: seurat5_bpcells_interaction_vignette - summary: | - Load and save large on-disk matrices - image: tbd.jpg - - - title: Install Seurat v5 - name: install_seurat5 - summary: | - Install Seurat 5 and required dependencies - image: seurat_logo.jpg \ No newline at end of file From ffee523f68dbb827bb4265dbb9f6d920515b71cc Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 11:57:45 -0400 Subject: [PATCH 537/979] update vignettes --- vignettes/seurat5_integration.Rmd | 4 ++++ vignettes/seurat5_integration_bridge.Rmd | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 4b23b2c26..9b3c14662 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -39,6 +39,10 @@ library(Azimuth) options(future.globals.maxSize = 1e9) ``` +```{r, include=TRUE} +InstallData("pbmcref") +``` + ## Introduction Integration of single-cell sequencing datasets, for example across experimental batches, donors, or conditions, is often an important step in scRNA-seq workflows. Integrative analysis can help to match shared cell types and states across datasets, which can boost statistical power, and most importantly, facilitate accurate comparative analysis across datasets. In previous versions of Seurat we introduced methods for integrative analysis, including our ‘anchor-based’ integration workflow. Many labs have also published powerful and pioneering methods, including Harmony and scVI, for integrative analysis. We recognize that while the goal of matching shared cell types across datasets may be important for many problems, users may also be concerned about which method to use, or that integration could result in a loss of biological resolution. In Seurat v5, we introduce more flexible and streamlined infrastructure to run different integration algorithms with a single line of code. This makes it easier to explore the results of different integration methods, and to compare these results to a workflow that excludes integration steps. diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index f4295a36f..22f799399 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -64,7 +64,7 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he ```{r} # the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -84,7 +84,7 @@ annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" # File with ATAC per fragment information file -frag.file <- "/brahms/haoy/vignette_data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( counts = atac_counts, @@ -119,8 +119,8 @@ We note that it is important to quantify the same set of genomic features in the ```{r, message=FALSE, warning=FALSE} # Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "/brahms/haoy/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" # Get gene annotations annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) # Change to UCSC style @@ -156,7 +156,7 @@ obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -obj.rna <- LoadH5Seurat("/brahms/haoy/seurat4_pbmc/pbmc_multimodal.h5seurat") +obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") ```
    **What if I want to use my own reference dataset?** From 0e3b43fdbfe7e9357e6397840c182728b0920c20 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 15:21:14 -0400 Subject: [PATCH 538/979] install SeuratObject from seurat5 branch automatically --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a6b226676..9a3677c27 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,8 @@ Authors@R: c( URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues Remotes: - bnprks/BPCells + bnprks/BPCells, + mojaveazure/seurat-object@seurat5 Depends: R (>= 4.0.0), methods, From b50060cccaeb7323a5081fb57cbc423b0282d7d2 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 21 Mar 2023 15:21:38 -0400 Subject: [PATCH 539/979] loadh5adobs function --- vignettes/seurat5_bpcells_interaction_vignette.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 42aed0424..788fb5d98 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -123,7 +123,7 @@ for (i in 1:length(files.set)) { mat <- open_matrix_dir(dir = paste0(file.dir, name, "_BP")) mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = "human") # Get metadata - metadata.list[[i]] <- readH5AD_obs(file = path) + metadata.list[[i]] <- LoadH5ADobs(file = path) data.list[[i]] <- mat names(data.list[i]) <- name } From bee41fd8335550d581fbf1f5236c20fef4dc9ead Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 21 Mar 2023 15:26:20 -0400 Subject: [PATCH 540/979] typo fix --- vignettes/seurat5_bpcells_interaction_vignette.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 788fb5d98..4c058f58d 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -123,7 +123,7 @@ for (i in 1:length(files.set)) { mat <- open_matrix_dir(dir = paste0(file.dir, name, "_BP")) mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = "human") # Get metadata - metadata.list[[i]] <- LoadH5ADobs(file = path) + metadata.list[[i]] <- LoadH5ADobs(path = path) data.list[[i]] <- mat names(data.list[i]) <- name } From fcd9c1d40a1c27ab1e3bc9c82f9bbbf0133cefde Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 15:30:45 -0400 Subject: [PATCH 541/979] update install requirements --- vignettes/install_seurat5.Rmd | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vignettes/install_seurat5.Rmd b/vignettes/install_seurat5.Rmd index 3182c8cdf..c7063a32c 100644 --- a/vignettes/install_seurat5.Rmd +++ b/vignettes/install_seurat5.Rmd @@ -1,5 +1,5 @@ --- -title: "Install Seurat v5" +title: "Install Seurat 5" output: html_document: theme: united @@ -31,12 +31,18 @@ knitr::opts_chunk$set( ) ``` -Copy the code below to install Seurat v5. +Copy the code below to install Seurat 5. -```{r, eval=FALSE} +```{r required, eval=FALSE} remotes::install_github("mojaveazure/seurat-object", "seurat5", quiet = TRUE) remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE) +``` + +The following packages are not required but used in many Seurat 5 vignettes. +```{r additional, eval=FALSE} remotes::install_github("satijalab/seurat-data", "seurat5", quiet = TRUE) +remotes::install_github("satijalab/azimuth", "seurat5", quiet = TRUE) +remotes::install_github("mojaveazure/seurat-wrappers", "seurat5", quiet = TRUE) remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) remotes::install_github("bnprks/BPCells") # for on-disk capabilities ``` From 0a16bfdccabd2d628ce4fcbba3c7770d3d94c093 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 15:50:58 -0400 Subject: [PATCH 542/979] remove UpdateSeuratObject references after LoadData --- vignettes/atacseq_integration_vignette.Rmd | 3 --- vignettes/conversion_vignette.Rmd | 1 - vignettes/de_vignette.Rmd | 1 - vignettes/dim_reduction_vignette.Rmd | 1 - vignettes/integration_introduction.Rmd | 2 -- vignettes/integration_mapping.Rmd | 1 - vignettes/integration_rpca.Rmd | 2 -- vignettes/interaction_vignette.Rmd | 1 - vignettes/mixscape_vignette.Rmd | 1 - vignettes/multimodal_reference_mapping.Rmd | 5 ----- vignettes/sctransform_v2_vignette.Rmd | 1 - vignettes/seurat5_atacseq_integration_vignette.Rmd | 3 --- vignettes/seurat5_de_vignette.Rmd | 1 - vignettes/seurat5_dim_reduction_vignette.Rmd | 1 - vignettes/seurat5_integration.Rmd | 1 - vignettes/seurat5_interaction_vignette.Rmd | 1 - vignettes/seurat5_merge_vignette.Rmd | 1 - vignettes/seurat5_visualization_vignette.Rmd | 3 +-- vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd | 1 - vignettes/visualization_vignette.Rmd | 3 +-- 20 files changed, 2 insertions(+), 32 deletions(-) diff --git a/vignettes/atacseq_integration_vignette.Rmd b/vignettes/atacseq_integration_vignette.Rmd index dadea5323..bbe70ac34 100644 --- a/vignettes/atacseq_integration_vignette.Rmd +++ b/vignettes/atacseq_integration_vignette.Rmd @@ -67,9 +67,6 @@ library(cowplot) pbmc.rna <- LoadData("pbmcMultiome", "pbmc.rna") pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac") -pbmc.rna <- UpdateSeuratObject(pbmc.rna) -pbmc.atac <- UpdateSeuratObject(pbmc.atac) - # repeat QC steps performed in the WNN vignette pbmc.rna <- subset(pbmc.rna, seurat_annotations != 'filtered') pbmc.atac <- subset(pbmc.atac, seurat_annotations != 'filtered') diff --git a/vignettes/conversion_vignette.Rmd b/vignettes/conversion_vignette.Rmd index f45965cbd..7b9af30c9 100644 --- a/vignettes/conversion_vignette.Rmd +++ b/vignettes/conversion_vignette.Rmd @@ -57,7 +57,6 @@ library(patchwork) # Use PBMC3K from SeuratData InstallData("pbmc3k") pbmc <- LoadData(ds = "pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) pbmc.sce <- as.SingleCellExperiment(pbmc) p1 <- plotExpression(pbmc.sce, features = 'MS4A1', x = 'ident') + theme(axis.text.x = element_text(angle = 45, hjust = 1)) p2 <- plotPCA(pbmc.sce, colour_by = 'ident') diff --git a/vignettes/de_vignette.Rmd b/vignettes/de_vignette.Rmd index 711275f7a..d4a3906b5 100644 --- a/vignettes/de_vignette.Rmd +++ b/vignettes/de_vignette.Rmd @@ -43,7 +43,6 @@ This vignette highlights some example workflows for performing differential expr library(Seurat) library(SeuratData) pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) ``` # Perform default differential expression tests diff --git a/vignettes/dim_reduction_vignette.Rmd b/vignettes/dim_reduction_vignette.Rmd index 173ee1d71..a13721b77 100644 --- a/vignettes/dim_reduction_vignette.Rmd +++ b/vignettes/dim_reduction_vignette.Rmd @@ -38,7 +38,6 @@ This vignette demonstrates how to store and interact with dimensional reduction library(Seurat) library(SeuratData) pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) ``` # Explore the new dimensional reduction structure diff --git a/vignettes/integration_introduction.Rmd b/vignettes/integration_introduction.Rmd index a7c40ca6b..fe8c1e92f 100644 --- a/vignettes/integration_introduction.Rmd +++ b/vignettes/integration_introduction.Rmd @@ -68,7 +68,6 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset ifnb <- LoadData('ifnb') -ifnb <- UpdateSeuratObject(ifnb) # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") @@ -231,7 +230,6 @@ Below, we demonstrate how to modify the Seurat integration workflow for datasets ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} ifnb <- LoadData('ifnb') -ifnb <- UpdateSeuratObject(ifnb) ifnb.list <- SplitObject(ifnb, split.by = "stim") ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform) features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) diff --git a/vignettes/integration_mapping.Rmd b/vignettes/integration_mapping.Rmd index 4dd02f598..1d44a9fe2 100644 --- a/vignettes/integration_mapping.Rmd +++ b/vignettes/integration_mapping.Rmd @@ -54,7 +54,6 @@ To construct a reference, we will identify 'anchors' between the individual data ```{r preprocessing1} panc8 <- LoadData('panc8') -panc8 <- UpdateSeuratObject(panc8) pancreas.list <- SplitObject(panc8, split.by = "tech") pancreas.list <- pancreas.list[c("celseq", "celseq2", "fluidigmc1", "smartseq2")] ``` diff --git a/vignettes/integration_rpca.Rmd b/vignettes/integration_rpca.Rmd index 5d1652621..f249d13c0 100644 --- a/vignettes/integration_rpca.Rmd +++ b/vignettes/integration_rpca.Rmd @@ -57,7 +57,6 @@ InstallData('ifnb') ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset ifnb <- LoadData('ifnb') -ifnb <- UpdateSeuratObject(ifnb) # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") @@ -151,7 +150,6 @@ As an additional example, we repeat the analyses performed above, but normalize ```{r panc8.cca.sct.init, results='hide', message=FALSE, fig.keep='none'} ifnb <- LoadData('ifnb') -ifnb <- UpdateSeuratObject(ifnb) ifnb.list <- SplitObject(ifnb, split.by = "stim") ifnb.list <- lapply(X = ifnb.list, FUN = SCTransform, method = "glmGamPoi") features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000) diff --git a/vignettes/interaction_vignette.Rmd b/vignettes/interaction_vignette.Rmd index 6ca7079c6..2b007dbfb 100644 --- a/vignettes/interaction_vignette.Rmd +++ b/vignettes/interaction_vignette.Rmd @@ -43,7 +43,6 @@ library(Seurat) library(SeuratData) InstallData("pbmc3k") pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) # pretend that cells were originally assigned to one of two replicates (we assign randomly here) # if your cells do belong to multiple replicates, and you want to add this info to the Seurat object diff --git a/vignettes/mixscape_vignette.Rmd b/vignettes/mixscape_vignette.Rmd index 34997ade4..fdbc622e3 100644 --- a/vignettes/mixscape_vignette.Rmd +++ b/vignettes/mixscape_vignette.Rmd @@ -66,7 +66,6 @@ We use a 111 gRNA ECCITE-seq dataset generated from stimulated THP-1 cells that ```{r eccite.load} # Load object. eccite <- LoadData(ds = "thp1.eccite") -eccite <- UpdateSeuratObject(eccite) # Normalize protein. eccite <- NormalizeData( diff --git a/vignettes/multimodal_reference_mapping.Rmd b/vignettes/multimodal_reference_mapping.Rmd index 6c79330a3..f7928bb42 100644 --- a/vignettes/multimodal_reference_mapping.Rmd +++ b/vignettes/multimodal_reference_mapping.Rmd @@ -79,7 +79,6 @@ To demonstrate mapping to this multimodal reference, we will use a dataset of 2, ```{r 3k.load} library(SeuratData) InstallData('pbmc3k') -pbmc3k <- UpdateSeuratObject(pbmc3k) ``` The reference was normalized using `SCTransform()`, so we use the same approach to normalize the query here. @@ -236,10 +235,6 @@ bm <- LoadData(ds = "bmcite") #load query data InstallData('hcabm40k') hcabm40k <- LoadData(ds = "hcabm40k") - -bm <- UpdateSeuratObject(bm) -hcabm40k <- UpdateSeuratObject(hcabm40k) - ``` The reference dataset contains a [WNN graph](weighted_nearest_neighbor_analysis.html), reflecting a weighted combination of the RNA and protein data in this CITE-seq experiment. diff --git a/vignettes/sctransform_v2_vignette.Rmd b/vignettes/sctransform_v2_vignette.Rmd index 0f7fa2423..9bcb03614 100644 --- a/vignettes/sctransform_v2_vignette.Rmd +++ b/vignettes/sctransform_v2_vignette.Rmd @@ -87,7 +87,6 @@ InstallData("ifnb") ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset ifnb <- LoadData("ifnb") -ifnb <- UpdateSeuratObject(ifnb) # split the dataset into a list of two seurat objects (stim and CTRL) ifnb.list <- SplitObject(ifnb, split.by = "stim") diff --git a/vignettes/seurat5_atacseq_integration_vignette.Rmd b/vignettes/seurat5_atacseq_integration_vignette.Rmd index eadcd9b43..3dc33d667 100644 --- a/vignettes/seurat5_atacseq_integration_vignette.Rmd +++ b/vignettes/seurat5_atacseq_integration_vignette.Rmd @@ -68,9 +68,6 @@ library(cowplot) pbmc.rna <- LoadData("pbmcMultiome", "pbmc.rna") pbmc.atac <- LoadData("pbmcMultiome", "pbmc.atac") -pbmc.rna <- UpdateSeuratObject(pbmc.rna) -pbmc.atac <- UpdateSeuratObject(pbmc.atac) - pbmc.rna[['RNA']] <- as(pbmc.rna[['RNA']], Class = 'Assay5') # repeat QC steps performed in the WNN vignette pbmc.rna <- subset(pbmc.rna, seurat_annotations != 'filtered') diff --git a/vignettes/seurat5_de_vignette.Rmd b/vignettes/seurat5_de_vignette.Rmd index 97ba236cf..858331e09 100644 --- a/vignettes/seurat5_de_vignette.Rmd +++ b/vignettes/seurat5_de_vignette.Rmd @@ -43,7 +43,6 @@ This vignette highlights some example workflows for performing differential expr library(Seurat) library(SeuratData) pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) ``` # Perform default differential expression tests diff --git a/vignettes/seurat5_dim_reduction_vignette.Rmd b/vignettes/seurat5_dim_reduction_vignette.Rmd index 6a3b624ad..85e9a663f 100644 --- a/vignettes/seurat5_dim_reduction_vignette.Rmd +++ b/vignettes/seurat5_dim_reduction_vignette.Rmd @@ -38,7 +38,6 @@ This vignette demonstrates how to store and interact with dimensional reduction library(Seurat) library(SeuratData) pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) ``` # Explore the new dimensional reduction structure diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 9b3c14662..b1c4ca451 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -52,7 +52,6 @@ Seurat v5 assays store data in layers. These layers can store raw, un-normalized ```{r} # load in the pbmc systematic comparative analysis dataset obj <- LoadData("pbmcsca") -obj <- UpdateSeuratObject(obj) obj <- subset(obj, nFeature_RNA > 1000) obj <- RunAzimuth(obj, reference = "pbmcref") # currently, the object has two layers in the RNA assay: counts, and data diff --git a/vignettes/seurat5_interaction_vignette.Rmd b/vignettes/seurat5_interaction_vignette.Rmd index 5cc03dfd1..e29f3059f 100644 --- a/vignettes/seurat5_interaction_vignette.Rmd +++ b/vignettes/seurat5_interaction_vignette.Rmd @@ -43,7 +43,6 @@ library(Seurat) library(SeuratData) InstallData("pbmc3k") pbmc <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc <- UpdateSeuratObject(pbmc) # pretend that cells were originally assigned to one of two replicates (we assign randomly here) # if your cells do belong to multiple replicates, and you want to add this info to the Seurat object diff --git a/vignettes/seurat5_merge_vignette.Rmd b/vignettes/seurat5_merge_vignette.Rmd index 84814bb7f..037614972 100644 --- a/vignettes/seurat5_merge_vignette.Rmd +++ b/vignettes/seurat5_merge_vignette.Rmd @@ -74,7 +74,6 @@ To merge more than two `Seurat` objects, simply pass a vector of multiple `Seura library(SeuratData) InstallData("pbmc3k") pbmc3k <- LoadData("pbmc3k", type = "pbmc3k.final") -pbmc3k <- UpdateSeuratObject(pbmc3k) pbmc3k pbmc.big <- merge(pbmc3k, y = c(pbmc4k, pbmc8k), add.cell.ids = c('3K', '4K', '8K'), project = 'PBMC15K') diff --git a/vignettes/seurat5_visualization_vignette.Rmd b/vignettes/seurat5_visualization_vignette.Rmd index f5f2052b4..4724b0151 100644 --- a/vignettes/seurat5_visualization_vignette.Rmd +++ b/vignettes/seurat5_visualization_vignette.Rmd @@ -52,8 +52,7 @@ library(SeuratData) library(ggplot2) library(patchwork) -data("pbmc3k.final") -pbmc3k.final <- UpdateSeuratObject(pbmc3k.final) +pbmc3k.final <- LoadData("pbmc3k", type = "pbmc3k.final") pbmc3k.final[["RNA"]] <- as(pbmc3k.final[["RNA"]], Class = "Assay5") pbmc3k.final <- NormalizeData(pbmc3k.final) pbmc3k.final <- FindVariableFeatures(pbmc3k.final) diff --git a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd index d5b38df08..64eb316a9 100644 --- a/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd +++ b/vignettes/seurat5_weighted_nearest_neighbor_analysis.Rmd @@ -62,7 +62,6 @@ library(dplyr) ```{r} InstallData("bmcite") bm <- LoadData(ds = "bmcite") -bm <- UpdateSeuratObject(bm) bm[["ADT"]] <- CreateAssay5Object(bm[["ADT"]]$counts) bm[["RNA"]] <- CreateAssay5Object(bm[["RNA"]]$counts) ``` diff --git a/vignettes/visualization_vignette.Rmd b/vignettes/visualization_vignette.Rmd index c4476d861..dd6988ecc 100644 --- a/vignettes/visualization_vignette.Rmd +++ b/vignettes/visualization_vignette.Rmd @@ -50,8 +50,7 @@ library(Seurat) library(SeuratData) library(ggplot2) library(patchwork) -data("pbmc3k.final") -pbmc3k.final <- UpdateSeuratObject(pbmc3k.final) +pbmc3k.final <- LoadData("pbmc3k", type = "pbmc3k.final") pbmc3k.final$groups <- sample(c("group1", "group2"), size = ncol(pbmc3k.final), replace = TRUE) features <- c("LYZ", "CCL5", "IL32", "PTPRCAP", "FCGR3A", "PF4") pbmc3k.final From 4a9d96e69858e6cf7327fd16f81f36ed1bcccab3 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 16:27:08 -0400 Subject: [PATCH 543/979] update vignettes --- vignettes/install_seurat5.Rmd | 4 ++-- vignettes/seurat5_integration_bridge.Rmd | 19 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/vignettes/install_seurat5.Rmd b/vignettes/install_seurat5.Rmd index c7063a32c..f339a199e 100644 --- a/vignettes/install_seurat5.Rmd +++ b/vignettes/install_seurat5.Rmd @@ -42,7 +42,7 @@ The following packages are not required but used in many Seurat 5 vignettes. ```{r additional, eval=FALSE} remotes::install_github("satijalab/seurat-data", "seurat5", quiet = TRUE) remotes::install_github("satijalab/azimuth", "seurat5", quiet = TRUE) -remotes::install_github("mojaveazure/seurat-wrappers", "seurat5", quiet = TRUE) +remotes::install_github("satijalab/seurat-wrappers", "seurat5", quiet = TRUE) remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) -remotes::install_github("bnprks/BPCells") # for on-disk capabilities +remotes::install_github("bnprks/BPCells", quiet = TRUE) # for on-disk capabilities ``` diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 22f799399..8a430a360 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -48,7 +48,6 @@ First, we install the updated version of Seurat that supports this infrastructur library(remotes) library(Seurat) options(Seurat.object.assay.version = "v5") -library(SeuratDisk) library(Signac) library(EnsDb.Hsapiens.v86) library(dplyr) @@ -63,8 +62,8 @@ We start by loading a 10x multiome dataset, consisting of ~12,000 PBMC from a he **Load and setup the 10x multiome object** ```{r} -# the 10x hdf5 file contains both data types. -inputdata.10x <- Read10X_h5("../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") +# the 10x hdf5 file contains both data types. +inputdata.10x <- Read10X_h5("/brahms/hartmana/vignette_data/pbmc_cellranger_arc_2/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5") # extract RNA and ATAC data rna_counts <- inputdata.10x$`Gene Expression` atac_counts <- inputdata.10x$Peaks @@ -80,11 +79,11 @@ grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) atac_counts <- atac_counts[as.vector(grange.use), ] # Get gene annotations annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) -# Change style to UCSC +# Change style to UCSC seqlevelsStyle(annotations) <- 'UCSC' genome(annotations) <- "hg38" -# File with ATAC per fragment information file -frag.file <- "../data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" +# File with ATAC per fragment information file +frag.file <- "/brahms/hartmana/vignette_data/pbmc_cellranger_arc_2/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz" # Add in ATAC-seq data as ChromatinAssay object chrom_assay <- CreateChromatinAssay( counts = atac_counts, @@ -94,7 +93,7 @@ chrom_assay <- CreateChromatinAssay( min.cells = 10, annotation = annotations ) -# Add the ATAC assay to the multiome object +# Add the ATAC assay to the multiome object obj.multi[["ATAC"]] <- chrom_assay # Filter ATAC data based on QC metrics obj.multi <- subset( @@ -119,8 +118,8 @@ We note that it is important to quantify the same set of genomic features in the ```{r, message=FALSE, warning=FALSE} # Load ATAC dataset -atac_pbmc_data <- Read10X_h5(filename = "../data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") -fragpath <- "../data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" +atac_pbmc_data <- Read10X_h5(filename = "/brahms/hartmana/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_filtered_peak_bc_matrix.h5") +fragpath <- "/brahms/hartmana/vignette_data/10k_PBMC_ATAC_nextgem_Chromium_X_fragments.tsv.gz" # Get gene annotations annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86) # Change to UCSC style @@ -156,7 +155,7 @@ obj.atac <- subset(obj.atac, subset = nCount_ATAC < 7e4 & nCount_ATAC > 2000) We load the reference (download [here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat)) from our recent [paper](https://doi.org/10.1016/j.cell.2021.04.048). This reference is stored as an h5Seurat file, a format that enables on-disk storage of multimodal Seurat objects (more details on h5Seurat and `SeuratDisk` can be found [here](https://mojaveazure.github.io/seurat-disk/index.html)). ```{r pbmc.ref} -obj.rna <- LoadH5Seurat("../data/pbmc_multimodal.h5seurat") +obj.rna <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") ```
    **What if I want to use my own reference dataset?** From 9411c7e64ba416c905bc853e95ace6d9cabfe688 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 21 Mar 2023 18:02:12 -0400 Subject: [PATCH 544/979] adding assay param to fetch residuals for v5 assay --- R/preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 498913185..8213a5d0f 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -442,6 +442,7 @@ GetResidual <- function( X = sct.models, FUN = function(x) { FetchResidualSCTModel(object = object, + assay = assay, umi.assay = umi.assay, SCTModel = x, new_features = features, From f6fe037e3451b8c70b43113529dada89a5a6837f Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 23:22:35 -0400 Subject: [PATCH 545/979] vignette updates --- vignettes/COVID_SCTMapping.Rmd | 4 ++-- vignettes/get_started_v5.Rmd | 19 +++++++++++++++++-- .../seurat5_bpcells_interaction_vignette.Rmd | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 4d7585e23..8e2dfa506 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -120,14 +120,14 @@ anchor ``` -```{r} +```{r, fig.width=10, fig.height=6} p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.8, label = T) + NoLegend() p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T) + NoLegend() p1+p2 ``` -```{r, fig.width=10, fig.height=10} +```{r, fig.width=10, fig.height=6} p3 <-DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() p3 ``` diff --git a/vignettes/get_started_v5.Rmd b/vignettes/get_started_v5.Rmd index 1a7f58ebb..329c22038 100644 --- a/vignettes/get_started_v5.Rmd +++ b/vignettes/get_started_v5.Rmd @@ -1,5 +1,5 @@ --- -title: "Introduction to Seurat 5" +title: "Getting started with Seurat 5" output: html_document: theme: united @@ -26,7 +26,10 @@ process_entry <- function(dat) { } else { img <- paste0('![](', '../output/images/', dat$image, '){width=3000px}') } - if (grepl(pattern = "https://satijalab.org/", x = dat$name)) { + + if (dat$name == "seurat5_run_azimuth") { + link <- "https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html" + } else if (grepl(pattern = "https://satijalab.org/", x = dat$name)) { link <- dat$name } else { link <- paste0(dat$name, ".html") @@ -99,8 +102,20 @@ vdat <- read_yaml(file = "vignettes_v5.yaml") ``` +Some text. + +# Introduction to Seurat 5 + Seurat 5 placeholder text. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 1) ``` + +# Analysis in Seurat 5 + +Some more text about these vignettes. + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 2) +``` diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 4c058f58d..4fd0367ab 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -55,7 +55,7 @@ We use BPCells functionality to both load in our data and write the counts layer # Load Data ### Load Data from one h5 file -In this section, we will load a dataset of mouse brain cells freely available from 10X Genomics. This includes 1.3 Million single cells that were sequenced on the Illumina NovaSeq 6000. The raw data can be found [here](https://www.10xgenomics.com/resources/datasets/10k-human-brains-3-ht-v3-1-chromium-x-3-1-high). +In this section, we will load a dataset of mouse brain cells freely available from 10X Genomics. This includes 1.3 Million single cells that were sequenced on the Illumina NovaSeq 6000. The raw data can be found [here](https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.3.0/1M_neurons?). To read in the file, we will use open_matrix_10x_hdf5, a BPCells function written to read in feature matrices from 10x. We then write a matrix directory, load the matrix, and create a Seurat object. From 251d2d721b7039eabd3c54cd98c9fbdc20295b2c Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 21 Mar 2023 23:59:14 -0400 Subject: [PATCH 546/979] bpcells to rds --- vignettes/ParseBio_sketch_integration.Rmd | 38 ++++------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 2ec61acfd..05e5ef7c0 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -43,38 +43,12 @@ library(dplyr) ## load matrix ```{r, warning=F, message=F} -time0_loadMatrix <- system.time({ - mat <- open_matrix_dir('../data/pbmc_150k_sparse/') - meta <- readRDS('../data/ParseBio_PBMC_meta_100K.rds') - meta$disease <- sample(c('H','D'), nrow(meta), replace = T) -}) -``` - -## sketch object -```{r,warning=F, message=F} - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -time1_normalize <- system.time({ - object <- CreateSeuratObject(counts = mat, meta.data = meta) - object <- NormalizeData(object, verbose = FALSE) -}) - - -time2_split.mat <- system.time({ - options(Seurat.object.assay.calcn = FALSE) - object[['RNA']] <- split(object[['RNA']], f = object$sample) -}) - - -time3_FindVariable <- system.time({ - object <- FindVariableFeatures(object, layer = 'counts', verbose = FALSE) -} -) - -time4_LeverageScoreSampling <- system.time({ - object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix', verbose = FALSE) -}) - +object <- readRDS("/brahms/hartmana/vignette_data/parse_1m_pbmc.rds") +object <- NormalizeData(object, verbose = FALSE) +options(Seurat.object.assay.calcn = FALSE) +object[['RNA']] <- split(object[['RNA']], f = object$sample) +object <- FindVariableFeatures(object, layer = 'counts', verbose = FALSE) +object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix', verbose = FALSE) ``` ## integrate sketched assay From 57ce9f4b739cb29cc68388dfb47ac5c695aec701 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 22 Mar 2023 00:31:11 -0400 Subject: [PATCH 547/979] update v5 landing page --- .../seurat5_bpcells_interaction_vignette.Rmd | 4 +- vignettes/seurat5_sketch_analysis.Rmd | 13 ++-- vignettes/vignettes_v5.yaml | 70 +++++++++++-------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 4fd0367ab..9119730b1 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -107,7 +107,7 @@ You can also download data from multiple matrices. In this section, we create a ```{r, warning=FALSE} -file.dir <- "../data/h5ad_files/" +file.dir <- "/brahms/hartmana/vignette_data/h5ad_files/" files.set <- c("ahern_pbmc.h5ad", "jin_pbmc.h5ad", "yoshida_pbmc.h5ad") # Loop through h5ad files and output BPCells matrices on-disk @@ -118,7 +118,7 @@ for (i in 1:length(files.set)) { name <- gsub(".h5ad", "", files.set[i]) path <- paste0(file.dir, files.set[i]) data <- open_matrix_anndata_hdf5(path) - write_matrix_dir(mat = data, dir = paste0(file.dir, name, "_BP")) + write_matrix_dir(mat = data, dir = paste0(file.dir, name, "_BP"), overwrite = TRUE) # Load in BP matrices mat <- open_matrix_dir(dir = paste0(file.dir, name, "_BP")) mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = "human") diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 662a4a770..27b5bd9b7 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -55,19 +55,16 @@ options(future.globals.maxSize = 1e9) ``` ## Create a Seurat object with a v5 assay for on-disk storage -We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_dir` function from `BPCells`. Note that this function does not load the dataset into memory, but instead, creates a connection to the data stored on-disk. We then store this on-disk representation in the Seurat object. Note that in our [Introduction to on-disk storage vignette](link), we demonstrate how to create this on-disk representation. +We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_dir` function from `BPCells`. Note that this function does not load the dataset into memory, but instead, creates a connection to the data stored on-disk. We then store this on-disk representation in the Seurat object. Note that in our [Introduction to on-disk storage vignette](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html), we demonstrate how to create this on-disk representation. ```{r} -# note to Yuhan- this directory should already contain the ensembl-modified names. we can show how to do the ensembl mods in the interaction vignette -# Connect to the counts matrix stored on-disk -ondisk_matrix <- open_matrix_dir('/brahms/hartmana/vignette_data/mouse_1M_neurons_counts') -ondisk_matrix <- Azimuth::ConvertEnsembleToSymbol(mat = ondisk_matrix, species = "mouse") - # specify that you would like to create a Seurat v5 assay # note that we require setting this option to ensure that existing pipelines are not affected options(Seurat.object.assay.version = 'v5') -# Create the Seurat object, which contains 1.3M cells stored on-disk as part of the 'RNA' assay -obj <- CreateSeuratObject(counts = ondisk_matrix) + +# Read the Seurat object, which contains 1.3M cells stored on-disk as part of the 'RNA' assay +obj <- readRDS("/brahms/hartmana/vignette_data/1p3_million_mouse_brain.rds") obj + # Note that since the data is stored on-disk, the object size easily fits in-memory (<1GB) format(object.size(obj), units = 'Mb') ``` diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 296025a7e..19ab76f70 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -1,40 +1,30 @@ -- category: Introduction to v5 +- category: Introduction to Seurat 5 vignettes: - - title: Bridge Integration - name: seurat5_integration_bridge - summary: | - Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. - image: bridge_integration.png - - - title: Analysis of spatial datasets (Imaging-based) - name: seurat5_spatial_vignette_2 - summary: | - Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. - image: spatial_vignette_2.jpg - - - title: Sketch Clustering - name: seurat5_sketch_analysis + - title: Install Seurat 5 + name: install_seurat5 summary: | - Analyze a 1.3 million cell mouse brain dataset using the on-disk capabilities introduced in Seurat 5. - image: MouseBrain_sketch_clustering.jpg - - - title: Integration - name: seurat5_integration + Install Seurat 5 and required dependencies. + image: SeuratV5.png + + - title: Seurat 5 Assay + name: seurat5_assay summary: | - Integrate datesets in Seurat 5 using a variety of methods. - image: tbd.jpg + Explore the new assay structure introduced in Seurat 5. + image: assay.png - title: BPCells Interaction name: seurat5_bpcells_interaction_vignette summary: | Load and save large on-disk matrices using BPCells. - image: tbd.jpg + image: bpcells.png - - title: Install Seurat 5 - name: install_seurat5 +- category: Analysis in Seurat 5 + vignettes: + - title: Integration + name: seurat5_integration summary: | - Install Seurat 5 and required dependencies. - image: SeuratV5.png + Integrate datesets in Seurat 5 using a variety of methods. + image: integration_seurat5.jpg - title: COVID Mapping name: COVID_SCTMapping @@ -42,8 +32,32 @@ Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. image: COVID_SCTMapping.jpg + - title: Analysis of spatial datasets (Imaging-based) + name: seurat5_spatial_vignette_2 + summary: | + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. + image: spatial_vignette_2.jpg + + - title: Sketch Clustering + name: seurat5_sketch_analysis + summary: | + Analyze a 1.3 million cell mouse brain dataset using the on-disk capabilities introduced in Seurat 5. + image: sketch.png + - title: Sketch Integration name: ParseBio_sketch_integration summary: | Perform sketch integration on a large dataset from Parse Biosciences. - image: ParseBio_sketch_integration.jpg + image: sketch.png + + - title: Bridge Integration + name: seurat5_integration_bridge + summary: | + Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. + image: bridge_integration.png + + - title: RunAzimuth + name: seurat5_run_azimuth + summary: | + Annotate cells locally using Azimuth. + image: azimuth.png From 885178dfd717ac8dab0a636b568ecf350fdc92d3 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 22 Mar 2023 00:42:49 -0400 Subject: [PATCH 548/979] replace bpcell with seurat object --- vignettes/COVID_SCTMapping.Rmd | 40 +++------------------------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 8e2dfa506..c53e22c4d 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -40,49 +40,15 @@ library(BPCells) library(dplyr) ``` -## load matrix -```{r, warning=F, message=F} -time0_loadMatrix <- system.time({ - -file.dir <- "../data/PBMCVignette/" -files.set <- c("arunachalam_2020_processed.BPCells", "combes_2021_processed.BPCells", "lee_2020_processed.BPCells", - "wilk_2020_processed.BPCells", "yao_2021_processed.BPCells") -meta.list <- readRDS('../data/PBMCVignette/PBMC_meta.list') -names(meta.list) <- gsub('_processed.BPCells','',files.set) - -input.list <- list() -for (i in 1:length(files.set)) { - input.list[[i]] <- open_matrix_dir(dir = paste0(file.dir, files.set[i]) ) - colnames(input.list[[i]]) <- paste0(names(meta.list)[i], "_", colnames(input.list[[i]])) - rownames(meta.list[[i]]) <-paste0(names(meta.list)[i], "_", rownames(meta.list[[i]])) - meta.list[[i]]$batch <- names(meta.list)[i] - meta.list[[i]]$celltype <- meta.list[[i]]$predicted.celltype.l2 - -} - names(input.list) <- paste0('counts.',gsub('_processed.BPCells','',files.set)) - meta_data <- lapply(meta.list, function(x) { - x <- x[,c('batch', 'celltype', 'patient', 'disease_status_standard' )] - return(x) - }) - meta_data <- Reduce(rbind, meta_data) -}) -``` - ## load query ```{r,warning=F, message=F} - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -time1_normalize <- system.time({ - - object <- CreateSeuratObject(counts = input.list[1:2], meta.data = meta_data) - object <- NormalizeData(object, verbose = FALSE) -}) - +object <- readRDS("/brahms/hartmana/vignette_data/covid_mapping_merged_object.rds") +object <- NormalizeData(object, verbose = FALSE) ``` ## load reference ```{r} -obj.ref <- readRDS("../data/pbmc_multimodal_2023.rds") +obj.ref <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") obj.ref ``` ## mapping From 884b1eabc62639b625fae18d83e404a8a15ce882 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 22 Mar 2023 18:54:49 -0400 Subject: [PATCH 549/979] update vignettes --- vignettes/COVID_SCTMapping.Rmd | 31 +++---- vignettes/ParseBio_sketch_integration.Rmd | 2 +- vignettes/get_started_v5.Rmd | 11 ++- vignettes/install_seurat5.Rmd | 20 ++++- .../seurat5_bpcells_interaction_vignette.Rmd | 28 +++--- vignettes/seurat5_integration.Rmd | 85 ++++++++++++------- vignettes/seurat5_integration_bridge.Rmd | 2 +- vignettes/seurat5_sketch_analysis.Rmd | 26 +++--- vignettes/seurat5_spatial_vignette_2.Rmd | 34 ++------ vignettes/spatial_vignette_2.Rmd | 2 +- vignettes/vignettes_v5.yaml | 12 +-- 11 files changed, 138 insertions(+), 115 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index c53e22c4d..88008b6ac 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -38,6 +38,7 @@ knitr::opts_chunk$set( library(Seurat) library(BPCells) library(dplyr) +options(future.globals.maxSize = 1e9) ``` ## load query @@ -58,10 +59,10 @@ obj.ref time2_anchoring <- system.time({ anchor <- FindTransferAnchors(reference = obj.ref, query = object, - reference.reduction = 'spca', + reference.reduction = 'spca', normalization.method = 'SCT', dims = 1:50, - k.filter = NA, + k.filter = NA, k.anchor = 5, features = rownames(obj.ref[['spca']]@feature.loadings)) }) @@ -87,11 +88,9 @@ anchor ```{r, fig.width=10, fig.height=6} - p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.8, label = T) + NoLegend() - p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T) + NoLegend() - - p1+p2 - +p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.8, label = T) + NoLegend() +p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T) + NoLegend() +p1 + p2 ``` ```{r, fig.width=10, fig.height=6} p3 <-DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() @@ -100,24 +99,21 @@ p3 ## pseudo-bulk ```{r} - -time4_bulk <- system.time( bulk <- AverageExpression(object, - method = 'aggregate', - return.seurat = T, - slot = 'counts', - assays = 'RNA', - group.by = c("predicted.l2.s5","patient","disease_status_standard") - ) +bulk <- AverageExpression(object, + method = 'aggregate', + return.seurat = T, + slot = 'counts', + assays = 'RNA', + group.by = c("predicted.l2.s5", "patient", "disease_status_standard") ) bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) bulk <- subset(bulk, subset = disease != 'other') - ``` -```{R} +```{r} marker.list <- list() celltype.set <- unique(bulk$celltype ) for (i in seq_along(celltype.set)) { @@ -131,7 +127,6 @@ for (i in seq_along(celltype.set)) { } names(marker.list) <- celltype.set - ``` ```{r} marker.list.filter <- lapply(marker.list, function(x) { diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 05e5ef7c0..90922bf05 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -48,7 +48,7 @@ object <- NormalizeData(object, verbose = FALSE) options(Seurat.object.assay.calcn = FALSE) object[['RNA']] <- split(object[['RNA']], f = object$sample) object <- FindVariableFeatures(object, layer = 'counts', verbose = FALSE) -object <- LeverageScoreSampling(object = object, ncells = 5000, cast = 'dgCMatrix', verbose = FALSE) +object <- SketchData(object = object, ncells = 50000, method = 'LeverageScore', sketched.assay = 'sketch') ``` ## integrate sketched assay diff --git a/vignettes/get_started_v5.Rmd b/vignettes/get_started_v5.Rmd index 329c22038..28a65e1f1 100644 --- a/vignettes/get_started_v5.Rmd +++ b/vignettes/get_started_v5.Rmd @@ -102,11 +102,9 @@ vdat <- read_yaml(file = "vignettes_v5.yaml") ``` -Some text. - # Introduction to Seurat 5 -Seurat 5 placeholder text. +The following vignettes outline how to install, create, and interact with Seurat 5 objects. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 1) @@ -114,7 +112,12 @@ make_vignette_card_section(vdat = vdat, cat = 1) # Analysis in Seurat 5 -Some more text about these vignettes. +Below we demonstrate new functionality introduced in Seurat 5 including: + +* Sketch-based workflows to efficiently process large datasets while preserving rare and abundant cell types +* Bridge integration to annotate cells from complementary technologies with a scRNA-seq reference +* Spatial infrastructure to analyze public datasets from multiple technologies +* Integration using CCA, RPCA, scVI, and Harmony in a common framework ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 2) diff --git a/vignettes/install_seurat5.Rmd b/vignettes/install_seurat5.Rmd index f339a199e..9de82ba94 100644 --- a/vignettes/install_seurat5.Rmd +++ b/vignettes/install_seurat5.Rmd @@ -31,18 +31,30 @@ knitr::opts_chunk$set( ) ``` -Copy the code below to install Seurat 5. +Copy the code below to install Seurat 5: ```{r required, eval=FALSE} -remotes::install_github("mojaveazure/seurat-object", "seurat5", quiet = TRUE) remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE) ``` -The following packages are not required but used in many Seurat 5 vignettes. +The following packages are not required but are used in many Seurat 5 vignettes: + +* SeuratData: automatically load datasets pre-packaged as Seurat objects +* Azimuth: local annotation of scRNA-seq and scATAC-seq queries across multiple organs and tissues +* SeuratWrappers: enables use of additional integration and differential expression methods +* Signac: analysis of single-cell chromatin data + ```{r additional, eval=FALSE} remotes::install_github("satijalab/seurat-data", "seurat5", quiet = TRUE) remotes::install_github("satijalab/azimuth", "seurat5", quiet = TRUE) remotes::install_github("satijalab/seurat-wrappers", "seurat5", quiet = TRUE) remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) -remotes::install_github("bnprks/BPCells", quiet = TRUE) # for on-disk capabilities ``` + +Seurat 5 utilizes BPCells to support analysis of extremely large datasets: + +```{r bpcells, eval=FALSE} +remotes::install_github("bnprks/BPCells", quiet = TRUE) +``` + +For more information on BPCells installation, please see the [installation instructions](https://bnprks.github.io/BPCells/#installation). For macOS users, the following GitHub issues concerning [M1 chip installation](https://github.com/bnprks/BPCells/issues/6) and [compiler compatibility](https://github.com/bnprks/BPCells/issues/3) may be of use. diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 9119730b1..0ed2103e7 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -60,12 +60,12 @@ In this section, we will load a dataset of mouse brain cells freely available f To read in the file, we will use open_matrix_10x_hdf5, a BPCells function written to read in feature matrices from 10x. We then write a matrix directory, load the matrix, and create a Seurat object. ```{r} -brain.data <- open_matrix_10x_hdf5(path = "../data/1M_neurons_filtered_gene_bc_matrices_h5.h5") +brain.data <- open_matrix_10x_hdf5(path = "/brahms/hartmana/vignette_data/1M_neurons_filtered_gene_bc_matrices_h5.h5") # Write the matrix to a directory -write_matrix_dir(mat = brain.data, dir = '../data/brain_counts') +write_matrix_dir(mat = brain.data, dir = '/brahms/hartmana/vignette_data/bpcells/brain_counts', overwrite = TRUE) # Now that we have the matrix on disk, we can load it -brain.mat <- open_matrix_dir(dir = "../data/brain_counts") -brain.mat <- Azimuth::ConvertEnsembleToSymbol(mat = brain.mat, species = "mouse") +brain.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/brain_counts") +brain.mat <- Azimuth:::ConvertEnsembleToSymbol(mat = brain.mat, species = "mouse") # Create Seurat Object brain <- CreateSeuratObject(counts = brain.mat) @@ -79,9 +79,9 @@ You can use BPCells to convert the matrices in your already created Seurat objec ```{r, message=FALSE, warning=FALSE, eval=FALSE} obj <- readRDS("/path/to/reference.rds") -# Write the counts layer to a directory -write_matrix_dir(mat = obj[["RNA"]]$counts, dir = '../data/brain_counts') -counts.mat <- open_matrix_dir(dir = "../data/brain_counts") +# Write the counts layer to a directory +write_matrix_dir(mat = obj[["RNA"]]$counts, dir = '/brahms/hartmana/vignette_data/bpcells/brain_counts') +counts.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/brain_counts") obj[["RNA"]]$counts <- counts.mat ``` @@ -121,7 +121,7 @@ for (i in 1:length(files.set)) { write_matrix_dir(mat = data, dir = paste0(file.dir, name, "_BP"), overwrite = TRUE) # Load in BP matrices mat <- open_matrix_dir(dir = paste0(file.dir, name, "_BP")) - mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = "human") + mat <- Azimuth:::ConvertEnsembleToSymbol(mat = mat, species = "human") # Get metadata metadata.list[[i]] <- LoadH5ADobs(path = path) data.list[[i]] <- mat @@ -141,7 +141,7 @@ When we create the Seurat object with the list of , we can then see that multipl ```{r} options(Seurat.object.assay.version = "v5") -merged.object <- CreateSeuratObject(counts = data.list[1:2], meta.data = metadata) +merged.object <- CreateSeuratObject(counts = data.list, meta.data = metadata) merged.object ``` @@ -150,11 +150,11 @@ merged.object Here, we show how to load a 1 million cell data set from Parse Biosciences and create a Seurat Object. The data is available for download [here](https://support.parsebiosciences.com/hc/en-us/articles/7704577188500-How-to-analyze-a-1-million-cell-data-set-using-Scanpy-and-Harmony) ```{r} -parse.data <- open_matrix_anndata_hdf5("../data/h5ad_files/ParseBio_PBMC.h5ad") -write_matrix_dir(mat = parse.data, dir = "../data/parse_counts") -parse.mat <- open_matrix_dir(dir = "../data/parse_counts") +parse.data <- open_matrix_anndata_hdf5("/brahms/hartmana/vignette_data/h5ad_files/ParseBio_PBMC.h5ad") +write_matrix_dir(mat = parse.data, dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") +parse.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") -metadata <- readRDS("../data/ParseBio_PBMC_meta.rds") +metadata <- readRDS("/brahms/hartmana/vignette_data/ParseBio_PBMC_meta.rds") metadata$disease <- sapply(strsplit(x = metadata$sample, split = "_"), "[", 1) parse.object <- CreateSeuratObject(counts = parse.mat, meta.data = metadata) @@ -168,7 +168,7 @@ If you save your object and load it in in the future, Seurat will access the on- This also makes it easy to share your Seurat objects with BPCells matrices by sharing a folder that contains both the object and the BPCells directory. ```{r} -saveRDS(brain, file = "obj.Rds", destdir = "../data/brain_object") +saveRDS(brain, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/brain_object") ``` diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index b1c4ca451..6cb6a65dc 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -36,20 +36,24 @@ library(Seurat) library(SeuratData) library(SeuratWrappers) library(Azimuth) +library(ggplot2) options(future.globals.maxSize = 1e9) -``` - -```{r, include=TRUE} -InstallData("pbmcref") +options(Seurat.object.assay.version = "v5") ``` ## Introduction Integration of single-cell sequencing datasets, for example across experimental batches, donors, or conditions, is often an important step in scRNA-seq workflows. Integrative analysis can help to match shared cell types and states across datasets, which can boost statistical power, and most importantly, facilitate accurate comparative analysis across datasets. In previous versions of Seurat we introduced methods for integrative analysis, including our ‘anchor-based’ integration workflow. Many labs have also published powerful and pioneering methods, including Harmony and scVI, for integrative analysis. We recognize that while the goal of matching shared cell types across datasets may be important for many problems, users may also be concerned about which method to use, or that integration could result in a loss of biological resolution. In Seurat v5, we introduce more flexible and streamlined infrastructure to run different integration algorithms with a single line of code. This makes it easier to explore the results of different integration methods, and to compare these results to a workflow that excludes integration steps. For this vignette, we use a [dataset of human PBMC profiled with seven different technologies](https://www.nature.com/articles/s41587-020-0465-8), profiled as part of a systematic comparative analysis (`pbmcsca`). The data is available as part of our [SeuratData](https://github.com/satijalab/seurat-data) package. + ## Layers in the Seurat v5 object Seurat v5 assays store data in layers. These layers can store raw, un-normalized counts (`layer='counts'`), normalized data (`layer='data'`), or z-scored/variance-stabilized data (`layer='scale.data'`). We can load in the data, remove low-quality cells, and obtain predicted cell annotations (which will be useful for assessing integration later), using our [Azimuth pipeline](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html). -```{r} + +```{r installdata, include=FALSE, eval=TRUE} +InstallData("pbmcref") +``` + +```{r loadannotate, message=FALSE} # load in the pbmc systematic comparative analysis dataset obj <- LoadData("pbmcsca") obj <- subset(obj, nFeature_RNA > 1000) @@ -59,7 +63,7 @@ obj ``` The object contains data from nine different batches (stored in the `Method` column in the object metadata), representing seven different technologies. We will aim to integrate the different batches together. In previous versions of Seurat, we would require the data to be represented as nine different Seurat objects. When using Seurat v5 assays, we can instead keep all the data in one object, but simply split the layers. After splitting, there are now 18 layers (a `counts` and `data` layer for each batch). We can also run a standard scRNA-seq analysis (i.e. without integration). Note that since the data is split into layers, normalization and variable feature identification is performed for each batch independently (a consensus set of variable features is automatically identified). -```{r} +```{r splitassay} obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) obj obj <- NormalizeData(obj) @@ -68,53 +72,76 @@ obj <- ScaleData(obj) obj <- RunPCA(obj) ``` We can now visualize the results of a standard analysis without integration. Note that cells are grouping both by cell type and by underlying method. While a UMAP analysis is just a visualization of this, clustering this dataset would return predominantly batch-specific clusters. Especially if previous cell-type annotations were not available, this would make downstream analysis extremely challenging. -```{r} +```{r unintegratedUMAP, fig.height=5, fig.width=14} obj <- FindNeighbors(obj, dims=1:30, reduction = 'pca') -obj <- FindNeighbors(obj, resolution = 2, cluster.names="unintegrated_clusters") +obj <- FindNeighbors(obj, resolution = 2, cluster.name = "unintegrated_clusters") obj <- RunUMAP(obj, dims = 1:30, reduction = 'pca', reduction.name = 'umap.unintegrated') # visualize by batch and cell type annotation # cell type annotations were previously added by Azimuth -DimPlot(obj, reduction = 'unintegrated.umap', group.by=c('batch','predicted.celltype.l2')) +DimPlot(obj, reduction = 'umap.unintegrated', group.by=c('Method','predicted.celltype.l2')) ``` Seurat v5 enables streamlined integrative analysis using the `IntegrateLayers` function. The method currently supports five integration methods. Each of these methods performs integration in low-dimensional space, and returns a dimensional reduction (i.e. `integrated.rpca`) that aims to co-embed shared cell types across batches: + * Anchor-based CCA integration (`method=CCAIntegration`) * Anchor-based RPCA integration (`method=RPCAIntegration`) * Harmony (`method=HarmonyIntegration`) * FastMNN (`method= FastMNNIntegration`) * scVI (`method=scVIIntegration`) -Note that scVI integration requires INSERT DESCRIPTIVE TEXT FOR REQUIREMENTS -```{r} -# add results for Anchor-based (CCA) + +Note that scVI integration requires `reticulate` which can be installed from CRAN (`install.packages("reticulate")`) as well as `scvi-tools` and its dependencies installed in a conda environment. Please see scVI installation instructions [here](https://docs.scvi-tools.org/en/stable/installation.html). +```{r integratelayerscca} obj <- IntegrateLayers(object = obj, method = CCAIntegration, verbose = F, new.reduction = 'integrated.cca') -# add results for Anchor-based (rPCA) +``` + +```{r integratelayersrpca} obj <- IntegrateLayers(object = obj, method = RPCAIntegration, verbose = F, new.reduction = 'integrated.rpca') -# add results for Harmony +``` + +```{r integratelayersharmony} obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, verbose = F, new.reduction = 'harmony') -# add results for Anchor-based (scI) +``` + +```{r integratelayersfastmnn} obj <- IntegrateLayers(object = obj, method = FastMNNIntegration, verbose = F, new.reduction = 'integrated.mnn') +``` + +```{r integratelayersscvi, eval=FALSE} obj <- IntegrateLayers(object = obj, method = scVIIntegration, verbose = F, new.reduction = 'integrated.scvi', - conda_env = '/home/haoy/miniconda3/envs/scvi-env') - + conda_env = '../miniconda3/envs/scvi-env') +``` + +```{r addscvi, include=FALSE} +scvi.reduc <- readRDS("/brahms/haoy/test/pbmcsca_scvi.dr.rds")@cell.embeddings +scvi.reduc <- scvi.reduc[Cells(obj),] +obj[["integrated.scvi"]] <- CreateDimReducObject(embeddings = scvi.reduc) ``` + For any of the methods, we can now visualize and cluster the datasets. We show this for CCA integration and scVI, but you can do this for any method -```{r} +```{r integratedprojections, fig.height=7, fig.width=16} obj <- FindNeighbors(obj, reduction = 'integrated.cca', dims = 1:30) -obj <- FindClusters(obj,resolution = 2, cluster.names='cca_clusters') +obj <- FindClusters(obj,resolution = 2, cluster.name = 'cca_clusters') obj <- RunUMAP(obj, reduction = "integrated.cca", dims = 1:30, reduction.name = 'umap.cca') -DimPlot(obj, reduction="umap.cca", group.by=c("Method", "predicted.celltype.l2", "cca_clusters")) -obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:30) -obj <- FindClusters(obj,resolution = 2, cluster.names='scvi_clusters') -obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:30, reduction.name = 'umap.scvi') -DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.celltype.l2", "scvi_clusters")) +p1 <- DimPlot(obj, reduction="umap.cca", group.by=c("Method", "predicted.celltype.l2", "cca_clusters")) + +obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:10) +obj <- FindClusters(obj,resolution = 2, cluster.name = 'scvi_clusters') +obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:10, reduction.name = 'umap.scvi') +p2 <- DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.celltype.l2", "scvi_clusters")) + +p1 / p2 ``` We hope that by simplifying the process of performing integrative analysis, users can more carefully evaluate the biological information retained in the integrated dataset. For example, users can compare the expression of biological markers based on different clustering solutions, or visualize one method's clustering solution on different UMAP visualizations. -```{r} -p1 <- VlnPlot(obj, "CD8A", group.by = 'unintegrated_clusters') -p2 <- VlnPlot(obj, "CD8A", group.by = 'cca_clusters') -p3 <- VlnPlot(obj, "CD8A", group.by = 'scvi_clusters') +```{r vlnplots, fig.height=5, fig.width=16, warning=FALSE} +p1 <- VlnPlot(obj, features = "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - Unintegrated Clusters") +p2 <- VlnPlot(obj, "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - CCA Clusters") +p3 <- VlnPlot(obj, "rna_CD8A", group.by = 'scvi_clusters') + NoLegend() + ggtitle("CD8A - scVI Clusters") p1 | p2 | p3 +``` + +```{r umaps, fig.height=5, fig.width=16} +obj <- RunUMAP(obj, reduction = "integrated.rpca", dims = 1:30, reduction.name = 'umap.rpca') p4 <- DimPlot(obj, reduction="umap.unintegrated", group.by=c("cca_clusters")) p5 <- DimPlot(obj, reduction="umap.rpca", group.by=c("cca_clusters")) p6 <- DimPlot(obj, reduction="umap.scvi", group.by=c("cca_clusters")) p4 | p5 | p6 -``` \ No newline at end of file +``` diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 8a430a360..977eaf8fd 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -8,7 +8,7 @@ output: date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' --- *** -```{r setup, include=TRUE} +```{r setup, include=FALSE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 27b5bd9b7..1053f1f0f 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -85,7 +85,7 @@ DefaultAssay(obj) <- 'sketch' ## Perform clustering on the sketched dataset Now that we have compressed the dataset, we can perform standard clustering and visualization of a 50,000 cell dataset. After clustering, we can see groups of cells that clearly correspond to precursors of distinct lineages, including endothelial cells (Igfbp7), Excitatory (Neurod6) and Inhibitory (Dlx2) neurons, Intermediate Progenitors (Eomes), Radial Glia (Vim), Cajal-Retzius cells (Reln), Oligodendroytes (Olig1), and extremely rare populations of macrophages (C1qa) that were oversampled in our sketched data. -```{r, warning=FALSE, message=FALSE} +```{r, warning=FALSE, message=FALSE, fig.width=5, fig.height=5} DefaultAssay(obj) <- 'sketch' obj <- FindVariableFeatures(obj) obj <- ScaleData(obj) @@ -93,18 +93,20 @@ obj <- RunPCA(obj) obj <- FindNeighbors(obj, dims = 1:50) obj <- FindClusters(obj, resolution = 2) obj <- RunUMAP(obj, dims = 1:50, return.model = T) -DimPlot(obj, label = T, reduction = 'umap') + NoLegend() +DimPlot(obj, label = T, reduction = 'umap') + NoLegend() ``` -```{r,fig.height = 15, fig.width = 15} +```{r,fig.height = 7, fig.width = 15} FeaturePlot(obj, c('Igfbp7', 'Neurod6', 'Dlx2', 'Eomes', 'Vim', 'Reln', 'Olig1', 'C1qa'), ncol = 4) ``` + ## Extend results to the full datasets We can now extend the cluster labels and dimensional reductions learned on the sketched cells to the full dataset. The `ProjectData` function projects the on-disk data, onto the `sketch` assay. It returns a Seurat object that includes a + * Dimensional reduction (PCA): The `pca.full` dimensional reduction extends the `pca` reduction on the sketched cells to all cells in the dataset * Dimensional reduction (UMAP): The `umap.full` dimensional reduction extends the `full` reduction on the sketched cells to all cells in the dataset * Cluster labels: The `cluster_full` column in the object metadata now labels all cells in the dataset with one of the cluster labels derived from the sketched cells -NOTE GET RID OF THE predicted_ IN THE METADATA COLUMN NAME + ```{r, warning=FALSE, message=FALSE} obj <- ProjectData(object = obj, assay = 'RNA', @@ -113,20 +115,19 @@ obj <- ProjectData(object = obj, sketched.reduction = 'pca', umap.model = 'umap', dims = 1:50, - refdata = list(cluster_full = 'seurat_clusters') - ) + refdata = list(cluster_full = 'seurat_clusters')) # now that we have projected the full dataset, switch back to analyzing all cells DefaultAssay(obj) <- 'RNA' ``` -```{r save.img} +```{r save.img, include = FALSE, eval=TRUE} library(ggplot2) -p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "predicted.cluster_full", alpha = 0.1) + NoLegend() +p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "cluster_full", alpha = 0.1) + NoLegend() ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) ``` ```{r} -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() # visualize gene expression on the sketched cells (fast) and the full dataset (slower) DefaultAssay(obj) <- 'sketch' x1 <- FeaturePlot(obj, 'C1qa') @@ -137,16 +138,18 @@ x1 | x2 ## Perform iterative sub-clustering NOTE THAT THE CLUSTER IDS WILL CHANGE -Now that we have performed an initial analysis of the dataset, we can iteratively 'zoom-in' on a cell subtype of interest, extract all cells of this type, and perform iterative sub-clustering. For example, we can see that Dlx2+ interneuron precursors are defined by clusters 7, 9, 16, and 29 +Now that we have performed an initial analysis of the dataset, we can iteratively 'zoom-in' on a cell subtype of interest, extract all cells of this type, and perform iterative sub-clustering. For example, we can see that Dlx2+ interneuron precursors are defined by clusters 7, 9, 16, and 29. + ```{r} DefaultAssay(obj) <- 'sketch' VlnPlot(obj, 'Dlx2') ``` We therefore extract all cells from the full on-disk dataset that are present in these clusters. There are XX,XXX of them. Since this is a manageable number, we can convert these data from on-disk storage into in-memory storage. We can then proceed with standard clustering. + ```{r} # subset cells in these clusters. Note that the data remains on-disk after subsetting -obj.sub <- subset(obj, subset = predicted.cluster_full %in% c(7, 9, 15, 29)) +obj.sub <- subset(obj, subset = cluster_full %in% c(2, 15, 18, 28, 40)) # now convert the RNA assay (previously on-disk) into an in-memory representation (sparse Matrix) obj.sub[['RNA']] <- CastAssay(object = obj.sub[['RNA']], to = 'dgCMatrix') # recluster the cells @@ -161,6 +164,7 @@ obj.sub <- FindClusters(obj.sub) ```{r} DimPlot(obj.sub, label = T) + NoLegend() ``` + Note that we can start to see distinct interneuron lineages emerging in this dataset. We can see a clear separation of interneuron precursors that originated from the medial ganglionic eminence (Lhx6) or caudal ganglionic eminence (Nr2f2). We can further see the emergence of Sst (Sst) and Pvalb (Mef2c)-committed interneurons, and a CGE-derived Meis2-expressing progenitor population. These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index 5338db219..f2b3db5ed 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -63,7 +63,7 @@ We use the `LoadVizgen()` function, which we have written to read in the output ```{r, message=FALSE, warning=FALSE} # Loading segmentations is a slow process and multi processing with the future pacakge is recommended -vizgen.obj <- LoadVizgen(data.dir = "../data/vizgen/s2r1/", fov = "s2r1") +vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/vignette_data/vizgen/s2r1/", fov = "s2r1") ``` The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: @@ -135,7 +135,7 @@ The `ImageDimPlot()` and `ImageFeaturePlot()` functions have a few parameters wh Since it can be difficult to visualize the spatial localization patterns of an individual cluster when viewing them all together, we can highlight all cells that belong to a particular cluster: ```{r, fig.height=8, fig.width=12} -p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 14)) +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 1)) p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 15)) p1 + p2 ``` @@ -214,7 +214,7 @@ unzip Xenium_V1_FF_Mouse_Brain_Coronal_Subset_CTX_HP_outs.zip First we read in the dataset and create a Seurat object. Provide the path to the data folder for a Xenium run as the input path. The RNA data is stored in the `Xenium` assay of the Seurat object. This step should take about a minute. ```{r load.xenium, results='hide'} -path <- "../data/xenium_tiny_subset" +path <- "/brahms/hartmana/vignette_data/xenium_tiny_subset" # Load the Xenium data xenium.obj <- LoadXenium(path, fov = "fov") # remove cells with 0 counts @@ -284,7 +284,7 @@ We use a cortex reference from the the Allen Brain Institute to annotate cells, Below, we use Slc17a7 expression to help determine the cortical region. ```{r, fig.width=5, fig.height=5, warning=FALSE} -xenium.obj <- LoadXenium("../data/xenium_tiny_subset") +xenium.obj <- LoadXenium("/brahms/hartmana/vignette_data/xenium_tiny_subset") p1 <- ImageFeaturePlot(xenium.obj, features = "Slc17a7", axes = TRUE, max.cutoff = "q90") p1 ``` @@ -370,7 +370,7 @@ xenium.obj <- BuildNicheAssay( ``` ```{r load.niche.results, eval=TRUE, include=FALSE} -xenium.obj <- readRDS("../data/xenium_niches_presaved.rds") +xenium.obj <- readRDS("/brahms/hartmana/vignette_data/xenium_niches_presaved.rds") ``` After clustering the cell type composition nearby each cell, the neuronal layers in the cortex are visually demarcated. @@ -400,24 +400,6 @@ Further, we observe that the composition of each niche is enriched for distinct table(xenium.obj$predicted.celltype, xenium.obj$niches) ``` -Next, we perform DE between atrocytes from two of the niches. - -Note: I think this style of analysis is very risky - most of the DEGs comparing a cell type across niches are cell type markers of the other cell types enriched in one of the niches likely due to incorrect molecular assignment to cells. - -```{r niche.de} -xenium.obj$celltype.niches <- paste0(xenium.obj$predicted.celltype, "_", xenium.obj$niches) -Idents(xenium.obj) <- "celltype.niches" -niche.markers <- FindMarkers(xenium.obj, assay = "Xenium", ident.1 = "Astro_1", ident.2 = "Astro_5") -``` - -```{r niche.vln} -VlnPlot( - xenium.obj, - idents = c("Astro_1", "Astro_5"), - assay = "Xenium", - features = rownames(niche.markers)[1:6]) -``` - # Human Lung: Nanostring CosMx Spatial Molecular Imager This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. @@ -427,12 +409,12 @@ In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `Lo For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. ```{r load} -nano.obj <- LoadNanostring(data.dir = "../data/nanostring/lung5_rep1", fov="lung5.rep1") +nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") ``` ```{r integration} # add in precomputed Azimuth annotations -azimuth.data <- readRDS("../data/nanostring_data.Rds") +azimuth.data <- readRDS("/brahms/hartmana/vignette_data/nanostring_data.Rds") nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) nano.obj[["proj.umap"]] <- azimuth.data$umap Idents(nano.obj) <- nano.obj$predicted.annotation.l1 @@ -514,7 +496,7 @@ First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function ```{r} codex.obj <- LoadAkoya( - filename = "../data/LN7910_20_008_11022020_reg001_compensated.csv", + filename = "/brahms/hartmana/vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", type = "processor", fov = "HBM754.WKLP.262" ) diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index edf7a452d..8613dfe9d 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -322,7 +322,7 @@ query <- SpatialRNA(coords, query.counts, colSums(query.counts)) ``` ```{r rctd.reference, eval=FALSE} -allen.cortex.ref <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") +allen.cortex.ref <- readRDS("../data/allen_cortex.rds") allen.cortex.ref <- UpdateSeuratObject(allen.cortex.ref) Idents(allen.cortex.ref) <- "subclass" diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 19ab76f70..7bbb7fda8 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -3,9 +3,9 @@ - title: Install Seurat 5 name: install_seurat5 summary: | - Install Seurat 5 and required dependencies. + Install Seurat 5 and suggested dependencies. image: SeuratV5.png - + - title: Seurat 5 Assay name: seurat5_assay summary: | @@ -23,14 +23,14 @@ - title: Integration name: seurat5_integration summary: | - Integrate datesets in Seurat 5 using a variety of methods. + Integrate datasets in Seurat 5 using a variety of methods. image: integration_seurat5.jpg - title: COVID Mapping name: COVID_SCTMapping summary: | Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. - image: COVID_SCTMapping.jpg + image: COVID_SCTMapping.png - title: Analysis of spatial datasets (Imaging-based) name: seurat5_spatial_vignette_2 @@ -41,7 +41,7 @@ - title: Sketch Clustering name: seurat5_sketch_analysis summary: | - Analyze a 1.3 million cell mouse brain dataset using the on-disk capabilities introduced in Seurat 5. + Analyze a 1.3 million cell mouse brain dataset using on-disk capabilities powered by BPCells. image: sketch.png - title: Sketch Integration @@ -56,7 +56,7 @@ Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. image: bridge_integration.png - - title: RunAzimuth + - title: Run Azimuth name: seurat5_run_azimuth summary: | Annotate cells locally using Azimuth. From a60e500a29c0f2d629d13958d9896a89c1af37bb Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 23 Mar 2023 09:54:09 -0400 Subject: [PATCH 550/979] update install page --- vignettes/install.Rmd | 30 ++++++++++++ vignettes/install_seurat5.Rmd | 60 ----------------------- vignettes/seurat5_install.Rmd | 90 ----------------------------------- 3 files changed, 30 insertions(+), 150 deletions(-) delete mode 100644 vignettes/install_seurat5.Rmd delete mode 100644 vignettes/seurat5_install.Rmd diff --git a/vignettes/install.Rmd b/vignettes/install.Rmd index b5e3f39eb..030542d29 100644 --- a/vignettes/install.Rmd +++ b/vignettes/install.Rmd @@ -5,6 +5,36 @@ output: html_document To install Seurat, [R](https://www.r-project.org/) version 4.0 or greater is required. We also recommend installing [R Studio](https://www.rstudio.com/). +# ![Seurat 5:](../output/images/SeuratV5.png){#id .class width=60 height=60} Seurat 5: Install from GitHub + +Copy the code below to install Seurat 5: + +```{r required, eval=FALSE} +remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE) +``` + +The following packages are not required but are used in many Seurat 5 vignettes: + +* SeuratData: automatically load datasets pre-packaged as Seurat objects +* Azimuth: local annotation of scRNA-seq and scATAC-seq queries across multiple organs and tissues +* SeuratWrappers: enables use of additional integration and differential expression methods +* Signac: analysis of single-cell chromatin data + +```{r additional, eval=FALSE} +remotes::install_github("satijalab/seurat-data", "seurat5", quiet = TRUE) +remotes::install_github("satijalab/azimuth", "seurat5", quiet = TRUE) +remotes::install_github("satijalab/seurat-wrappers", "seurat5", quiet = TRUE) +remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) +``` + +Seurat 5 utilizes BPCells to support analysis of extremely large datasets: + +```{r bpcells, eval=FALSE} +remotes::install_github("bnprks/BPCells", quiet = TRUE) +``` + +For more information on BPCells installation, please see the [installation instructions](https://bnprks.github.io/BPCells/#installation). For macOS users, the following GitHub issues concerning [M1 chip installation](https://github.com/bnprks/BPCells/issues/6) and [compiler compatibility](https://github.com/bnprks/BPCells/issues/3) may be of use. + # Install from CRAN Seurat is available on [CRAN](https://cran.r-project.org/package=Seurat) for all platforms. To install, run: diff --git a/vignettes/install_seurat5.Rmd b/vignettes/install_seurat5.Rmd deleted file mode 100644 index 9de82ba94..000000000 --- a/vignettes/install_seurat5.Rmd +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: "Install Seurat 5" -output: - html_document: - theme: united - pdf_document: default -date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' ---- - -```{r setup, include=FALSE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - fig.width = 10, - message = FALSE, - warning = FALSE, - time_it = TRUE, - error = TRUE -) -``` - -Copy the code below to install Seurat 5: - -```{r required, eval=FALSE} -remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE) -``` - -The following packages are not required but are used in many Seurat 5 vignettes: - -* SeuratData: automatically load datasets pre-packaged as Seurat objects -* Azimuth: local annotation of scRNA-seq and scATAC-seq queries across multiple organs and tissues -* SeuratWrappers: enables use of additional integration and differential expression methods -* Signac: analysis of single-cell chromatin data - -```{r additional, eval=FALSE} -remotes::install_github("satijalab/seurat-data", "seurat5", quiet = TRUE) -remotes::install_github("satijalab/azimuth", "seurat5", quiet = TRUE) -remotes::install_github("satijalab/seurat-wrappers", "seurat5", quiet = TRUE) -remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) -``` - -Seurat 5 utilizes BPCells to support analysis of extremely large datasets: - -```{r bpcells, eval=FALSE} -remotes::install_github("bnprks/BPCells", quiet = TRUE) -``` - -For more information on BPCells installation, please see the [installation instructions](https://bnprks.github.io/BPCells/#installation). For macOS users, the following GitHub issues concerning [M1 chip installation](https://github.com/bnprks/BPCells/issues/6) and [compiler compatibility](https://github.com/bnprks/BPCells/issues/3) may be of use. diff --git a/vignettes/seurat5_install.Rmd b/vignettes/seurat5_install.Rmd deleted file mode 100644 index b5e3f39eb..000000000 --- a/vignettes/seurat5_install.Rmd +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: "Installation Instructions for Seurat" -output: html_document ---- - -To install Seurat, [R](https://www.r-project.org/) version 4.0 or greater is required. We also recommend installing [R Studio](https://www.rstudio.com/). - -# Install from CRAN - -Seurat is available on [CRAN](https://cran.r-project.org/package=Seurat) for all platforms. To install, run: - -```{r eval = FALSE} -# Enter commands in R (or R studio, if installed) -install.packages('Seurat') -library(Seurat) -``` - -If you see the warning message below, enter `y`: - -```{r eval=FALSE} -package which is only available in source form, and may need compilation of C/C++/Fortran: 'Seurat' -Do you want to attempt to install these from sources? -y/n: -``` - -# Install previous versions of Seurat - -## Install any version 3 release - -Any of the Seurat version 3 releases can be installed with the following command: - -```{r eval = FALSE} -remotes::install_version("Seurat", version = "3.X.X") -``` - -## Install the last version 2 release (2.3.4) - -To facilitate easy re-installation of the last version 2 release, we are hosting the binaries on our website. These can be installed with the following command: - -```{r eval = FALSE} -source("https://z.umn.edu/archived-seurat") -``` - -
    - View the script - -
    - -## Older versions of Seurat -Old versions of Seurat, from Seurat v2.0.1 and up, are hosted in CRAN's archive. To install an old version of Seurat, run: - -```{r eval = FALSE} -# Enter commands in R (or R studio, if installed) -# Install the remotes package -install.packages('remotes') -# Replace '2.3.0' with your desired version -remotes::install_version(package = 'Seurat', version = package_version('2.3.0')) -library(Seurat) -``` - -For versions of Seurat older than those not hosted on CRAN (versions 1.3.0 and 1.4.0), please download the packaged source code from our [releases page](https://github.com/satijalab/seurat/releases) and [install from the tarball](https://stackoverflow.com/questions/4739837/how-do-i-install-an-r-package-from-the-source-tarball-on-windows). - -# Install the development version of Seurat - -Install the development version of Seurat - directly from [GitHub](https://github.com/satijalab/seurat/tree/develop). - -```{r eval = FALSE} -# Enter commands in R (or R studio, if installed) -# Install the remotes package -install.packages('remotes') -remotes::install_github(repo = 'satijalab/seurat', ref = 'develop') -library(Seurat) -``` - -# Docker - -We provide docker images for Seurat via [dockerhub](https://hub.docker.com/r/satijalab/seurat). - -To pull the latest image from the command line: - -```sh -docker pull satijalab/seurat:latest -``` - -To use as a base image in a new Dockerfile: - -```sh -FROM satijalab/seurat:latest -``` - From c1f00c08d01d0d62b299432589ab7b429f57fbc2 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 23 Mar 2023 11:31:57 -0400 Subject: [PATCH 551/979] updating bpcells and bridge vignettes --- .../seurat5_bpcells_interaction_vignette.Rmd | 50 +++++++++++-------- vignettes/seurat5_integration_bridge.Rmd | 5 +- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 0ed2103e7..039bcae7f 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -54,7 +54,7 @@ We use BPCells functionality to both load in our data and write the counts layer # Load Data -### Load Data from one h5 file +## Load Data from one h5 file In this section, we will load a dataset of mouse brain cells freely available from 10X Genomics. This includes 1.3 Million single cells that were sequenced on the Illumina NovaSeq 6000. The raw data can be found [here](https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.3.0/1M_neurons?). To read in the file, we will use open_matrix_10x_hdf5, a BPCells function written to read in feature matrices from 10x. We then write a matrix directory, load the matrix, and create a Seurat object. @@ -93,15 +93,25 @@ obj[["RNA"]]$counts <- counts.mat Once this conversion is done, you can perform typical Seurat functions on the object. For example, we can normalize data and visualize features by automatically accessing the on-disk counts. ```{r} -VlnPlot(brain, features = c("Sox10", "Slc17a7", "Aif1"), ncol = 3, layer = "counts") +VlnPlot(brain, features = c("Sox10", "Slc17a7", "Aif1"), ncol = 3, layer = "counts", alpha = 0.1) # We then normalize and visualize again brain <- NormalizeData(brain, normalization.method = "LogNormalize") -VlnPlot(brain, features = c("Sox10", "Slc17a7", "Aif1"), ncol = 3, layer = "data") +VlnPlot(brain, features = c("Sox10", "Slc17a7", "Aif1"), ncol = 3, layer = "data", alpha = 0.1) ``` +### Saving Seurat objects with on-disk layers + +If you save your object and load it in in the future, Seurat will access the on-disk matrices by their path, which is stored in the assay level data. To make it easy to ensure these are saved in the same place, we provide new functionality to the saveRDS function. In this function, you specify your filename and the destination directory. The pointer to the path in the Seurat object will change to the destination directory. + +This also makes it easy to share your Seurat objects with BPCells matrices by sharing a folder that contains both the object and the BPCells directory. + +```{r} +saveRDS(brain, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/brain_object") +``` -### Load data from multiple h5ad files + +## Load data from multiple h5ad files You can also download data from multiple matrices. In this section, we create a Seurat object using multiple peripheral blood mononuclear cell (PBMC) samples that are freely available for downlaod from CZI [here](https://cellxgene.cziscience.com/collections). We download data from [Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We use the BPCells function to read h5ad files. @@ -115,21 +125,22 @@ data.list <- c() metadata.list <- c() for (i in 1:length(files.set)) { - name <- gsub(".h5ad", "", files.set[i]) path <- paste0(file.dir, files.set[i]) data <- open_matrix_anndata_hdf5(path) - write_matrix_dir(mat = data, dir = paste0(file.dir, name, "_BP"), overwrite = TRUE) + write_matrix_dir(mat = data, dir = paste0(file.dir, + gsub(".h5ad", "", path), + "_BP"), overwrite = TRUE) # Load in BP matrices - mat <- open_matrix_dir(dir = paste0(file.dir, name, "_BP")) + mat <- open_matrix_dir(dir = paste0(file.dir, + gsub(".h5ad", "", path), "_BP")) mat <- Azimuth:::ConvertEnsembleToSymbol(mat = mat, species = "human") # Get metadata metadata.list[[i]] <- LoadH5ADobs(path = path) data.list[[i]] <- mat - names(data.list[i]) <- name } - +# Name layers +names(data.list) <- c("ahern", "jin", "yoshida") # Only pull out needed metadata files -#Reduce(intersect,lapply(metadata.list, colnames)) # all in common metadata.list <- lapply(metadata.list, function(x) { x <- x[, c("sex", "cell_type", "donor_id", "disease")] return(x) @@ -137,7 +148,7 @@ metadata.list <- lapply(metadata.list, function(x) { metadata <- Reduce(rbind, metadata.list) ``` -When we create the Seurat object with the list of , we can then see that multiple counts layers exist that represent each dataset. This object contains over a million cells, yet only takes up minimal space in memory! +When we create the Seurat object with the list of matrices from each publication, we can then see that multiple counts layers exist that represent each dataset. This object contains over a million cells, yet only takes up minimal space in memory! ```{r} options(Seurat.object.assay.version = "v5") @@ -145,7 +156,11 @@ merged.object <- CreateSeuratObject(counts = data.list, meta.data = metadata) merged.object ``` -### Parse Biosciences +```{r save_merged} +saveRDS(merged.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/merged_object") +``` + +## Parse Biosciences Here, we show how to load a 1 million cell data set from Parse Biosciences and create a Seurat Object. The data is available for download [here](https://support.parsebiosciences.com/hc/en-us/articles/7704577188500-How-to-analyze-a-1-million-cell-data-set-using-Scanpy-and-Harmony) @@ -160,15 +175,8 @@ metadata$disease <- sapply(strsplit(x = metadata$sample, split = "_"), "[", 1) parse.object <- CreateSeuratObject(counts = parse.mat, meta.data = metadata) ``` - -## Saving Seurat objects with on-disk layers - -If you save your object and load it in in the future, Seurat will access the on-disk matrices by their path, which is stored in the assay level data. To make it easy to ensure these are saved in the same place, we provide new functionality to the saveRDS function. In this function, you specify your filename and the destination directory. The pointer to the path in the Seurat object will change to the destination directory. - -This also makes it easy to share your Seurat objects with BPCells matrices by sharing a folder that contains both the object and the BPCells directory. - -```{r} -saveRDS(brain, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/brain_object") +```{r save_parse} +saveRDS(parse.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/parse_object") ``` diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 977eaf8fd..4c3535520 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -42,7 +42,9 @@ In this vignette we demonstrate: * Mapping the scATAC-seq dataset via bridge integration * Exploring and assessing the resulting annotations -First, we install the updated version of Seurat that supports this infrastructure, as well as other packages necessary for this vignette. +### Azimuth ATAC for Bridge Integration +Users can now automatically run bridge integration for PBMC and Bone Marrow scATAC-seq queries with the newly released Azimuth ATAC workflow. For more details, see the section on ATAC data in this [vignette](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html). + ```{r, message=FALSE, warning=FALSE} library(remotes) @@ -73,7 +75,6 @@ obj.multi <- CreateSeuratObject(counts = rna_counts) obj.multi[["percent.mt"]] <- PercentageFeatureSet(obj.multi, pattern = "^MT-") # add the ATAC-seq assay -options(Seurat.object.assay.version = "v3") grange.counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-")) grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts) atac_counts <- atac_counts[as.vector(grange.use), ] From f1ef0ecd0db9264532e020eb9cafa810ffccd05a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 23 Mar 2023 17:42:26 -0400 Subject: [PATCH 552/979] covid vig update --- vignettes/COVID_SCTMapping.Rmd | 86 +++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 88008b6ac..0661ff403 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -8,6 +8,7 @@ output: date: 'Compiled: `r Sys.Date()`' --- + ```{r setup, include=TRUE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ @@ -32,8 +33,6 @@ knitr::opts_chunk$set( ) ``` -## load package - ```{r, warning=F, message=F} library(Seurat) library(BPCells) @@ -41,67 +40,78 @@ library(dplyr) options(future.globals.maxSize = 1e9) ``` -## load query -```{r,warning=F, message=F} + +## Introduction: Reference mapping analysis in Seurat v5 +In numerous studies profiling multiple tissues across hundreds of individuals and millions of cells, the single-cell reference mapping approach offers a robust and consistent method for annotating all those publicly available single-cell datasets. This vignette demonstrates the process of mapping multiple query scRNA PBMC datasets onto [our annotated CITE-seq reference of 162,000 PBMC measured with 228 antibodies](https://doi.org/10.1016/j.cell.2021.04.048) + +We download three datasets from [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We demonstrate how to create a Seurat object with BPCells matrix in our BPCells interaction vignette. + +We have previously demonstrated the use of the reference-mapping approach for annotating cell labels in a single query dataset. With Seurat v5, we have substantially improved the speed, memory efficiency, and user-friendliness for mapping a large number of query datasets to the same reference. + +In this vignette, we demonstrate how to use a previously established reference to interpret three scRNA-seq query datasets: + +* Annotate all query cells from multiple studies based on a set of reference-defined cell states +* Aggregate the annotated cells into individuals for performing Pseudo-bulk differential analysis + +To run this vignette please install Seurat v5. + + + +## Load the Multimodal PBMC Reference Dataset and Query Datasets +We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](https://doi.org/10.1016/j.cell.2021.04.048). We will use the query datasets prepared in BPCells interaction vignette containing scRNA-seq data from three different studies. + +```{r load.data} +reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") object <- readRDS("/brahms/hartmana/vignette_data/covid_mapping_merged_object.rds") object <- NormalizeData(object, verbose = FALSE) ``` -## load reference -```{r} -obj.ref <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") -obj.ref -``` -## mapping -```{r} +## Mapping +Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). -time2_anchoring <- system.time({ -anchor <- FindTransferAnchors(reference = obj.ref, +We then transfer cell type labels from the reference to all three queries. Additionally, we project the query data onto the UMAP structure of the reference. + + + +```{r} +anchor <- FindTransferAnchors(reference = reference, query = object, reference.reduction = 'spca', normalization.method = 'SCT', - dims = 1:50, - k.filter = NA, - k.anchor = 5, - features = rownames(obj.ref[['spca']]@feature.loadings)) -}) - -time3_MapQuery <- system.time({ - - object <- MapQuery( + dims = 1:50) +object <- MapQuery( anchorset = anchor, query = object, - reference = obj.ref, + reference = reference, refdata = list( - l1.s5 = "celltype.l1", - l2.s5 = "celltype.l2" - ), - reduction.model = "wnn.umap" + celltype.l1 = "celltype.l1", + celltype.l2 = "celltype.l2" + ), + reduction.model = 'wnn.umap' ) -}) - -``` -```{r} -anchor ``` - + +## Explore the mapping results +We can now visualize the 2,700 query cells. They have been projected into a UMAP visualization defined by the reference, and each has received annotations at two levels of granularity (level 1, and level 2). + ```{r, fig.width=10, fig.height=6} -p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.l2.s5',alpha = 0.8, label = T) + NoLegend() -p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T) + NoLegend() +p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.8, label = T) + NoLegend() +p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.8, label = T) + NoLegend() p1 + p2 ``` ```{r, fig.width=10, fig.height=6} -p3 <-DimPlot(object, reduction = 'ref.umap', group.by = 'celltype',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() +p3 <- DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() +p3 <- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() p3 ``` -## pseudo-bulk +## Differential analysis ```{r} bulk <- AverageExpression(object, method = 'aggregate', - return.seurat = T, + return.seurat = TRUE, slot = 'counts', assays = 'RNA', group.by = c("predicted.l2.s5", "patient", "disease_status_standard") From dc7f08b4a9107374fc1f1714c319fb3b24059e77 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 23 Mar 2023 18:31:55 -0400 Subject: [PATCH 553/979] adding new metadata to bpcells vignette --- vignettes/seurat5_bpcells_interaction_vignette.Rmd | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 039bcae7f..e5445ecad 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -127,12 +127,10 @@ metadata.list <- c() for (i in 1:length(files.set)) { path <- paste0(file.dir, files.set[i]) data <- open_matrix_anndata_hdf5(path) - write_matrix_dir(mat = data, dir = paste0(file.dir, - gsub(".h5ad", "", path), + write_matrix_dir(mat = data, dir = paste0(gsub(".h5ad", "", path), "_BP"), overwrite = TRUE) # Load in BP matrices - mat <- open_matrix_dir(dir = paste0(file.dir, - gsub(".h5ad", "", path), "_BP")) + mat <- open_matrix_dir(dir = paste0(gsub(".h5ad", "", path), "_BP")) mat <- Azimuth:::ConvertEnsembleToSymbol(mat = mat, species = "human") # Get metadata metadata.list[[i]] <- LoadH5ADobs(path = path) @@ -140,9 +138,13 @@ for (i in 1:length(files.set)) { } # Name layers names(data.list) <- c("ahern", "jin", "yoshida") -# Only pull out needed metadata files + +# Add Metadata +for (i in 1:length(metadata.list)){ + metadata.list[[i]]$publication <- names(data.list)[i] +} metadata.list <- lapply(metadata.list, function(x) { - x <- x[, c("sex", "cell_type", "donor_id", "disease")] + x <- x[, c("publication", "sex", "cell_type", "donor_id", "disease")] return(x) }) metadata <- Reduce(rbind, metadata.list) From c6c0b351897594673b504494531141e6656ed143 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 23 Mar 2023 21:10:07 -0400 Subject: [PATCH 554/979] update sketh mouse brain --- vignettes/seurat5_sketch_analysis.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 1053f1f0f..5b842b982 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -138,14 +138,14 @@ x1 | x2 ## Perform iterative sub-clustering NOTE THAT THE CLUSTER IDS WILL CHANGE -Now that we have performed an initial analysis of the dataset, we can iteratively 'zoom-in' on a cell subtype of interest, extract all cells of this type, and perform iterative sub-clustering. For example, we can see that Dlx2+ interneuron precursors are defined by clusters 7, 9, 16, and 29. +Now that we have performed an initial analysis of the dataset, we can iteratively 'zoom-in' on a cell subtype of interest, extract all cells of this type, and perform iterative sub-clustering. For example, we can see that Dlx2+ interneuron precursors are defined by clusters 2, 15, 18, 28 and 40. ```{r} DefaultAssay(obj) <- 'sketch' VlnPlot(obj, 'Dlx2') ``` -We therefore extract all cells from the full on-disk dataset that are present in these clusters. There are XX,XXX of them. Since this is a manageable number, we can convert these data from on-disk storage into in-memory storage. We can then proceed with standard clustering. +We therefore extract all cells from the full on-disk dataset that are present in these clusters. There are 200,892 of them. Since this is a manageable number, we can convert these data from on-disk storage into in-memory storage. We can then proceed with standard clustering. ```{r} # subset cells in these clusters. Note that the data remains on-disk after subsetting From ec079bd35396c29780bcdc785a1fc0c8febe9166 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 23 Mar 2023 22:17:46 -0400 Subject: [PATCH 555/979] incorporate edits --- index.md | 38 +++++++++++++----------- vignettes/seurat5_integration.Rmd | 9 +++--- vignettes/seurat5_integration_bridge.Rmd | 1 - vignettes/seurat5_spatial_vignette_2.Rmd | 21 ++++++------- vignettes/spatial_vignette.Rmd | 16 ++++++---- 5 files changed, 47 insertions(+), 38 deletions(-) diff --git a/index.md b/index.md index 7925418e9..95bb47a44 100644 --- a/index.md +++ b/index.md @@ -1,25 +1,28 @@ ![](articles/assets/seurat_banner.jpg) -# Official release of Seurat 4.0 +# Beta release of Seurat v5 -We are excited to release Seurat v4.0! This update brings the following new features and functionality: +We are excited to release an initial beta version of Seurat v5! This update brings the following new features and functionality: -* **Integrative multimodal analysis.** The ability to make simultaneous measurements of multiple data types from the same cell, known as multimodal analysis, represents a new and exciting frontier for single-cell genomics. In Seurat v4, we introduce weighted nearest neighbor (WNN) analysis, an unsupervised strategy to learn the information content of each modality in each cell, and to define cellular state based on a weighted combination of both modalities. - In our new paper, we generate a CITE-seq dataset featuring paired measurements of the transcriptome and 228 surface proteins, and leverage WNN to define a multimodal reference of human PBMC. You can use WNN to analyze multimodal data from a variety of technologies, including CITE-seq, ASAP-seq, 10X Genomics ATAC + RNA, and SHARE-seq. +* **Analysis of sequencing and imaging-based spatial datasets.** Spatially resolved datasets are redefining our understanding of cellular interactions and the organization of human tissues. Both sequencing-based(i.e. Visium, SLIDE-seq, etc.), and imaging-based (MERFISH/Vizgen, Xenium, CosMX, etc.) technologies have unique advantages, and require tailored analytical methods and software infrastructure. In Seurat v5, we introduce flexible and diverse support for a wide variety of spatially resolved data types, and support for analytical techniqiues for scRNA-seq integration, deconvolution, and niche identification. - - Paper: [Integrated analysis of multimodal single-cell data](https://doi.org/10.1016/j.cell.2021.04.048) - - Vignette: [Multimodal clustering of a human bone marrow CITE-seq dataset](articles/weighted_nearest_neighbor_analysis.html) - - Portal: [Click here](https://atlas.fredhutch.org/nygc/multimodal-pbmc/) - - Dataset: [Download here](https://atlas.fredhutch.org/data/nygc/multimodal/pbmc_multimodal.h5seurat) + - Vignette: [Analysis of spatial datasets (Sequencing-based)](articles/seurat5_spatial_vignette.html) + - Vignette: [Analysis of spatial datasets (Imaging-based)](articles/seurat5_spatial_vignette_2.html)\ -* **Rapid mapping of query datasets to references.** We introduce Azimuth, a workflow to leverage high-quality reference datasets to rapidly map new scRNA-seq datasets (queries). For example, you can map any scRNA-seq dataset of human PBMC onto our reference, automating the process of visualization, clustering annotation, and differential expression. Azimuth can be run within Seurat, or using a standalone web application that requires no installation or programming experience. +* **Integrative multimodal analysis.** The cellular transcriptome is just one aspect of cellular identity, and recent technologies enable routine profiling of chromatin accessibility, histone modifications, and protein levels from single cells. In Seurat v5, we introduce 'bridge integration', a statistical method to integrate experiments measuring different modalities (i.e. separate scRNA-seq and scATAC-seq datasets), using a separate multiomic dataset as a molecular 'bridge'. For example, we demonstrate how to map scATAC-seq datasets onto scRNA-seq datasets, to assist users in interpreting and annotating data from new modalities.\ + We recognize that while the goal of matching shared cell types across datasets may be important for many problems, users may also be concerned about which method to use, or that integration could result in a loss of biological resolution. In Seurat v5, we also introduce flexible and streamlined workflows for the integration of multiple scRNA-seq datasets. This makes it easier to explore the results of different integration methods, and to compare these results to a workflow that excludes integration steps. - - Vignette: [Mapping scRNA-seq queries onto reference datasets](articles/multimodal_reference_mapping.html) - - Web app: [Automated mapping, visualization, and annotation of scRNA-seq datasets from human PBMC](https://azimuth.hubmapconsortium.org/) + - Paper: [Dictionary learning for integrative, multimodal, and scalable single-cell analysis](https://doi.org/10.1101/2022.02.24.481684) + - Vignette: [Cross-modality Bridge Integration](articles/seurat5_integration_bridge.html) + - Website: [Azimuth-ATAC, reference-mapping for scATAC-seq datasets](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html)\ -Additional speed and usability updates: We have made minor changes in v4, primarily to improve the performance of Seurat v4 on large datasets. These changes substantially improve the speed and memory requirements, but do not adversely impact downstream results. We provide a detailed description of key changes [here](articles/v4_changes.html). Users who wish to fully reproduce existing results can continue to do so by continuing to install Seurat v3. +* **Flexible, interactive, and highly scalable analsyis.** The size and scale of single-cell sequencing datasets is rapidly increasing, outpacing even Moore's law. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore exciting datasets spanning millions of cells, even if they cannot be fully loaded into memory. We introduce support for 'sketch'-based analysis, where representative subsamples of a large dataset are stored in-memory to enable rapid and iterative analysis - while the full dataset remains accessible via on-disk storage.\ + We enable high-performance via the BPCells package, developed by Ben Parks in the Greenleaf Lab. The BPCells package enables high-performance analysis via innovative bit-packing compression techniques, optimized C++ code, and use of streamlined and lazy operations. -We believe that users who are familiar with Seurat v3 should experience a smooth transition to Seurat v4. While we have introduced extensive new functionality, existing workflows, functions, and syntax are largely unchanged in this update. In addition, Seurat objects that have been previously generated in Seurat v3 can be seamlessly loaded into Seurat v4 for further analysis. + - Vignette: [Sketch-based clustering of 1.3M brain cells (10x Genomics)](articles/seurat5_sketch_analysis.html) + - Vignette: [Sketch-based integration of 1M healthy and diabetic PBMC (Parse Biosciences)](articles/ParseBio_sketch_integration.html) + - BPCells Documentation: [Scaling Single Cell Analysis to Milllions of Cells](https://bnprks.github.io/BPCells/) + - Vignette: [Interacting with BPCell matrices in Seurat v5](articles/seurat5_bpcells_interaction_vignette.html) # About Seurat @@ -27,10 +30,11 @@ Seurat is an R package designed for QC, analysis, and exploration of single-cell If you use Seurat in your research, please considering citing: -* [Hao\*, Hao\*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048) [Seurat V4] -* [Stuart\*, Butler\*, et al., Cell 2019](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8) [Seurat V3] -* [Butler\* et al., Nat Biotechnol 2018](https://doi.org/10.1038/nbt.4096) [Seurat V2] -* [Satija\*, Farrell\*, et al., Nat Biotechnol 2015](https://doi.org/10.1038/nbt.3192) [Seurat V1] +* [Hao, et al., bioRxiv 2022](https://doi.org/10.1101/2022.02.24.481684) [Seurat v5] +* [Hao\*, Hao\*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048) [Seurat v4] +* [Stuart\*, Butler\*, et al., Cell 2019](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8) [Seurat v3] +* [Butler, et al., Nat Biotechnol 2018](https://doi.org/10.1038/nbt.4096) [Seurat v2] +* [Satija\*, Farrell\*, et al., Nat Biotechnol 2015](https://doi.org/10.1038/nbt.3192) [Seurat v1] All methods emphasize clear, attractive, and interpretable visualizations, and were designed to be [easily used](articles/get_started.html) by both dry-lab and wet-lab researchers. diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 6cb6a65dc..c83fde1f9 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -37,6 +37,7 @@ library(SeuratData) library(SeuratWrappers) library(Azimuth) library(ggplot2) +library(patchwork) options(future.globals.maxSize = 1e9) options(Seurat.object.assay.version = "v5") ``` @@ -117,18 +118,18 @@ obj[["integrated.scvi"]] <- CreateDimReducObject(embeddings = scvi.reduc) ``` For any of the methods, we can now visualize and cluster the datasets. We show this for CCA integration and scVI, but you can do this for any method -```{r integratedprojections, fig.height=7, fig.width=16} +```{r integratedprojections, fig.height=16, fig.width=16} obj <- FindNeighbors(obj, reduction = 'integrated.cca', dims = 1:30) obj <- FindClusters(obj,resolution = 2, cluster.name = 'cca_clusters') obj <- RunUMAP(obj, reduction = "integrated.cca", dims = 1:30, reduction.name = 'umap.cca') -p1 <- DimPlot(obj, reduction="umap.cca", group.by=c("Method", "predicted.celltype.l2", "cca_clusters")) +p1 <- DimPlot(obj, reduction="umap.cca", group.by=c("Method", "predicted.celltype.l2", "cca_clusters"), combine=FALSE) obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:10) obj <- FindClusters(obj,resolution = 2, cluster.name = 'scvi_clusters') obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:10, reduction.name = 'umap.scvi') -p2 <- DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.celltype.l2", "scvi_clusters")) +p2 <- DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.celltype.l2", "scvi_clusters"), combine = FALSE) -p1 / p2 +wrap_plots(c(p1, p2), ncol=2) ``` We hope that by simplifying the process of performing integrative analysis, users can more carefully evaluate the biological information retained in the integrated dataset. For example, users can compare the expression of biological markers based on different clustering solutions, or visualize one method's clustering solution on different UMAP visualizations. ```{r vlnplots, fig.height=5, fig.width=16, warning=FALSE} diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 4c3535520..f008f7ad1 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -47,7 +47,6 @@ Users can now automatically run bridge integration for PBMC and Bone Marrow scAT ```{r, message=FALSE, warning=FALSE} -library(remotes) library(Seurat) options(Seurat.object.assay.version = "v5") library(Signac) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index f2b3db5ed..a700b6e0a 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -280,7 +280,8 @@ ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75) ``` Using the positional information of each cell, we compute spatial niches. -We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. +We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. The Allen Brain reference can be installed [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)). + Below, we use Slc17a7 expression to help determine the cortical region. ```{r, fig.width=5, fig.height=5, warning=FALSE} @@ -290,7 +291,7 @@ p1 ``` ```{r resolve.crop, fig.width=5, fig.height=7, warning=FALSE} -crop <- Crop(xenium.obj[["fov"]], x=c(600, 2100), y=c(900, 4700)) +crop <- Crop(xenium.obj[["fov"]], x = c(600, 2100), y = c(900, 4700)) xenium.obj[["crop"]] <- crop p2 <- ImageFeaturePlot( xenium.obj, @@ -302,15 +303,15 @@ p2 <- ImageFeaturePlot( p2 ``` -Annotation of spatial datasets can be tricky, and single cell methods are not always effective. Here, we use RCTD, which directly accounts for cell type mixing at each spot or segmentation, to annotate cells. For more details on RCTD, please see the [paper](https://doi.org/10.1038/s41587-021-00830-w). +While `FindTransferAnchors` can be used to integrate spot-level data from spatial transcriptomic datasets, Seurat v5 also includes support for the [Robust Cell Type Decomposition](https://www.nature.com/articles/s41587-021-00830-w), a computational approach to deconvolve spot-level data from spatial datasets, when provided with an scRNA-seq reference. RCTD has been shown to accurately annotate spatial data from a variety of technologies, including SLIDE-seq, Visium, and the 10x Xenium in-situ spatial platform. -First, we install the `spacexr` package from GitHub which implements RCTD. +To run RCTD, we first install the `spacexr` package from GitHub which implements RCTD. ```{r, rctd.install, eval=FALSE} devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) ``` -Counts, cluster, and spot information is extracted from the Seurat query and reference objects to construct `Reference` and `SpatialRNA` objects used by RCTD for annotation. +Counts, cluster, and spot information is extracted from the Seurat query and reference objects to construct `Reference` and `SpatialRNA` objects used by RCTD for annotation. The output of the annotation is then added to the Seurat object. ```{r rctd.qeury, warning=FALSE} library(spacexr) @@ -323,6 +324,7 @@ query <- SpatialRNA(coords, query.counts, colSums(query.counts)) ``` ```{r rctd.reference, eval=FALSE} +# allen.corted.ref can be downloaded here: https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1 allen.cortex.ref <- readRDS("/home/hartmana/github/seurat-private/data/allen_cortex.rds") allen.cortex.ref <- UpdateSeuratObject(allen.cortex.ref) @@ -345,11 +347,8 @@ RCTD <- create.RCTD(query, reference, max_cores = 8) RCTD <- run.RCTD(RCTD, doublet_mode = "doublet") ``` -Many spot annotations contain multiple cell type markers, so we filter only to singlets and add the annotations to the Seurat object for downstream analysis. - ```{r niche.add.annotations, eval=FALSE} annotations.df <- RCTD@results$results_df -annotations.df <- annotations.df[annotations.df$spot_class == "singlet", ] annotations <- annotations.df$first_type names(annotations) <- rownames(annotations.df) xenium.obj$predicted.celltype <- annotations @@ -357,6 +356,8 @@ keep.cells <- Cells(xenium.obj)[!is.na(xenium.obj$predicted.celltype)] xenium.obj <- subset(xenium.obj, cells = keep.cells) ``` +While the previous analyses consider each cell independently, spatial data enables cells to be defined not just by their neighborhood, but also by their broader spatial context. In Seurat v5, we introduce support for 'niche' analysis of spatial data, which demarcates regions of tissue ('niches'), each of which is defined by a different composition of spatially adjacent cell types. Inspired by the method in [He et al, NBT 2022](https://www.nature.com/articles/s41587-022-01483-z), we consider the 'local neighborhood' for each cell - consisting of its `k.neighbor` spatially closest neighbors, and count the occurrences of each cell type present in this neighborhood. We then use k-means clustering to group cells that have similar neighborhoods together, into spatial niches. + We call the `BuildNicheAssay` function from within Seurat to construct a new assay called `niche` containing the cell type composition spatially neighboring each cell. A metadata column called `niches` is also returned, which contains cluster assignments based on the niche assay. ```{r build.niche.assay, eval=FALSE} @@ -373,7 +374,7 @@ xenium.obj <- BuildNicheAssay( xenium.obj <- readRDS("/brahms/hartmana/vignette_data/xenium_niches_presaved.rds") ``` -After clustering the cell type composition nearby each cell, the neuronal layers in the cortex are visually demarcated. +We can then group cells either by their cell type identity, or their niche identity. The niches identified clearly demarcate the neuronal layers in the cortex. ```{r, niche.dimplots, fig.width=8, fig.height=6, warning=FALSE} celltype.plot <- ImageDimPlot( @@ -387,7 +388,7 @@ niche.plot <- ImageDimPlot( xenium.obj, group.by = "niches", size = 1.5, - dark.background = F) + + dark.background = F) + ggtitle("Niches") + scale_fill_manual( values = c("#442288", "#6CA2EA", "#B5D33D", "#FED23F", "#EB7D5B")) diff --git a/vignettes/spatial_vignette.Rmd b/vignettes/spatial_vignette.Rmd index eda156be1..787847e12 100644 --- a/vignettes/spatial_vignette.Rmd +++ b/vignettes/spatial_vignette.Rmd @@ -454,9 +454,11 @@ Now we visualize the expression of the top 6 features identified by Moran's I. SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") ``` -We can also use RCTD for annotation which is published [here](https://doi.org/10.1038/s41587-021-00830-w). RCTD is able to decompose cell mixtures to accurately annotate spatial datasets. +## Spatial deconvolution using RCTD -First, we install the `spacexr` package from GitHub which implements RCTD. +While `FindTransferAnchors` can be used to integrate spot-level data from spatial transcriptomic datasets, Seurat v5 also includes support for the [Robust Cell Type Decomposition](https://www.nature.com/articles/s41587-021-00830-w), a computational approach to deconvolve spot-level data from spatial datasets, when provided with an scRNA-seq reference. RCTD has been shown to accurately annotate spatial data from a variety of technologies, including SLIDE-seq, Visium, and the 10x Xenium in-situ spatial platform. + +To run RCTD, we first install the `spacexr` package from GitHub which implements RCTD. ```{r, eval=FALSE} devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) @@ -471,16 +473,18 @@ library(spacexr) ref <- readRDS("../data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) Idents(ref) <- "celltype" -counts <- LayerData(ref, assay = "RNA", layer = "counts") + +# extract information to pass to the RCTD Reference function +counts <- ref[["RNA"]]$counts cluster <- as.factor(ref$celltype) names(cluster) <- colnames(ref) nUMI <- ref$nCount_RNA names(nUMI) <- colnames(ref) reference <- Reference(counts, cluster, nUMI) -# set up query +# set up query with the RCTD function SpatialRNA slide.seq <- SeuratData::LoadData("ssHippo") -counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") +counts <- slide.seq[["Spatial"]]$counts coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL @@ -495,7 +499,7 @@ RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) ``` -Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot +Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot. ```{r rctd_results, fig.height=8, fig.width=14} p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") From 42b0a7d391198eab2743510871d5bbbc27c8e60c Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 23 Mar 2023 22:28:12 -0400 Subject: [PATCH 556/979] rm extra paren --- vignettes/seurat5_spatial_vignette_2.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index a700b6e0a..f25391417 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -280,7 +280,7 @@ ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75) ``` Using the positional information of each cell, we compute spatial niches. -We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. The Allen Brain reference can be installed [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1)). +We use a cortex reference from the the Allen Brain Institute to annotate cells, so we first crop the dataset to the cortex. The Allen Brain reference can be installed [here](https://www.dropbox.com/s/cuowvm4vrf65pvq/allen_cortex.rds?dl=1). Below, we use Slc17a7 expression to help determine the cortical region. From c7addd64c9ba612ce6017d351fb8624f48c4941b Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 24 Mar 2023 01:07:14 -0400 Subject: [PATCH 557/979] update vignettes --- index.md | 2 +- vignettes/COVID_SCTMapping.Rmd | 8 +- vignettes/ParseBio_sketch_integration.Rmd | 130 ++++++++++-------- vignettes/get_started_v5.Rmd | 19 +++ .../seurat5_bpcells_interaction_vignette.Rmd | 10 +- vignettes/seurat5_spatial_vignette.Rmd | 20 +-- vignettes/vignettes_v5.yaml | 47 ++++--- 7 files changed, 137 insertions(+), 99 deletions(-) diff --git a/index.md b/index.md index 95bb47a44..6f2d34486 100644 --- a/index.md +++ b/index.md @@ -14,7 +14,7 @@ We are excited to release an initial beta version of Seurat v5! This update brin - Paper: [Dictionary learning for integrative, multimodal, and scalable single-cell analysis](https://doi.org/10.1101/2022.02.24.481684) - Vignette: [Cross-modality Bridge Integration](articles/seurat5_integration_bridge.html) - - Website: [Azimuth-ATAC, reference-mapping for scATAC-seq datasets](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html)\ + - Website: [Azimuth-ATAC, reference-mapping for scATAC-seq datasets](https://azimuth.hubmapconsortium.org/references/)\ * **Flexible, interactive, and highly scalable analsyis.** The size and scale of single-cell sequencing datasets is rapidly increasing, outpacing even Moore's law. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore exciting datasets spanning millions of cells, even if they cannot be fully loaded into memory. We introduce support for 'sketch'-based analysis, where representative subsamples of a large dataset are stored in-memory to enable rapid and iterative analysis - while the full dataset remains accessible via on-disk storage.\  We enable high-performance via the BPCells package, developed by Ben Parks in the Greenleaf Lab. The BPCells package enables high-performance analysis via innovative bit-packing compression techniques, optimized C++ code, and use of streamlined and lazy operations. diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 0661ff403..dd6cd32a6 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -9,7 +9,7 @@ date: 'Compiled: `r Sys.Date()`' --- -```{r setup, include=TRUE} +```{r setup, include=FALSE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL @@ -53,7 +53,7 @@ In this vignette, we demonstrate how to use a previously established reference t * Annotate all query cells from multiple studies based on a set of reference-defined cell states * Aggregate the annotated cells into individuals for performing Pseudo-bulk differential analysis -To run this vignette please install Seurat v5. +To run this vignette please install Seurat v5 as shown [here](articles/install.html). @@ -62,7 +62,7 @@ We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](ht ```{r load.data} reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") -object <- readRDS("/brahms/hartmana/vignette_data/covid_mapping_merged_object.rds") +object <- readRDS("/brahms/hartmana/vignette_data/merged_covid_object.rds") object <- NormalizeData(object, verbose = FALSE) ``` @@ -87,7 +87,7 @@ object <- MapQuery( refdata = list( celltype.l1 = "celltype.l1", celltype.l2 = "celltype.l2" - ), + ), reduction.model = 'wnn.umap' ) ``` diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 90922bf05..79dafaaee 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -8,7 +8,7 @@ output: date: 'Compiled: `r Sys.Date()`' --- -```{r setup, include=TRUE} +```{r setup, include=FALSE} all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL @@ -32,84 +32,91 @@ knitr::opts_chunk$set( ) ``` -## load package - +The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. + +In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments 1M cells, though we have used this procedure to integrate datasets of 10M+ cells as well. We analyze a dataset from Parse Biosciences, in which PBMC from 24 human samples (12 healthy donors, 12 Type-1 diabetes donors), which is available [here](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes). +* Sample a representative subset of cells ('atoms') from each dataset +* Integrate the atoms from each dataset, and define a set of cell states +* Reconstruct (integrate) the full datasets, based on the atoms +* Annotate all cells in the full datasets +* Identify cell-type specific differences between healthy and diabetic patients +Prior to running this vignette, please [install Seurat v5](https://satijalab.org/seurat/articles/install.html), as well as the [BPCells](https://github.com/bnprks/BPCells) package, which we use for on-disk storage. You can read more about using BPCells in Seurat v5 [here](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html). We also recommend reading the [Sketch-based analysis in Seurat v5](https://satijalab.org/seurat/articles/seurat5_sketch_analysis.html) vignette, which introduces the concept of on-disk and in-memory storage in Seurat v5. ```{r, warning=F, message=F} library(Seurat) library(BPCells) library(dplyr) +library(ggplot2) +# set this option when analyzing large datasets +options(future.globals.maxSize = 3e9) +options(Seurat.object.assay.version = "v5") ``` - -## load matrix - +## Create a Seurat object containing data from 24 patients +We downloaded the original dataset and donor metadata from [Parse Biosciences](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes), as an h5ad file. While the BPCells package can work directly with h5ad files, for optimal performance, we converted the dataset to the compressed sparse format used by BPCells, as described [here](LINKCONVERSIONVIGNETTE). +We create a Seurat object for this dataset. Since the input to `CreateSeuratObject` is a BPCells matrix, the data remains on-disk and is not loaded into memory. After creating the object, we split the dataset into 24 [layers](LINKTOVIGNETTE), one for each sample (i.e. patient), to facilitate integration. ```{r, warning=F, message=F} object <- readRDS("/brahms/hartmana/vignette_data/parse_1m_pbmc.rds") -object <- NormalizeData(object, verbose = FALSE) -options(Seurat.object.assay.calcn = FALSE) -object[['RNA']] <- split(object[['RNA']], f = object$sample) -object <- FindVariableFeatures(object, layer = 'counts', verbose = FALSE) -object <- SketchData(object = object, ncells = 50000, method = 'LeverageScore', sketched.assay = 'sketch') +object <- NormalizeData(object) +# split assay into 24 layers +object[['RNA']] <- split(object[['RNA']], f = object$sample) +object <- FindVariableFeatures(object, verbose = FALSE) +``` +## Sample representative cells from each dataset +Inspired by pioneering work aiming to identify ['sketches'](https://www.sciencedirect.com/science/article/pii/S2405471219301528) of scRNA-seq data, our first step is to sample a representative set of cells from each dataset. We compute a leverage score (estimate of ['statistical leverage'](https://arxiv.org/abs/1109.3843)) for each cell, which helps to identify cells that are likely to be member of rare subpopulations and ensure that these are included in our representative sample. Importantly, the estimation of leverage scores only requires data normalization, can be computed efficiently for sparse datasets, and does not require any intensive computation or dimensional reduction steps. +We load each object separately, perform basic preprocessing (normalization and variable feature selection), and select and store 5,000 representative cells from each dataset. Since there are 24 datasets, the sketched dataset now contains 120,000 cells. These cells are stored in a new `sketch` assay, and are loaded in-memory. +```{r, warning=F, message=F} +object <- SketchData(object = object, ncells = 5000, method = 'LeverageScore', sketched.assay = 'sketch') +object ``` -## integrate sketched assay +## Perform integration on the sketched cells across samples +Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we perform integration using the streamlined [Seurat v5 integration worfklow](https://satijalab.org/seurat/articles/seurat5_integration.html), and utilize the reference-based `RPCAIntegration` method. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated.rpca`). +However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. This includes CCA Integration, Harmony, and scVI. We demonstrate how to use these tools in Seurat v5 [here](https://satijalab.org/seurat/articles/seurat5_integration.html). ```{r} - -time5_SketchIntegration <- system.time({ - DefaultAssay(object) <- 'sketch' - object <- FindVariableFeatures(object, verbose = F, nfeatures = 2000) - features <- VariableFeatures(object) - object <- ScaleData(object, features = features, verbose = F) - object <- RunPCA(object, features = features, verbose = F) - DefaultAssay(object) <- 'sketch' - options(future.globals.maxSize = 3e9) - object <- IntegrateLayers(object, - method = RPCAIntegration, - orig = 'pca', - new.reduction = 'integrated.rpca', - dims = 1:30, - k.anchor = 20, +DefaultAssay(object) <- 'sketch' +object <- FindVariableFeatures(object, verbose = F) +object <- ScaleData(object, verbose = F) +object <- RunPCA(object, verbose = F) +# integrate the datasets +object <- IntegrateLayers(object, method = RPCAIntegration, orig = 'pca', + new.reduction = 'integrated.rpca', dims = 1:30, k.anchor = 20, reference = which(Layers(object, search = 'data') %in% c( 'data.H_3060')), verbose = F) - -}) +# cluster the integrated data +object <- FindNeighbors(object, reduction = 'integrated.rpca', dims = 1:30) +object <- FindClusters(object, resolution = 2) object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T, verbose = F) +# You can now annotate clusters using marker genes. +# We performed this step, and include the results in the 'sketch.celltype' metadata column +# NOTE change celltype.weight -> sketch.celltype in metadata plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend() ``` + ```{r} plot.s1 + plot.s2 - ``` - -## proporgate embeddings to full data +## Integrate the full datasets +Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `ProjectIntegration` function to integrate all cells. After this function is run, the `integrated.rpca.full` space now embeds all cells in the dataset.Even though all cells in the dataset have been integrated together, the non-sketched cells are not loaded into memory. Users can still switch between the `sketch` (sketched cells, in-memory) and `RNA` (full dataset, on disk) for analysis. ```{r} -time6_UnSketch <- system.time({ - object <- IntegrateSketchEmbeddings(object = object, - atoms = 'sketch', - orig = 'RNA', - reduction = 'integrated.rpca' , - layers = Layers(object = object[['RNA']], search = 'data'), - features = features) - -}) - - +object <- ProjectIntegration(object = object, + sketched.assay = 'sketch', + assay = 'RNA', + reduction = 'integrated.rpca') ``` ```{r} -object <- RunUMAP(object, reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_') +object <- RunUMAP(object, reduction = 'integrated.rpca.full', dims = 1:30 , reduction.name = 'umap.full', reduction.key = 'UMAP_full_') ``` -```{r} -p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.5) + NoLegend() -p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend() -p1+p2 +```{r} +p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.5) + NoLegend() +p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend() +p1 + p2 ``` ## pseudo-bulk ```{r} - time8_bulk <- system.time( bulk <- AggregateExpression(object, return.seurat = T, slot = 'counts', @@ -117,11 +124,9 @@ time8_bulk <- system.time( bulk <- AggregateExpression(object, group.by = c("celltype.weight","sample","disease") ) ) - bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) - marker.list <- list() celltype.set <- unique(bulk$celltype ) for (i in seq_along(celltype.set)) { @@ -135,7 +140,6 @@ for (i in seq_along(celltype.set)) { } names(marker.list) <- celltype.set ``` - ```{r} marker.list.filter <- lapply(marker.list, function(x) { if(nrow(x) > 0) { @@ -145,13 +149,25 @@ marker.list.filter <- lapply(marker.list, function(x) { return(x) } }) - ``` - ```{r} VlnPlot(bulk, features = 'FOXO3', group.by = 'celltype', split.by = 'disease') ``` +## computing time summary +```{r} +all_T <- ls(pattern = 'time') +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 +for (i in 1:length(all_T)) { + T_i <- get(all_T[i])['elapsed'] + if (T_i > 60) { + print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) + } else { + print(paste(all_T[i], round(T_i, digits = 1), 'secs')) + } +} +print(paste('Total time ', round(overall, digits = 1), 'mins' )) +``` ```{r,fig.height = 20, fig.width = 15} Idents(bulk) <- 'celltype' marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) @@ -162,12 +178,6 @@ bulk <- ScaleData(bulk, features = top5$gene) DoHeatmap(bulk, features = top5$gene) + NoLegend() ``` -```{r save.img, include=TRUE} -library(ggplot2) -plot <- DoHeatmap(bulk, features = top5$gene) + NoLegend() + theme(axis.text.y = element_blank()) -ggsave(filename = "../output/images/ParseBio_sketch_integration.jpg", height = 7, width = 7, plot = plot, quality = 50) -``` - ```{r save.times, include=TRUE} print(as.data.frame(all_times)) write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/ParseBio_sketch_integration.csv") diff --git a/vignettes/get_started_v5.Rmd b/vignettes/get_started_v5.Rmd index 28a65e1f1..7f18dc10a 100644 --- a/vignettes/get_started_v5.Rmd +++ b/vignettes/get_started_v5.Rmd @@ -119,6 +119,25 @@ Below we demonstrate new functionality introduced in Seurat 5 including: * Spatial infrastructure to analyze public datasets from multiple technologies * Integration using CCA, RPCA, scVI, and Harmony in a common framework +## Large Dataset Analysis + + ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 2) ``` + +## Integration + +Perform integration across conditions using Seurat-based and community methods. + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 3) +``` + +## Spatial Analysis + +Use Seurat to analyze data from imaging-based and sequencing-based spatial technologies. + +```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} +make_vignette_card_section(vdat = vdat, cat = 4) +``` diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index e5445ecad..72874e8c0 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -158,7 +158,7 @@ merged.object <- CreateSeuratObject(counts = data.list, meta.data = metadata) merged.object ``` -```{r save_merged} +```{r save_merged, eval=FALSE} saveRDS(merged.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/merged_object") ``` @@ -168,7 +168,13 @@ Here, we show how to load a 1 million cell data set from Parse Biosciences and c ```{r} parse.data <- open_matrix_anndata_hdf5("/brahms/hartmana/vignette_data/h5ad_files/ParseBio_PBMC.h5ad") +``` + +```{r, eval=FALSE} write_matrix_dir(mat = parse.data, dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") +``` + +```{r} parse.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") metadata <- readRDS("/brahms/hartmana/vignette_data/ParseBio_PBMC_meta.rds") @@ -177,7 +183,7 @@ metadata$disease <- sapply(strsplit(x = metadata$sample, split = "_"), "[", 1) parse.object <- CreateSeuratObject(counts = parse.mat, meta.data = metadata) ``` -```{r save_parse} +```{r save_parse, eval=FALSE} saveRDS(parse.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/parse_object") ``` diff --git a/vignettes/seurat5_spatial_vignette.Rmd b/vignettes/seurat5_spatial_vignette.Rmd index 28a1f8e20..a0d88d5ab 100644 --- a/vignettes/seurat5_spatial_vignette.Rmd +++ b/vignettes/seurat5_spatial_vignette.Rmd @@ -377,7 +377,6 @@ InstallData("ssHippo") ```{r data.ss} slide.seq <- LoadData('ssHippo') -slide.seq[['Spatial']] <- as(slide.seq[['Spatial']], Class = 'Assay5') ``` ## Data preprocessing @@ -394,7 +393,7 @@ wrap_plots(plot1, plot2) We then normalize the data using [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) and perform a standard scRNA-seq dimensionality reduction and clustering workflow. ```{r preprocess.ss} -slide.seq <- SCTransform(slide.seq, assay = "Spatial", ncells = 3000, verbose = FALSE) +slide.seq <- SCTransform(slide.seq, assay = "Spatial", ncells = 3000, n_genes=nrow(slide.seq[["Spatial"]]), verbose = FALSE) slide.seq <- RunPCA(slide.seq, assay = "SCT") slide.seq <- RunUMAP(slide.seq, dims = 1:30) slide.seq <- FindNeighbors(slide.seq, dims = 1:30) @@ -459,9 +458,12 @@ Now we visualize the expression of the top 6 features identified by Moran's I. SpatialFeaturePlot(slide.seq, features = head(SpatiallyVariableFeatures(slide.seq, selection.method = "moransi"), 6), ncol = 3, alpha = c(0.1, 1), max.cutoff = "q95") ``` -We can also use RCTD for annotation which is published [here](https://doi.org/10.1038/s41587-021-00830-w). RCTD is able to decompose cell mixtures to accurately annotate spatial datasets. -First, we install the `spacexr` package from GitHub which implements RCTD. +## Spatial deconvolution using RCTD + +While `FindTransferAnchors` can be used to integrate spot-level data from spatial transcriptomic datasets, Seurat v5 also includes support for the [Robust Cell Type Decomposition](https://www.nature.com/articles/s41587-021-00830-w), a computational approach to deconvolve spot-level data from spatial datasets, when provided with an scRNA-seq reference. RCTD has been shown to accurately annotate spatial data from a variety of technologies, including SLIDE-seq, Visium, and the 10x Xenium in-situ spatial platform. + +To run RCTD, we first install the `spacexr` package from GitHub which implements RCTD. ```{r, eval=FALSE} devtools::install_github("dmcable/spacexr", build_vignettes = FALSE) @@ -476,16 +478,18 @@ library(spacexr) ref <- readRDS("../data/mouse_hippocampus_reference.rds") ref <- UpdateSeuratObject(ref) Idents(ref) <- "celltype" -counts <- LayerData(ref, assay = "RNA", layer = "counts") + +# extract information to pass to the RCTD Reference function +counts <- ref[["RNA"]]$counts cluster <- as.factor(ref$celltype) names(cluster) <- colnames(ref) nUMI <- ref$nCount_RNA names(nUMI) <- colnames(ref) reference <- Reference(counts, cluster, nUMI) -# set up query +# set up query with the RCTD function SpatialRNA slide.seq <- SeuratData::LoadData("ssHippo") -counts <- LayerData(slide.seq, assay = "Spatial", layer = "counts") +counts <- slide.seq[["Spatial"]]$counts coords <- GetTissueCoordinates(slide.seq) colnames(coords) <- c("x", "y") coords[is.na(colnames(coords))] <- NULL @@ -500,7 +504,7 @@ RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet') slide.seq <- AddMetaData(slide.seq, metadata = RCTD@results$results_df) ``` -Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot +Next, plot the RCTD annotations. Because we ran RCTD in doublet mode, the algorithm assigns a `first_type` and `second_type` for each barcode or spot. ```{r rctd_results, fig.height=8, fig.width=14} p1 <- SpatialDimPlot(slide.seq, group.by = "first_type") diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 7bbb7fda8..c5ba1e16e 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -1,15 +1,9 @@ -- category: Introduction to Seurat 5 +- category: Introduction vignettes: - - title: Install Seurat 5 - name: install_seurat5 - summary: | - Install Seurat 5 and suggested dependencies. - image: SeuratV5.png - - title: Seurat 5 Assay name: seurat5_assay summary: | - Explore the new assay structure introduced in Seurat 5. + Explore the new assay structure introduced in Seurat v5. image: assay.png - title: BPCells Interaction @@ -18,13 +12,8 @@ Load and save large on-disk matrices using BPCells. image: bpcells.png -- category: Analysis in Seurat 5 +- category: Large dataset analysis vignettes: - - title: Integration - name: seurat5_integration - summary: | - Integrate datasets in Seurat 5 using a variety of methods. - image: integration_seurat5.jpg - title: COVID Mapping name: COVID_SCTMapping @@ -32,18 +21,20 @@ Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. image: COVID_SCTMapping.png - - title: Analysis of spatial datasets (Imaging-based) - name: seurat5_spatial_vignette_2 - summary: | - Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. - image: spatial_vignette_2.jpg - - title: Sketch Clustering name: seurat5_sketch_analysis summary: | Analyze a 1.3 million cell mouse brain dataset using on-disk capabilities powered by BPCells. image: sketch.png +- category: Integration + vignettes: + - title: Integration + name: seurat5_integration + summary: | + Integrate scRNA-seq datasets using a variety of computational methods. + image: integration_seurat5.jpg + - title: Sketch Integration name: ParseBio_sketch_integration summary: | @@ -56,8 +47,16 @@ Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. image: bridge_integration.png - - title: Run Azimuth - name: seurat5_run_azimuth +- category: Spatial + vignettes: + - title: Analysis of spatial datasets (Imaging-based) + name: seurat5_spatial_vignette_2 + summary: | + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. + image: spatial_vignette_2.jpg + + - title: Analysis of spatial datasets (Sequencing-based) + name: spatial_vignette summary: | - Annotate cells locally using Azimuth. - image: azimuth.png + Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. + image: spatial_vignette_ttr.jpg From cf8d1bf1f0a323fb23cdb2b9e1ed00037069d38b Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 24 Mar 2023 01:08:48 -0400 Subject: [PATCH 558/979] fix plot sizes --- vignettes/seurat5_sketch_analysis.Rmd | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 5b842b982..815fc7270 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -45,7 +45,7 @@ We store sketched cells (in-memory) and the full dataset (on-disk) as two assays * Support for 'bit-packing' compression and infrastructure We demonstrate the on-disk capabilities in Seurat v5 using the [BPCells package](https://github.com/bnprks/BPCells) developed by Ben Parks in the Greenleaf Lab. This package utilizes bit-packing compression and optimized, streaming-compatible C++ code to substantially improve I/O and computational performance when working with on-disk data. -To run this vignette please install Seurat v5, using the installation instructions found [here](LINK). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](LINK). +To run this vignette please install Seurat v5, using the installation instructions found [here](https://satijalab.org/seurat/articles/install.html). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](https://bnprks.github.io/BPCells/#installation). ```{r, warning=FALSE, message=FALSE} library(Seurat) library(BPCells) @@ -126,8 +126,11 @@ p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "c ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) ``` -```{r} -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() +```{r, fig.width=5, fig.height=5} +DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() +``` + +```{r, fig.width=10, fig.height=5} # visualize gene expression on the sketched cells (fast) and the full dataset (slower) DefaultAssay(obj) <- 'sketch' x1 <- FeaturePlot(obj, 'C1qa') @@ -137,7 +140,7 @@ x1 | x2 ``` ## Perform iterative sub-clustering -NOTE THAT THE CLUSTER IDS WILL CHANGE + Now that we have performed an initial analysis of the dataset, we can iteratively 'zoom-in' on a cell subtype of interest, extract all cells of this type, and perform iterative sub-clustering. For example, we can see that Dlx2+ interneuron precursors are defined by clusters 2, 15, 18, 28 and 40. ```{r} @@ -161,14 +164,14 @@ obj.sub <- FindNeighbors(obj.sub, dims = 1:30) obj.sub <- FindClusters(obj.sub) ``` -```{r} +```{r, fig.width=5, fig.height=5} DimPlot(obj.sub, label = T) + NoLegend() ``` Note that we can start to see distinct interneuron lineages emerging in this dataset. We can see a clear separation of interneuron precursors that originated from the medial ganglionic eminence (Lhx6) or caudal ganglionic eminence (Nr2f2). We can further see the emergence of Sst (Sst) and Pvalb (Mef2c)-committed interneurons, and a CGE-derived Meis2-expressing progenitor population. These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. -```{r save.times, include=FALSE} +```{r save.times, include=FALSE, eval=FALSE} print(as.data.frame(all_times)) write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_sketch_analysis.csv") ``` From a95955438f4c2b82f8b9fd04381c4d53cb9547ea Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 24 Mar 2023 09:11:51 -0400 Subject: [PATCH 559/979] update nstart in BuildNicheAssay clustering step --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 86b086d1c..7acb49356 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2872,7 +2872,7 @@ BuildNicheAssay <- function( results <- kmeans( x = t(object[[assay]]@scale.data), centers = niches.k, - iter.max = 100 + nstart = 30 ) object$niches <- results[["cluster"]] From 104110447648672a58ff51493686391ee206ab0d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 13:03:43 -0400 Subject: [PATCH 560/979] update covid --- vignettes/COVID_SCTMapping.Rmd | 77 +++++++++++++++------------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index dd6cd32a6..e1b186da1 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -37,6 +37,7 @@ knitr::opts_chunk$set( library(Seurat) library(BPCells) library(dplyr) +library(ggplot2) options(future.globals.maxSize = 1e9) ``` @@ -44,7 +45,7 @@ options(future.globals.maxSize = 1e9) ## Introduction: Reference mapping analysis in Seurat v5 In numerous studies profiling multiple tissues across hundreds of individuals and millions of cells, the single-cell reference mapping approach offers a robust and consistent method for annotating all those publicly available single-cell datasets. This vignette demonstrates the process of mapping multiple query scRNA PBMC datasets onto [our annotated CITE-seq reference of 162,000 PBMC measured with 228 antibodies](https://doi.org/10.1016/j.cell.2021.04.048) -We download three datasets from [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We demonstrate how to create a Seurat object with BPCells matrix in our BPCells interaction vignette. +We download three datasets (1,498,064 cells and 277 donors) from [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We demonstrate how to create a Seurat object with BPCells matrix in our BPCells interaction vignette. We have previously demonstrated the use of the reference-mapping approach for annotating cell labels in a single query dataset. With Seurat v5, we have substantially improved the speed, memory efficiency, and user-friendliness for mapping a large number of query datasets to the same reference. @@ -64,13 +65,12 @@ We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](ht reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") object <- readRDS("/brahms/hartmana/vignette_data/merged_covid_object.rds") object <- NormalizeData(object, verbose = FALSE) + ``` ## Mapping -Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). - -We then transfer cell type labels from the reference to all three queries. Additionally, we project the query data onto the UMAP structure of the reference. +Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). The difference from v4 reference mapping vignette is that we map three dataset together @@ -97,66 +97,55 @@ We can now visualize the 2,700 query cells. They have been projected into a UMAP ```{r, fig.width=10, fig.height=6} -p1<- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.8, label = T) + NoLegend() -p2<- DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.8, label = T) + NoLegend() -p1 + p2 +DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3) + NoLegend() ``` ```{r, fig.width=10, fig.height=6} -p3 <- DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() -p3 <- DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.8, label = T, split.by = 'batch', ncol = 3) + NoLegend() -p3 +DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3) + NoLegend() ``` - -## Differential analysis + +## Differential composition analysis +```{r} +df_comp <- as.data.frame.matrix(table(object$donor_id, object$predicted.celltype.l2)) +df_comp <- df_comp[rowSums(df_comp)> 50,] +df_comp_ratio <- sweep(x = df_comp, MARGIN = 1, STATS = rowSums(df_comp), FUN = '/') + +df_disease <- as.data.frame.matrix(table(object$donor_id, object$disease)) +df_disease$disease <- 'other' +df_disease$disease[df_disease$normal!=0] <- 'normal' +df_disease$disease[df_disease$`COVID-19`!=0] <- 'COVID-19' +df_disease$disease <- factor(df_disease$disease, levels = c('normal','COVID-19','other')) + +obj.comp <- CreateSeuratObject(counts = t(df_comp_ratio), meta.data = df_disease) +obj.comp <- subset(obj.comp, subset = disease %in% c('normal','COVID-19')) +``` +```{r, fig.width=10, fig.height=6} +VlnPlot(obj.comp, features = c("MAIT",'Plasmablast'), group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') + +``` +## Differential expression analysis ```{r} bulk <- AverageExpression(object, method = 'aggregate', return.seurat = TRUE, slot = 'counts', assays = 'RNA', - group.by = c("predicted.l2.s5", "patient", "disease_status_standard") + group.by = c("predicted.celltype.l2", "donor_id", "disease") ) bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) -bulk <- subset(bulk, subset = disease != 'other') -``` - -```{r} -marker.list <- list() -celltype.set <- unique(bulk$celltype ) -for (i in seq_along(celltype.set)) { - bulk.i <- subset(bulk, subset = celltype == celltype.set[i]) - Idents(bulk.i) <- 'disease' - if (any(table(bulk.i$disease) < 3)) { - marker.list[[i]] <- EmptyDF(n = 0) - } else { - marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'COVID-19',ident.2 = 'healthy', slot = 'counts', test.use = 'DESeq2', verbose = F ) - } - -} -names(marker.list) <- celltype.set ``` -```{r} -marker.list.filter <- lapply(marker.list, function(x) { - if(nrow(x) > 0) { - x <- x[x$p_val_adj < 0.01 & !is.na(x$p_val_adj ),] - } - if (nrow(x) > 0) { - return(x) - } -}) - -``` ```{r} -bulk$disease <- factor(bulk$disease, levels = c('healthy', 'COVID-19')) +bulk <- subset(bulk, subset = disease %in% c('normal', 'COVID-19') ) +bulk <- subset(bulk, subset = celltype != c('Doublet') ) +bulk$disease <- factor(bulk$disease, levels = c('normal', 'COVID-19')) ``` -```{r, fig.width=10, fig.height=5} -VlnPlot(bulk, features = 'MX1', group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +```{r, fig.width=10, fig.height=8} +VlnPlot(bulk, features = c('IFI6','ISG15', 'IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"), ncol = 1) ``` ```{r save.img, include=TRUE} From 5635e9d091894e4bf116e4588b915e337b14a695 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 24 Mar 2023 13:24:44 -0400 Subject: [PATCH 561/979] adding as() to bpcells vignette --- vignettes/seurat5_bpcells_interaction_vignette.Rmd | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 72874e8c0..02acacb02 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -111,6 +111,13 @@ saveRDS(brain, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcel ``` +If needed, a layer with an on-disk matrix can be converted to an in-memory matrix using the `as()` function. For the purposes of this demo, we'll subset the object so that it takes up less space in memory. +```{r} +brain <- subset(brain, downsample = 1000) +brain[["RNA"]]$counts <- as(object = brain[["RNA"]]$counts, Class = "dgCMatrix") +``` + + ## Load data from multiple h5ad files You can also download data from multiple matrices. In this section, we create a Seurat object using multiple peripheral blood mononuclear cell (PBMC) samples that are freely available for downlaod from CZI [here](https://cellxgene.cziscience.com/collections). We download data from [Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We use the BPCells function to read h5ad files. From 6b6df324d57c660c10fd5caa19f462026eaa6da7 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 13:43:24 -0400 Subject: [PATCH 562/979] covid update --- vignettes/COVID_SCTMapping.Rmd | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index e1b186da1..e5162ca2a 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -70,9 +70,7 @@ object <- NormalizeData(object, verbose = FALSE) ## Mapping -Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). The difference from v4 reference mapping vignette is that we map three dataset together - - +Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). The distinction between the v4 reference mapping vignette and this method is that we simultaneously map three datasets calling of `FindTransferAnchors` and `MapQuery` once. ```{r} anchor <- FindTransferAnchors(reference = reference, @@ -93,7 +91,8 @@ object <- MapQuery( ``` ## Explore the mapping results -We can now visualize the 2,700 query cells. They have been projected into a UMAP visualization defined by the reference, and each has received annotations at two levels of granularity (level 1, and level 2). + +We can now visualize the 1.5 million query cells from three studies, which have been projected into a UMAP visualization defined by the reference. Each cell has received consistent annotations at two levels of granularity (level 1 and level 2). The incoherence of their original annotations (cell_type) prevents us from directly performing integrative analysis across these studies. However, our mapping results annotate the various datasets using a single set of curated annotations (predicted.celltype.l1 and predicted.celltype.l2), enabling cross-study differential compositional and transcriptome analyses. ```{r, fig.width=10, fig.height=6} @@ -104,6 +103,8 @@ DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha ``` ## Differential composition analysis + + ```{r} df_comp <- as.data.frame.matrix(table(object$donor_id, object$predicted.celltype.l2)) df_comp <- df_comp[rowSums(df_comp)> 50,] @@ -123,6 +124,7 @@ VlnPlot(obj.comp, features = c("MAIT",'Plasmablast'), group.by = 'disease', slo ``` ## Differential expression analysis + ```{r} bulk <- AverageExpression(object, method = 'aggregate', From 0d9e5471b764d677f460217fd0cd86b0f66b1a71 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 13:48:12 -0400 Subject: [PATCH 563/979] update integration vignette --- vignettes/seurat5_integration.Rmd | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index c83fde1f9..90453399b 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -81,6 +81,9 @@ obj <- RunUMAP(obj, dims = 1:30, reduction = 'pca', reduction.name = 'umap.unint # cell type annotations were previously added by Azimuth DimPlot(obj, reduction = 'umap.unintegrated', group.by=c('Method','predicted.celltype.l2')) ``` + +## Perform streamlined (one-line) integrative analysis + Seurat v5 enables streamlined integrative analysis using the `IntegrateLayers` function. The method currently supports five integration methods. Each of these methods performs integration in low-dimensional space, and returns a dimensional reduction (i.e. `integrated.rpca`) that aims to co-embed shared cell types across batches: * Anchor-based CCA integration (`method=CCAIntegration`) @@ -89,7 +92,10 @@ Seurat v5 enables streamlined integrative analysis using the `IntegrateLayers` f * FastMNN (`method= FastMNNIntegration`) * scVI (`method=scVIIntegration`) -Note that scVI integration requires `reticulate` which can be installed from CRAN (`install.packages("reticulate")`) as well as `scvi-tools` and its dependencies installed in a conda environment. Please see scVI installation instructions [here](https://docs.scvi-tools.org/en/stable/installation.html). +Note that you can find more detail on each method, and any installation prerequisites, in Seurat's documentation (for example, `?scVIIntegration`). For example, scVI integration requires `reticulate` which can be installed from CRAN (`install.packages("reticulate")`) as well as `scvi-tools` and its dependencies installed in a conda environment. Please see scVI installation instructions [here](https://docs.scvi-tools.org/en/stable/installation.html). + +Each of the following lines perform a new integration using a single line of code: + ```{r integratelayerscca} obj <- IntegrateLayers(object = obj, method = CCAIntegration, verbose = F, new.reduction = 'integrated.cca') ``` @@ -118,6 +124,7 @@ obj[["integrated.scvi"]] <- CreateDimReducObject(embeddings = scvi.reduc) ``` For any of the methods, we can now visualize and cluster the datasets. We show this for CCA integration and scVI, but you can do this for any method + ```{r integratedprojections, fig.height=16, fig.width=16} obj <- FindNeighbors(obj, reduction = 'integrated.cca', dims = 1:30) obj <- FindClusters(obj,resolution = 2, cluster.name = 'cca_clusters') @@ -131,7 +138,9 @@ p2 <- DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.cellty wrap_plots(c(p1, p2), ncol=2) ``` + We hope that by simplifying the process of performing integrative analysis, users can more carefully evaluate the biological information retained in the integrated dataset. For example, users can compare the expression of biological markers based on different clustering solutions, or visualize one method's clustering solution on different UMAP visualizations. + ```{r vlnplots, fig.height=5, fig.width=16, warning=FALSE} p1 <- VlnPlot(obj, features = "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - Unintegrated Clusters") p2 <- VlnPlot(obj, "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - CCA Clusters") @@ -146,3 +155,10 @@ p5 <- DimPlot(obj, reduction="umap.rpca", group.by=c("cca_clusters")) p6 <- DimPlot(obj, reduction="umap.scvi", group.by=c("cca_clusters")) p4 | p5 | p6 ``` + +Once integrative analysis is complete, you can rejoin the layers - which collapses the individual datasets together and recreates the original `counts` and `data` layers. You will need to do this before performing any differential expression analysis. However, you can always resplit the layers in case you would like to reperform integrative analysis. + +```{r joinlayers} +obj <- JoinLayers(obj) +obj +``` From 06215cfcdcef33d748aa8ca04d894de1ca01b45e Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 14:10:47 -0400 Subject: [PATCH 564/979] update mouse brain 1.3M --- vignettes/seurat5_sketch_analysis.Rmd | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 815fc7270..0410e4ccd 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -34,6 +34,7 @@ knitr::opts_chunk$set( ## Intro: Sketch-based analysis in Seurat v5 As single-cell sequencing technologies continue to improve in scalability in throughput, the generation of datasets spanning a million or more cells is becoming increasingly routine. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore these exciting datasets. + In this vignette, we introduce a sketch-based analysis workflow to analyze a 1.3 million cell dataset of the developing mouse brain, freely available from 10x Genomics. Analyzing datasets of this size with standard workflows can be challenging, slow, and memory-intensive. Here we introduce an alternative workflow that is highly scalable, even to datasets ranging beyond 10 million cells in size. Our 'sketch-based' workflow involves three new features in Seurat v5 @@ -46,6 +47,7 @@ We store sketched cells (in-memory) and the full dataset (on-disk) as two assays We demonstrate the on-disk capabilities in Seurat v5 using the [BPCells package](https://github.com/bnprks/BPCells) developed by Ben Parks in the Greenleaf Lab. This package utilizes bit-packing compression and optimized, streaming-compatible C++ code to substantially improve I/O and computational performance when working with on-disk data. To run this vignette please install Seurat v5, using the installation instructions found [here](https://satijalab.org/seurat/articles/install.html). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](https://bnprks.github.io/BPCells/#installation). + ```{r, warning=FALSE, message=FALSE} library(Seurat) library(BPCells) @@ -53,9 +55,11 @@ library(ggplot2) # needs to be set for large dataset analysis options(future.globals.maxSize = 1e9) ``` + ## Create a Seurat object with a v5 assay for on-disk storage We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_dir` function from `BPCells`. Note that this function does not load the dataset into memory, but instead, creates a connection to the data stored on-disk. We then store this on-disk representation in the Seurat object. Note that in our [Introduction to on-disk storage vignette](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html), we demonstrate how to create this on-disk representation. + ```{r} # specify that you would like to create a Seurat v5 assay # note that we require setting this option to ensure that existing pipelines are not affected @@ -68,9 +72,12 @@ obj # Note that since the data is stored on-disk, the object size easily fits in-memory (<1GB) format(object.size(obj), units = 'Mb') ``` + ## 'Sketch' a subset of cells, and load these into memory We select a subset ('sketch') of 50,000 cells (out of 1.3M). Rather than sampling all cells with uniform probability, we compute and sample based off a 'leverage score' for each cell, which reflects the magnitude of its contribution to the gene-covariance matrix, and its importance to the overall dataset. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1.full), we demonstrate that the leverage score is highest for rare populations in a dataset. Therefore, our sketched set of 50,000 cells will oversample rare populations, retaining the biological complexity of the sample while drastically compressing the dataset. + The function `SketchData` takes a normalized single-cell dataset (stored either on-disk or in-memory), and a set of variable features. It returns a Seurat object with a new assay (`sketch`), consisting of 50,000 cells, but these cells are now stored in-memory. Users can now easily switch between the in-memory and on-desk representation just by changing the default assay. + ```{r, warning=FALSE, message=FALSE} obj <- NormalizeData(obj) obj <- FindVariableFeatures(obj) @@ -83,8 +90,10 @@ DefaultAssay(obj) <- 'sketch' ``` ## Perform clustering on the sketched dataset + Now that we have compressed the dataset, we can perform standard clustering and visualization of a 50,000 cell dataset. After clustering, we can see groups of cells that clearly correspond to precursors of distinct lineages, including endothelial cells (Igfbp7), Excitatory (Neurod6) and Inhibitory (Dlx2) neurons, Intermediate Progenitors (Eomes), Radial Glia (Vim), Cajal-Retzius cells (Reln), Oligodendroytes (Olig1), and extremely rare populations of macrophages (C1qa) that were oversampled in our sketched data. + ```{r, warning=FALSE, message=FALSE, fig.width=5, fig.height=5} DefaultAssay(obj) <- 'sketch' obj <- FindVariableFeatures(obj) @@ -153,8 +162,12 @@ We therefore extract all cells from the full on-disk dataset that are present in ```{r} # subset cells in these clusters. Note that the data remains on-disk after subsetting obj.sub <- subset(obj, subset = cluster_full %in% c(2, 15, 18, 28, 40)) +DefaultAssay(obj.sub) <- 'RNA' + # now convert the RNA assay (previously on-disk) into an in-memory representation (sparse Matrix) -obj.sub[['RNA']] <- CastAssay(object = obj.sub[['RNA']], to = 'dgCMatrix') +# we only convert the data layer, and keep the counts on-disk +obj.sub[['RNA']]$data <- as(obj.sub[['RNA']]$data, Class = 'dgCMatrix') + # recluster the cells obj.sub <- FindVariableFeatures(obj.sub) obj.sub <- ScaleData(obj.sub) @@ -171,6 +184,10 @@ DimPlot(obj.sub, label = T) + NoLegend() Note that we can start to see distinct interneuron lineages emerging in this dataset. We can see a clear separation of interneuron precursors that originated from the medial ganglionic eminence (Lhx6) or caudal ganglionic eminence (Nr2f2). We can further see the emergence of Sst (Sst) and Pvalb (Mef2c)-committed interneurons, and a CGE-derived Meis2-expressing progenitor population. These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. +```{r,fig.height = 7, fig.width = 15} +FeaturePlot(obj, c('Dlx2', 'Lhx6', 'Nr2f2', 'Sst', 'Pvalb', 'Meis2', 'Vip', 'Dlx6os1'), ncol = 4) +``` + ```{r save.times, include=FALSE, eval=FALSE} print(as.data.frame(all_times)) write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_sketch_analysis.csv") @@ -181,4 +198,4 @@ write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/seurat5_ske ```{r} sessionInfo() ``` -
    +
    \ No newline at end of file From 88375f65c388dcdca1df4656195f399a83a47f2f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 15:32:13 -0400 Subject: [PATCH 565/979] change orig to orig.reduction intelayers --- R/integration5.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 5324b08f5..4c7c03bf9 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -344,7 +344,7 @@ attr(x = JointPCAIntegration, which = 'Seurat.method') <- 'integration' #' #' @param object A \code{\link[SeuratObject]{Seurat}} object #' @param method Integration method function -#' @param orig Name of dimensional reduction for correction +#' @param orig.reduction Name of dimensional reduction for correction #' @param group.by Name of meta data to group cells by; defaults to splits #' assay layers #' @param assay Name of assay for integration @@ -368,7 +368,7 @@ attr(x = JointPCAIntegration, which = 'Seurat.method') <- 'integration' IntegrateLayers <- function( object, method, - orig = 'pca', + orig.reduction = 'pca', group.by = NULL, assay = NULL, features = NULL, @@ -418,13 +418,13 @@ IntegrateLayers <- function( if (!length(x = features)) { abort(message = "None of the features provided are found in this assay") } - if (!is.null(orig)) { + if (!is.null(orig.reduction)) { # Check our dimensional reduction - orig <- orig %||% DefaultDimReduc(object = object, assay = assay) - if (!orig %in% Reductions(object = object)) { - abort(message = paste(sQuote(x = orig), 'is not a dimensional reduction')) + orig.reduction <- orig.reduction %||% DefaultDimReduc(object = object, assay = assay) + if (!orig.reduction %in% Reductions(object = object)) { + abort(message = paste(sQuote(x = orig.reduction), 'is not a dimensional reduction')) } - obj.orig <- object[[orig]] + obj.orig <- object[[orig.reduction]] if (is.null(x = DefaultAssay(object = obj.orig))) { DefaultAssay(object = obj.orig) <- assay } From 7d75e77ce799f5a93990ebcaaa61000252e5cae0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 15:40:19 -0400 Subject: [PATCH 566/979] fix typo 1.3M mouse --- vignettes/seurat5_sketch_analysis.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 0410e4ccd..1fb322be9 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -185,7 +185,7 @@ Note that we can start to see distinct interneuron lineages emerging in this dat These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. ```{r,fig.height = 7, fig.width = 15} -FeaturePlot(obj, c('Dlx2', 'Lhx6', 'Nr2f2', 'Sst', 'Pvalb', 'Meis2', 'Vip', 'Dlx6os1'), ncol = 4) +FeaturePlot(obj.sub, c('Dlx2', 'Lhx6', 'Nr2f2', 'Sst', 'Pvalb', 'Meis2', 'Vip', 'Dlx6os1'), ncol = 4) ``` ```{r save.times, include=FALSE, eval=FALSE} From 92960cb21df8157aa3c8007b69a7489e6dc634fc Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 15:49:20 -0400 Subject: [PATCH 567/979] update docu --- man/FindClusters.Rd | 3 +++ man/IntegrateLayers.Rd | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/man/FindClusters.Rd b/man/FindClusters.Rd index 2a69cad65..513cf8f36 100644 --- a/man/FindClusters.Rd +++ b/man/FindClusters.Rd @@ -29,6 +29,7 @@ FindClusters(object, ...) \method{FindClusters}{Seurat}( object, graph.name = NULL, + cluster.name = NULL, modularity.fxn = 1, initial.membership = NULL, node.sizes = NULL, @@ -81,6 +82,8 @@ Specify the ABSOLUTE path.} \item{verbose}{Print output} \item{graph.name}{Name of graph to use for the clustering algorithm} + +\item{cluster.name}{Name of output clusters} } \value{ Returns a Seurat object where the idents have been updated with new cluster info; diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd index 895045fb8..fcda66333 100644 --- a/man/IntegrateLayers.Rd +++ b/man/IntegrateLayers.Rd @@ -7,7 +7,7 @@ IntegrateLayers( object, method, - orig = "pca", + orig.reduction = "pca", group.by = NULL, assay = NULL, features = NULL, @@ -21,7 +21,7 @@ IntegrateLayers( \item{method}{Integration method function} -\item{orig}{Name of dimensional reduction for correction} +\item{orig.reduction}{Name of dimensional reduction for correction} \item{group.by}{Name of meta data to group cells by; defaults to splits assay layers} From 69b3238351b977a43d744d378def7cc2466d5115 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 15:56:11 -0400 Subject: [PATCH 568/979] update s5 integration vig --- vignettes/seurat5_integration.Rmd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 90453399b..ffe95fd49 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -97,24 +97,24 @@ Note that you can find more detail on each method, and any installation prerequi Each of the following lines perform a new integration using a single line of code: ```{r integratelayerscca} -obj <- IntegrateLayers(object = obj, method = CCAIntegration, verbose = F, new.reduction = 'integrated.cca') +obj <- IntegrateLayers(object = obj, method = CCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.cca', verbose = FALSE) ``` ```{r integratelayersrpca} -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, verbose = F, new.reduction = 'integrated.rpca') +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.rpca', verbose = FALSE) ``` ```{r integratelayersharmony} -obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, verbose = F, new.reduction = 'harmony') +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', verbose = FALSE) ``` ```{r integratelayersfastmnn} -obj <- IntegrateLayers(object = obj, method = FastMNNIntegration, verbose = F, new.reduction = 'integrated.mnn') +obj <- IntegrateLayers(object = obj, method = FastMNNIntegration, new.reduction = 'integrated.mnn', verbose = FALSE) ``` ```{r integratelayersscvi, eval=FALSE} -obj <- IntegrateLayers(object = obj, method = scVIIntegration, verbose = F, new.reduction = 'integrated.scvi', - conda_env = '../miniconda3/envs/scvi-env') +obj <- IntegrateLayers(object = obj, method = scVIIntegration, new.reduction = 'integrated.scvi', + conda_env = '../miniconda3/envs/scvi-env', verbose = FALSE) ``` ```{r addscvi, include=FALSE} From c96c4ab3ea44d085a469b66b157799eab44da348 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 16:21:22 -0400 Subject: [PATCH 569/979] fix docu bug --- NAMESPACE | 1 + R/roxygen.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 34923ac91..f85bf99b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -802,6 +802,7 @@ importFrom(rlang,invoke) importFrom(rlang,is_integerish) importFrom(rlang,is_na) importFrom(rlang,is_quosure) +importFrom(rlang,is_scalar_character) importFrom(rlang,is_scalar_integerish) importFrom(rlang,quo_get_env) importFrom(rlang,quo_get_expr) diff --git a/R/roxygen.R b/R/roxygen.R index ab17e5845..41579e2b5 100644 --- a/R/roxygen.R +++ b/R/roxygen.R @@ -3,6 +3,7 @@ NULL #' @importFrom utils lsf.str +#' @importFrom rlang is_scalar_character #' #' @export #' From 2b8a9065d924ce537476c36e85cca20f7e7352b2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 16:47:03 -0400 Subject: [PATCH 570/979] add package print info --- R/zzz.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index 05ef192c5..5c20e595d 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -87,6 +87,11 @@ AttachDeps <- function(deps) { #' .onAttach <- function(libname, pkgname) { AttachDeps(deps = c('SeuratObject')) + message("Loading Seurat v5 beta version \n", + "To maintain compatibility with previous workflows, new Seurat objects ", + "will use the previous object structure by default\n", + "To use new Seurat v5 assays: Please run: ", + "options(Seurat.object.assay.version = 'v5')") return(invisible(x = NULL)) } From 4e02a543967c837f39cdb9c9cf621122034f01e0 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 24 Mar 2023 17:29:10 -0400 Subject: [PATCH 571/979] adding examples to integration and lots of other documentation --- R/integration5.R | 57 +++++++++ man/BridgeReferenceSet-class.Rd | 25 ++++ man/BuildNicheAssay.Rd | 37 ++++++ man/CCAIntegration.Rd | 18 +++ man/CalcDispersion.Rd | 19 +++ man/CreateCategoryMatrix.Rd | 15 +++ man/DISP.Rd | 11 ++ man/FetchResiduals_reference.Rd | 17 +++ man/FindBridgeIntegrationAnchors.Rd | 63 ++++++++++ man/FindBridgeTransferAnchors.Rd | 59 ++++++++++ man/HarmonyIntegration.Rd | 16 +++ man/MVP.Rd | 18 +++ man/PrepareBridgeReference.Rd | 82 +++++++++++++ man/ProjectCellEmbeddings.Rd | 143 +++++++++++++++++++++++ man/ProjectData.Rd | 25 ++++ man/ProjectIntegration.Rd | 67 +++++++++++ man/PseudobulkExpression.Rd | 19 +++ man/RPCAIntegration.Rd | 26 +++++ man/TransferSketchLabels.Rd | 23 ++++ man/UnSketchEmbeddings.Rd | 17 +++ vignettes/seurat5_integration_bridge.Rmd | 2 +- 21 files changed, 758 insertions(+), 1 deletion(-) create mode 100644 man/BridgeReferenceSet-class.Rd create mode 100644 man/BuildNicheAssay.Rd create mode 100644 man/CalcDispersion.Rd create mode 100644 man/CreateCategoryMatrix.Rd create mode 100644 man/DISP.Rd create mode 100644 man/FetchResiduals_reference.Rd create mode 100644 man/FindBridgeIntegrationAnchors.Rd create mode 100644 man/FindBridgeTransferAnchors.Rd create mode 100644 man/MVP.Rd create mode 100644 man/PrepareBridgeReference.Rd create mode 100644 man/ProjectCellEmbeddings.Rd create mode 100644 man/ProjectData.Rd create mode 100644 man/ProjectIntegration.Rd create mode 100644 man/PseudobulkExpression.Rd create mode 100644 man/TransferSketchLabels.Rd create mode 100644 man/UnSketchEmbeddings.Rd diff --git a/R/integration5.R b/R/integration5.R index 4c7c03bf9..9101ca25e 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -33,6 +33,21 @@ NULL # @templateVar pkg harmony # @template note-reqdpkg #' +#' @examples +#' \dontrun{ +#' # Preprocessing +#' obj <- LoadData("pbmcsca") +#' obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +#' obj <- NormalizeData(obj) +#' obj <- FindVariableFeatures(obj) +#' obj <- ScaleData(obj) +#' obj <- RunPCA(obj) +#' +#' # After preprocessing, we integrate layers with added parameters specific to Harmony: +#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, +#' new.reduction = 'integrated.', verbose = FALSE, theta = 2, labmbda = 1, sigma = 0.1) +#' } +#' #' @export #' #' @concept integration @@ -117,6 +132,23 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' #' @inheritParams FindIntegrationAnchors #' @export +#' +#' @examples +#' \dontrun{ +#' # Preprocessing +#' obj <- LoadData("pbmcsca") +#' obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +#' obj <- NormalizeData(obj) +#' obj <- FindVariableFeatures(obj) +#' obj <- ScaleData(obj) +#' obj <- RunPCA(obj) +#' +#' # After preprocessing, we integrate layers. +#' # We can also modify parameters specific to CCAIntegration, such as k.anchor: +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.', +#' k.anchor = 6, verbose = FALSE) +#' } #' CCAIntegration <- function( object = NULL, @@ -183,6 +215,31 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-RPCA Integration #' +#' @examples +#' \dontrun{ +#' # Preprocessing +#' obj <- LoadData("pbmcsca") +#' obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +#' obj <- NormalizeData(obj) +#' obj <- FindVariableFeatures(obj) +#' obj <- ScaleData(obj) +#' obj <- RunPCA(obj) +#' +#' # After preprocessing, we run integration +#' +#' # Reference-based Integration +#' # Here, we use the first layer as a reference and modify the k.anchor parameter. +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' reference = 'data.Smart-seq2', k.anchor = 7, verbose = FALSE) +#' +#' # Alternatively, we can integrate SCTransformed data +#' obj <- SCTransform(object = obj) +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' assay = "SCT", verbose = FALSE) +#' } +#' #' @inheritParams FindIntegrationAnchors #' @export #' diff --git a/man/BridgeReferenceSet-class.Rd b/man/BridgeReferenceSet-class.Rd new file mode 100644 index 000000000..b3faf5b90 --- /dev/null +++ b/man/BridgeReferenceSet-class.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/objects.R +\docType{class} +\name{BridgeReferenceSet-class} +\alias{BridgeReferenceSet-class} +\alias{BridgeReferenceSet} +\title{The BridgeReferenceSet Class +The BridgeReferenceSet is an output from PrepareBridgeReference} +\description{ +The BridgeReferenceSet Class +The BridgeReferenceSet is an output from PrepareBridgeReference +} +\section{Slots}{ + +\describe{ +\item{\code{bridge}}{The multi-omic object} + +\item{\code{reference}}{The Reference object only containing bridge representation assay} + +\item{\code{params}}{A list of parameters used in the PrepareBridgeReference} + +\item{\code{command}}{Store log of parameters that were used} +}} + +\concept{objects} diff --git a/man/BuildNicheAssay.Rd b/man/BuildNicheAssay.Rd new file mode 100644 index 000000000..0568a2b93 --- /dev/null +++ b/man/BuildNicheAssay.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{BuildNicheAssay} +\alias{BuildNicheAssay} +\title{Construct an assay for spatial niche analysis} +\usage{ +BuildNicheAssay( + object, + fov, + group.by, + assay = "niche", + neighbors.k = 20, + niches.k = 4 +) +} +\arguments{ +\item{object}{A Seurat object} + +\item{fov}{FOV object to gather cell positions from} + +\item{group.by}{Cell classifications to count in spatial neighborhood} + +\item{assay}{Name for spatial neighborhoods assay} + +\item{neighbors.k}{Number of neighbors to consider for each cell} + +\item{niches.k}{Number of clusters to return based on the niche assay} +} +\value{ +Seurat object containing a new assay +} +\description{ +This function will construct a new assay where each feature is a +cell label The values represents the sum of a particular cell label +neighboring a given cell. +} +\concept{clustering} diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 797b1147a..3c560da45 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -47,3 +47,21 @@ search space} \description{ Seurat-CCA Integration } +\examples{ +\dontrun{ +# Preprocessing +obj <- LoadData("pbmcsca") +obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) + +# After preprocessing, we integrate layers. +# We can also modify parameters specific to CCAIntegration, such as k.anchor: +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.', + k.anchor = 6, verbose = FALSE) +} + +} diff --git a/man/CalcDispersion.Rd b/man/CalcDispersion.Rd new file mode 100644 index 000000000..72ca1a6d1 --- /dev/null +++ b/man/CalcDispersion.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{CalcDispersion} +\alias{CalcDispersion} +\title{Calculate dispersion of features} +\usage{ +CalcDispersion( + object, + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + verbose = TRUE, + ... +) +} +\description{ +Calculate dispersion of features +} diff --git a/man/CreateCategoryMatrix.Rd b/man/CreateCategoryMatrix.Rd new file mode 100644 index 000000000..768bd9ef3 --- /dev/null +++ b/man/CreateCategoryMatrix.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{CreateCategoryMatrix} +\alias{CreateCategoryMatrix} +\title{Create one hot matrix for a given label} +\usage{ +CreateCategoryMatrix( + labels, + method = c("aggregate", "average"), + cells.name = NULL +) +} +\description{ +Create one hot matrix for a given label +} diff --git a/man/DISP.Rd b/man/DISP.Rd new file mode 100644 index 000000000..25a772eef --- /dev/null +++ b/man/DISP.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{DISP} +\alias{DISP} +\title{Find variable features based on dispersion} +\usage{ +DISP(data, nselect = 2000L, verbose = TRUE, ...) +} +\description{ +Find variable features based on dispersion +} diff --git a/man/FetchResiduals_reference.Rd b/man/FetchResiduals_reference.Rd new file mode 100644 index 000000000..54e9ca09f --- /dev/null +++ b/man/FetchResiduals_reference.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{FetchResiduals_reference} +\alias{FetchResiduals_reference} +\title{temporal function to get residuals from reference} +\usage{ +FetchResiduals_reference( + object, + reference.SCT.model = NULL, + features = NULL, + nCount_UMI = NULL, + verbose = FALSE +) +} +\description{ +temporal function to get residuals from reference +} diff --git a/man/FindBridgeIntegrationAnchors.Rd b/man/FindBridgeIntegrationAnchors.Rd new file mode 100644 index 000000000..cb725fe19 --- /dev/null +++ b/man/FindBridgeIntegrationAnchors.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{FindBridgeIntegrationAnchors} +\alias{FindBridgeIntegrationAnchors} +\title{Find integration bridge anchors between query and extended bridge-reference} +\usage{ +FindBridgeIntegrationAnchors( + extended.reference, + query, + query.assay = NULL, + dims = 1:30, + scale = FALSE, + reduction = c("lsiproject", "pcaproject"), + integration.reduction = c("direct", "cca"), + verbose = TRUE +) +} +\arguments{ +\item{extended.reference}{BridgeReferenceSet object generated from +\code{\link{PrepareBridgeReference}}} + +\item{query}{A query Seurat object} + +\item{query.assay}{Assay name for query-bridge integration} + +\item{dims}{Number of dimensions for query-bridge integration} + +\item{scale}{Determine if scale the query data for projection} + +\item{reduction}{Dimensional reduction to perform when finding anchors. +Options are: +\itemize{ + \item{pcaproject: Project the PCA from the bridge onto the query. We + recommend using PCA when bridge and query datasets are from scRNA-seq} + \item{lsiproject: Project the LSI from the bridge onto the query. We + recommend using LSI when bridge and query datasets are from scATAC-seq or scCUT&TAG data. + This requires that LSI or supervised LSI has been computed for the bridge dataset, and the + same features (eg, peaks or genome bins) are present in both the bridge + and query. +} +}} + +\item{integration.reduction}{Dimensional reduction to perform when finding anchors +between query and reference. +Options are: +\itemize{ + \item{direct: find anchors directly on the bridge representation space} + \item{cca: perform cca on the on the bridge representation space and then find anchors +} +}} + +\item{verbose}{Print messages and progress} +} +\value{ +Returns an \code{AnchorSet} object that can be used as input to +\code{\link{IntegrateEmbeddings}}. +} +\description{ +Find a set of anchors between unimodal query and the other unimodal reference +using a pre-computed \code{\link{BridgeReferenceSet}}. +These integration anchors can later be used to integrate query and reference +using the \code{\link{IntegrateEmbeddings}} object. +} diff --git a/man/FindBridgeTransferAnchors.Rd b/man/FindBridgeTransferAnchors.Rd new file mode 100644 index 000000000..a0c40eb22 --- /dev/null +++ b/man/FindBridgeTransferAnchors.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{FindBridgeTransferAnchors} +\alias{FindBridgeTransferAnchors} +\title{Find bridge anchors between query and extended bridge-reference} +\usage{ +FindBridgeTransferAnchors( + extended.reference, + query, + query.assay = NULL, + dims = 1:30, + scale = FALSE, + reduction = c("lsiproject", "pcaproject"), + bridge.reduction = c("direct", "cca"), + verbose = TRUE +) +} +\arguments{ +\item{extended.reference}{BridgeReferenceSet object generated from +\code{\link{PrepareBridgeReference}}} + +\item{query}{A query Seurat object} + +\item{query.assay}{Assay name for query-bridge integration} + +\item{dims}{Number of dimensions for query-bridge integration} + +\item{scale}{Determine if scale the query data for projection} + +\item{reduction}{Dimensional reduction to perform when finding anchors. +Options are: +\itemize{ + \item{pcaproject: Project the PCA from the bridge onto the query. We + recommend using PCA when bridge and query datasets are from scRNA-seq} + \item{lsiproject: Project the LSI from the bridge onto the query. We + recommend using LSI when bridge and query datasets are from scATAC-seq or scCUT&TAG data. + This requires that LSI or supervised LSI has been computed for the bridge dataset, and the + same features (eg, peaks or genome bins) are present in both the bridge + and query. +} +}} + +\item{verbose}{Print messages and progress} +} +\value{ +Returns an \code{AnchorSet} object that can be used as input to +\code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}} and +\code{\link{MapQuery}}. +} +\description{ +Find a set of anchors between unimodal query and the other unimodal reference +using a pre-computed \code{\link{BridgeReferenceSet}}. +This function performs three steps: +1. Harmonize the bridge and query cells in the bridge query reduction space +2. Construct the bridge dictionary representations for query cells +3. Find a set of anchors between query and reference in the bridge graph laplacian eigenspace +These anchors can later be used to integrate embeddings or transfer data from the reference to +query object using the \code{\link{MapQuery}} object. +} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 839ebdc7e..2ba95976f 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -93,6 +93,22 @@ Harmony Integration This function requires the \href{https://cran.r-project.org/package=harmony}{\pkg{harmony}} package to be installed +} +\examples{ +\dontrun{ +# Preprocessing +obj <- LoadData("pbmcsca") +obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) + +# After preprocessing, we integrate layers with added parameters specific to Harmony: +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, + new.reduction = 'integrated.', verbose = FALSE, theta = 2, labmbda = 1, sigma = 0.1) +} + } \seealso{ \code{\link[harmony:HarmonyMatrix]{harmony::HarmonyMatrix}()} diff --git a/man/MVP.Rd b/man/MVP.Rd new file mode 100644 index 000000000..abc90a2d9 --- /dev/null +++ b/man/MVP.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{MVP} +\alias{MVP} +\title{Find variable features based on mean.var.plot} +\usage{ +MVP( + data, + verbose = TRUE, + nselect = 2000L, + mean.cutoff = c(0.1, 8), + dispersion.cutoff = c(1, Inf), + ... +) +} +\description{ +Find variable features based on mean.var.plot +} diff --git a/man/PrepareBridgeReference.Rd b/man/PrepareBridgeReference.Rd new file mode 100644 index 000000000..bc4f9a5c5 --- /dev/null +++ b/man/PrepareBridgeReference.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{PrepareBridgeReference} +\alias{PrepareBridgeReference} +\title{Prepare the bridge and reference datasets} +\usage{ +PrepareBridgeReference( + reference, + bridge, + reference.reduction = "pca", + reference.dims = 1:50, + normalization.method = c("SCT", "LogNormalize"), + reference.assay = NULL, + bridge.ref.assay = "RNA", + bridge.query.assay = "ATAC", + supervised.reduction = c("slsi", "spca", NULL), + bridge.query.reduction = NULL, + bridge.query.features = NULL, + laplacian.reduction.name = "lap", + laplacian.reduction.key = "lap_", + laplacian.reduction.dims = 1:50, + verbose = TRUE +) +} +\arguments{ +\item{reference}{A reference Seurat object} + +\item{bridge}{A multi-omic bridge Seurat object} + +\item{reference.reduction}{Name of dimensional reduction of the reference object (default is 'pca')} + +\item{reference.dims}{Number of dimensions used for the reference.reduction (default is 50)} + +\item{normalization.method}{Name of normalization method used: LogNormalize +or SCT} + +\item{reference.assay}{Assay name for reference (default is \code{\link{DefaultAssay}})} + +\item{bridge.ref.assay}{Assay name for bridge used for reference mapping. RNA by default} + +\item{bridge.query.assay}{Assay name for bridge used for query mapping. ATAC by default} + +\item{supervised.reduction}{Type of supervised dimensional reduction to be performed +for integrating the bridge and query. +#' Options are: +\itemize{ + \item{slsi: Perform supervised LSI as the dimensional reduction for + the bridge-query integration} + \item{spca: Perform supervised PCA as the dimensional reduction for + the bridge-query integration} + \item{NULL: no supervised dimensional reduction will be calculated. + bridge.query.reduction is used for the bridge-query integration} +}} + +\item{bridge.query.reduction}{Name of dimensions used for the bridge-query harmonization. +'bridge.query.reduction' and 'supervised.reduction' cannot be NULL together.} + +\item{bridge.query.features}{Features used for bridge query dimensional reduction +(default is NULL which uses VariableFeatures from the bridge object)} + +\item{laplacian.reduction.name}{Name of dimensional reduction name of graph laplacian eigenspace (default is 'lap')} + +\item{laplacian.reduction.key}{Dimensional reduction key (default is 'lap_')} + +\item{laplacian.reduction.dims}{Number of dimensions used for graph laplacian eigenspace (default is 50)} + +\item{verbose}{Print progress and message (default is TRUE)} +} +\value{ +Returns a \code{BridgeReferenceSet} that can be used as input to + \code{\link{FindBridgeTransferAnchors}}. +The parameters used are stored in the \code{BridgeReferenceSet} as well +} +\description{ +Preprocess the multi-omic bridge and unimodal reference datasets into +an extended reference. +This function performs the following three steps: +1. Performs within-modality harmonization between bridge and reference +2. Performs dimensional reduction on the SNN graph of bridge datasets via +Laplacian Eigendecomposition +3. Constructs a bridge dictionary representation for unimodal reference cells +} diff --git a/man/ProjectCellEmbeddings.Rd b/man/ProjectCellEmbeddings.Rd new file mode 100644 index 000000000..3e1f37206 --- /dev/null +++ b/man/ProjectCellEmbeddings.Rd @@ -0,0 +1,143 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/integration.R +\name{ProjectCellEmbeddings} +\alias{ProjectCellEmbeddings} +\alias{ProjectCellEmbeddings.Seurat} +\alias{ProjectCellEmbeddings.Assay} +\alias{ProjectCellEmbeddings.SCTAssay} +\alias{ProjectCellEmbeddings.StdAssay} +\alias{ProjectCellEmbeddings.default} +\alias{ProjectCellEmbeddings.IterableMatrix} +\alias{ProjectCellEmbeddings.DelayedMatrix} +\title{Project query data to the reference dimensional reduction} +\usage{ +ProjectCellEmbeddings(query, ...) + +\method{ProjectCellEmbeddings}{Seurat}( + query, + reference, + query.assay = NULL, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + normalization.method = c("LogNormalize", "SCT"), + scale = TRUE, + verbose = TRUE, + nCount_UMI = NULL, + feature.mean = NULL, + feature.sd = NULL +) + +\method{ProjectCellEmbeddings}{Assay}( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + normalization.method = NULL, + verbose = TRUE, + nCount_UMI = NULL, + feature.mean = NULL, + feature.sd = NULL +) + +\method{ProjectCellEmbeddings}{SCTAssay}( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + normalization.method = NULL, + verbose = TRUE, + nCount_UMI = NULL, + feature.mean = NULL, + feature.sd = NULL +) + +\method{ProjectCellEmbeddings}{StdAssay}( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + normalization.method = NULL, + verbose = TRUE, + nCount_UMI = NULL, + feature.mean = NULL, + feature.sd = NULL +) + +\method{ProjectCellEmbeddings}{default}( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + normalization.method = NULL, + verbose = TRUE, + features = NULL, + nCount_UMI = NULL, + feature.mean = NULL, + feature.sd = NULL +) + +\method{ProjectCellEmbeddings}{IterableMatrix}( + query, + reference, + reference.assay = NULL, + reduction = "pca", + dims = 1:50, + scale = TRUE, + normalization.method = NULL, + verbose = TRUE, + features = features, + nCount_UMI = NULL, + feature.mean = NULL, + feature.sd = NULL, + block.size = 10000 +) + +\method{ProjectCellEmbeddings}{DelayedMatrix}( + query.data, + block.size = 1e+09, + reference, + assay = NULL, + reduction, + normalization.method = NULL, + dims = NULL, + feature.mean = NULL, + feature.sd = NULL +) +} +\arguments{ +\item{query}{An object for query cells} + +\item{reference}{An object for reference cells} + +\item{query.assay}{Assay name for query object} + +\item{reference.assay}{Assay name for reference object} + +\item{reduction}{Name of dimensional reduction from reference object} + +\item{dims}{Dimensions used for reference dimensional reduction} + +\item{scale}{Determine if scale query data based on reference data variance} + +\item{verbose}{Print progress} + +\item{feature.mean}{Mean of features in reference} + +\item{feature.sd}{Standard variance of features in reference} +} +\value{ +A matrix with projected cell embeddings +} +\description{ +Project query data to the reference dimensional reduction +} +\keyword{internal} diff --git a/man/ProjectData.Rd b/man/ProjectData.Rd new file mode 100644 index 000000000..6be416e3b --- /dev/null +++ b/man/ProjectData.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{ProjectData} +\alias{ProjectData} +\title{Project full data to the sketch assay} +\usage{ +ProjectData( + object, + assay = "RNA", + sketched.assay = "sketch", + sketched.reduction, + full.reduction, + dims, + normalization.method = c("LogNormalize", "SCT"), + refdata = NULL, + k.weight = 50, + umap.model = NULL, + recompute.neighbors = FALSE, + recompute.weights = FALSE, + verbose = TRUE +) +} +\description{ +Project full data to the sketch assay +} diff --git a/man/ProjectIntegration.Rd b/man/ProjectIntegration.Rd new file mode 100644 index 000000000..f4a5cc2cc --- /dev/null +++ b/man/ProjectIntegration.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{ProjectIntegration} +\alias{ProjectIntegration} +\title{Integrate embeddings from the integrated sketched.assay} +\usage{ +ProjectIntegration( + object, + sketched.assay = "sketch", + assay = "RNA", + reduction = "integrated_dr", + features = NULL, + layers = "data", + reduction.name = NULL, + reduction.key = NULL, + method = c("sketch", "data"), + ratio = 0.8, + sketched.layers = NULL, + seed = 123, + verbose = TRUE +) +} +\arguments{ +\item{object}{A Seurat object with all cells for one dataset} + +\item{sketched.assay}{Assay name for sketched-cell expression (default is 'sketch')} + +\item{assay}{Assay name for original expression (default is 'RNA')} + +\item{reduction}{Dimensional reduction name for batch-corrected embeddings +in the sketched object (default is 'integrated_dr')} + +\item{features}{Features used for atomic sketch integration} + +\item{layers}{Names of layers for correction.} + +\item{reduction.name}{Name to save new reduction as; defaults to +\code{paste0(reduction, '.orig')}} + +\item{reduction.key}{Key for new dimensional reduction; defaults to creating +one from \code{reduction.name}} + +\item{method}{Methods to construct sketch-cell representation +for all cells (default is 'sketch'). Can be one of: +\itemize{ + \item \dQuote{\code{sketch}}: Use random sketched data slot + \item \dQuote{\code{data}}: Use data slot +}} + +\item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set +to \dQuote{\code{sketch}}; defaults to 0.8} + +\item{verbose}{Print progress and message} +} +\value{ +Returns a Seurat object with an integrated dimensional reduction +} +\description{ +The main steps of this procedure are outlined below. For a more detailed +description of the methodology, please see Hao, et al Biorxiv 2022: +\doi{10.1101/2022.02.24.481684} +} +\details{ +First learn a atom dictionary representation to reconstruct each cell. +Then, using this dictionary representation, +reconstruct the embeddings of each cell from the integrated atoms. +} diff --git a/man/PseudobulkExpression.Rd b/man/PseudobulkExpression.Rd new file mode 100644 index 000000000..d082dcb1a --- /dev/null +++ b/man/PseudobulkExpression.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R +\name{PseudobulkExpression} +\alias{PseudobulkExpression} +\title{Pseudobulk Expression} +\usage{ +PseudobulkExpression(object, ...) +} +\arguments{ +\item{object}{An assay} + +\item{...}{Arguments passed to other methods} +} +\value{ +Returns object after normalization +} +\description{ +Normalize the count data present in a given assay. +} diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index b97c91d82..a6a5124b0 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -47,3 +47,29 @@ search space} \description{ Seurat-RPCA Integration } +\examples{ +\dontrun{ +# Preprocessing +obj <- LoadData("pbmcsca") +obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) + +# After preprocessing, we run integration + +# Reference-based Integration +# Here, we use the first layer as a reference and modify the k.anchor parameter. +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', + reference = 'data.Smart-seq2', k.anchor = 7, verbose = FALSE) + +# Alternatively, we can integrate SCTransformed data +obj <- SCTransform(object = obj) +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', + assay = "SCT", verbose = FALSE) +} + +} diff --git a/man/TransferSketchLabels.Rd b/man/TransferSketchLabels.Rd new file mode 100644 index 000000000..7c3fb1306 --- /dev/null +++ b/man/TransferSketchLabels.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{TransferSketchLabels} +\alias{TransferSketchLabels} +\title{Transfer data from sketch data to full data} +\usage{ +TransferSketchLabels( + object, + atoms = "sketch", + reduction, + dims, + refdata = NULL, + k = 50, + reduction.model = NULL, + neighbors = NULL, + recompute.neighbors = FALSE, + recompute.weights = FALSE, + verbose = TRUE +) +} +\description{ +Transfer data from sketch data to full data +} diff --git a/man/UnSketchEmbeddings.Rd b/man/UnSketchEmbeddings.Rd new file mode 100644 index 000000000..cc128c4c3 --- /dev/null +++ b/man/UnSketchEmbeddings.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{UnSketchEmbeddings} +\alias{UnSketchEmbeddings} +\title{Transfer embeddings from sketched cells to the full data} +\usage{ +UnSketchEmbeddings( + atom.data, + atom.cells = NULL, + orig.data, + embeddings, + sketch.matrix = NULL +) +} +\description{ +Transfer embeddings from sketched cells to the full data +} diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index f008f7ad1..1b4eb2b98 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -43,7 +43,7 @@ In this vignette we demonstrate: * Exploring and assessing the resulting annotations ### Azimuth ATAC for Bridge Integration -Users can now automatically run bridge integration for PBMC and Bone Marrow scATAC-seq queries with the newly released Azimuth ATAC workflow. For more details, see the section on ATAC data in this [vignette](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html). +Users can now automatically run bridge integration for PBMC and Bone Marrow scATAC-seq queries with the newly released Azimuth ATAC workflow on the [Azimuth website](https://azimuth.hubmapconsortium.org/) or in R. For more details on running locally in R, see the section on ATAC data in this [vignette](https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html). ```{r, message=FALSE, warning=FALSE} From 4aeaddcf78785f186b5b1b8027369fd7ce58a6ab Mon Sep 17 00:00:00 2001 From: rsatija Date: Fri, 24 Mar 2023 17:32:33 -0400 Subject: [PATCH 572/979] Update README.md in v5 branch --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 427ab3a20..610fec83e 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,16 @@ [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat) [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat) -# Seurat v4.3.0 +# Seurat v5 beta Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC. +We are excited to release an initial beta version of Seurat v5! This updates introduces new functionality for spatial, multimodal, and scalable single-cell analysis. + +Seurat v5 is backwards-compatible with previous versions, so that users will continue to be able to re-run existing workflows. + +As v5 is still in beta, the CRAN installation install.packages("Seurat") will continue to install Seurat v4, but users can opt-in to test Seurat v5 by following the instructions in our [INSTALL PAGE](https://satijalab.org/seurat/articles/install). + Instructions, documentation, and tutorials can be found at: * https://satijalab.org/seurat From eb63d0e1b53c2d17ff8c2e859211247f2f87ef85 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 24 Mar 2023 18:26:04 -0400 Subject: [PATCH 573/979] updating documentation examples --- R/integration5.R | 54 +++++++++++++++++++++++++++++++-------- man/CCAIntegration.Rd | 18 ------------- man/HarmonyIntegration.Rd | 18 ++++++++++--- man/RPCAIntegration.Rd | 18 ++++++++++--- 4 files changed, 72 insertions(+), 36 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 9101ca25e..bfc4ccae5 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -43,11 +43,23 @@ NULL #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) #' -#' # After preprocessing, we integrate layers with added parameters specific to Harmony: -#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, -#' new.reduction = 'integrated.', verbose = FALSE, theta = 2, labmbda = 1, sigma = 0.1) +#' # After preprocessing, we integrate layers with added parameters specific to Harmony: +#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", +#' new.reduction = 'harmony', verbose = FALSE) +#' +#' # Modifying Parameters +#' # We can also add arguments specific to Harmony such as theta, to give more diverse clusters +#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", +#' new.reduction = 'harmony', verbose = FALSE, theta = 3) #' } #' +#' # Integrating SCTransformed data +#' obj <- SCTransform(object = obj) +#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, +#' orig.reduction = "pca", new.reduction = 'harmony', +#' assay = "SCT", verbose = FALSE) +#' +#' #' @export #' #' @concept integration @@ -144,11 +156,21 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' obj <- RunPCA(obj) #' #' # After preprocessing, we integrate layers. -#' # We can also modify parameters specific to CCAIntegration, such as k.anchor: #' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.', -#' k.anchor = 6, verbose = FALSE) -#' } +#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' verbose = FALSE) +#' +#' # Modifying parameters +#' # We can also specify parameters such as `k.anchor` to increase the strength of integration +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' k.anchor = 20, verbose = FALSE) +#' +#' # Integrating SCTransformed data +#' obj <- SCTransform(object = obj) +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' assay = "SCT", verbose = FALSE) #' CCAIntegration <- function( object = NULL, @@ -226,14 +248,24 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' obj <- RunPCA(obj) #' #' # After preprocessing, we run integration -#' +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' verbose = FALSE) +#' #' # Reference-based Integration -#' # Here, we use the first layer as a reference and modify the k.anchor parameter. +#' # Here, we use the first layer as a reference for integraion +#' # Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' reference = 1, verbose = FALSE) +#' +#' # Modifying parameters +#' # We can also specify parameters such as `k.anchor` to increase the strength of integration #' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, #' orig.reduction = "pca", new.reduction = 'integrated.rpca', -#' reference = 'data.Smart-seq2', k.anchor = 7, verbose = FALSE) +#' k.anchor = 20, verbose = FALSE) #' -#' # Alternatively, we can integrate SCTransformed data +#' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) #' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, #' orig.reduction = "pca", new.reduction = 'integrated.rpca', diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 3c560da45..797b1147a 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -47,21 +47,3 @@ search space} \description{ Seurat-CCA Integration } -\examples{ -\dontrun{ -# Preprocessing -obj <- LoadData("pbmcsca") -obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) -obj <- NormalizeData(obj) -obj <- FindVariableFeatures(obj) -obj <- ScaleData(obj) -obj <- RunPCA(obj) - -# After preprocessing, we integrate layers. -# We can also modify parameters specific to CCAIntegration, such as k.anchor: -obj <- IntegrateLayers(object = obj, method = CCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.', - k.anchor = 6, verbose = FALSE) -} - -} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 2ba95976f..54ff30e6b 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -104,11 +104,23 @@ obj <- FindVariableFeatures(obj) obj <- ScaleData(obj) obj <- RunPCA(obj) -# After preprocessing, we integrate layers with added parameters specific to Harmony: -obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, - new.reduction = 'integrated.', verbose = FALSE, theta = 2, labmbda = 1, sigma = 0.1) +# After preprocessing, we integrate layers with added parameters specific to Harmony: +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", + new.reduction = 'harmony', verbose = FALSE) + +# Modifying Parameters +# We can also add arguments specific to Harmony such as theta, to give more diverse clusters +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", + new.reduction = 'harmony', verbose = FALSE, theta = 3) } +# Integrating SCTransformed data +obj <- SCTransform(object = obj) +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, + orig.reduction = "pca", new.reduction = 'harmony', + assay = "SCT", verbose = FALSE) + + } \seealso{ \code{\link[harmony:HarmonyMatrix]{harmony::HarmonyMatrix}()} diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index a6a5124b0..b13c22a4f 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -58,14 +58,24 @@ obj <- ScaleData(obj) obj <- RunPCA(obj) # After preprocessing, we run integration - +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', + verbose = FALSE) + # Reference-based Integration -# Here, we use the first layer as a reference and modify the k.anchor parameter. +# Here, we use the first layer as a reference for integraion +# Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', + reference = 1, verbose = FALSE) + +# Modifying parameters +# We can also specify parameters such as `k.anchor` to increase the strength of integration obj <- IntegrateLayers(object = obj, method = RPCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.rpca', - reference = 'data.Smart-seq2', k.anchor = 7, verbose = FALSE) + k.anchor = 20, verbose = FALSE) -# Alternatively, we can integrate SCTransformed data +# Integrating SCTransformed data obj <- SCTransform(object = obj) obj <- IntegrateLayers(object = obj, method = RPCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.rpca', From 4694c4def24f77a92580ba29c841a9aa890b11dd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 24 Mar 2023 22:58:19 -0400 Subject: [PATCH 574/979] update sketch inte --- vignettes/ParseBio_sketch_integration.Rmd | 117 ++++++++++------------ 1 file changed, 52 insertions(+), 65 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 79dafaaee..83b40a64a 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -54,7 +54,11 @@ options(Seurat.object.assay.version = "v5") We downloaded the original dataset and donor metadata from [Parse Biosciences](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes), as an h5ad file. While the BPCells package can work directly with h5ad files, for optimal performance, we converted the dataset to the compressed sparse format used by BPCells, as described [here](LINKCONVERSIONVIGNETTE). We create a Seurat object for this dataset. Since the input to `CreateSeuratObject` is a BPCells matrix, the data remains on-disk and is not loaded into memory. After creating the object, we split the dataset into 24 [layers](LINKTOVIGNETTE), one for each sample (i.e. patient), to facilitate integration. ```{r, warning=F, message=F} -object <- readRDS("/brahms/hartmana/vignette_data/parse_1m_pbmc.rds") +parse.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") +# need to move +metadata <- readRDS("/brahms/haoy/vignette_data/ParseBio_PBMC_meta.rds") +object <- CreateSeuratObject(counts = parse.mat, meta.data = metadata) + object <- NormalizeData(object) # split assay into 24 layers object[['RNA']] <- split(object[['RNA']], f = object$sample) @@ -85,11 +89,18 @@ object <- IntegrateLayers(object, method = RPCAIntegration, orig = 'pca', object <- FindNeighbors(object, reduction = 'integrated.rpca', dims = 1:30) object <- FindClusters(object, resolution = 2) object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T, verbose = F) + +# you can now rejoin the layers in the sketched assay +# this is required to perform differential expression +object <- JoinLayers(object) +c10_markers <- FindMarkers(object = object, 10, max.cells.per.ident = 500, only.pos = TRUE) +head(c10_markers) + # You can now annotate clusters using marker genes. # We performed this step, and include the results in the 'sketch.celltype' metadata column -# NOTE change celltype.weight -> sketch.celltype in metadata + plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() -plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend() +plot.s2 <- DimPlot(object, group.by = 'celltype.manual', reduction = 'umap') + NoLegend() ``` ```{r} @@ -98,7 +109,13 @@ plot.s1 + plot.s2 ## Integrate the full datasets Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `ProjectIntegration` function to integrate all cells. After this function is run, the `integrated.rpca.full` space now embeds all cells in the dataset.Even though all cells in the dataset have been integrated together, the non-sketched cells are not loaded into memory. Users can still switch between the `sketch` (sketched cells, in-memory) and `RNA` (full dataset, on disk) for analysis. + ```{r} + +# resplit the sketched cell assay into layers +# this is required to project the integration onto all cells +object[['sketch']] <- split(object[['sketch']], f = object$sample) + object <- ProjectIntegration(object = object, sketched.assay = 'sketch', assay = 'RNA', @@ -111,81 +128,51 @@ object <- RunUMAP(object, reduction = 'integrated.rpca.full', dims = 1:30 , red ```{r} p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.5) + NoLegend() -p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.weight', label = T, alpha = 0.5) + NoLegend() +p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.manual', label = T, alpha = 0.5) + NoLegend() p1 + p2 ``` -## pseudo-bulk +## Compare healthy and diabetic samples + +By integrating all samples together, we can now compare healthy and diabetic cells in matched cell states. To maximize statistical power, we want to use all cells - not just the sketched cells - to perform this analysis. As recommended by [Soneson et al.](https://www.nature.com/articles/s41467-020-19894-4), we use an aggregation-based (pseudobulk) workflow. We aggregate all cells within the same cell type and sample using the `AggregateExpression` function. This returns a Seurat object where each 'cell' represents the pseudobulk profile of one cell type in one individual. + +After we aggregate cells, we can perform celltype-specific differential expression between healthy and diabetic samples using DESeq2. We demonstrate this for CD14 monocytes. + ```{r} -time8_bulk <- system.time( bulk <- AggregateExpression(object, - return.seurat = T, - slot = 'counts', - assays = 'RNA', - group.by = c("celltype.weight","sample","disease") - ) -) +bulk <- AggregateExpression(object, return.seurat = T, slot = 'counts', + assays = 'RNA', group.by = c("celltype.manual","sample")) + +# each sample is an individual-specific celltype-specific pseudobulk profile +tail(Cells(bulk)) + bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) -bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) -marker.list <- list() -celltype.set <- unique(bulk$celltype ) -for (i in seq_along(celltype.set)) { - bulk.i <- subset(bulk, subset = celltype == celltype.set[i]) - Idents(bulk.i) <- 'disease' - if (any(table(bulk.i$disease) < 3)) { - marker.list[[i]] <- EmptyDF(n = 0) - } else { - marker.list[[i]] <- FindMarkers(bulk.i, ident.1 = 'D',ident.2 = 'H', slot = 'counts', test.use = 'DESeq2', verbose = F ) - } -} -names(marker.list) <- celltype.set -``` -```{r} -marker.list.filter <- lapply(marker.list, function(x) { - if(nrow(x) > 0) { - x <- x[x$p_val_adj < 0.01 & !is.na(x$p_val_adj ),] - } - if (nrow(x) > 0) { - return(x) - } -}) -``` -```{r} -VlnPlot(bulk, features = 'FOXO3', group.by = 'celltype', split.by = 'disease') -``` +bulk$disease <- sapply(strsplit(bulk$donor, split = "-"), '[', 1) + +cd14.bulk <- subset(bulk,celltype == "CD14 Mono") +Idents(cd14.bulk) <- 'disease' +de_markers <- FindMarkers(cd14.bulk, ident.1 = 'D',ident.2 = 'H', slot = 'counts', test.use = 'DESeq2', verbose = F ) +de_markers$gene <- rownames(de_markers) +ggplot(de_markers, aes(avg_log2FC, -log10(p_val_adj))) + geom_point(size=0.5, alpha=0.5) + theme_bw() + ylab("-log10(p-value)")+geom_text_repel(aes(label = ifelse(p_val_adj<0.01, gene, "")),colour = 'red', size = 3) -## computing time summary -```{r} -all_T <- ls(pattern = 'time') -overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60 -for (i in 1:length(all_T)) { - T_i <- get(all_T[i])['elapsed'] - if (T_i > 60) { - print(paste(all_T[i], round(T_i/60, digits = 1), 'mins')) - } else { - print(paste(all_T[i], round(T_i, digits = 1), 'secs')) - } -} -print(paste('Total time ', round(overall, digits = 1), 'mins' )) -``` -```{r,fig.height = 20, fig.width = 15} -Idents(bulk) <- 'celltype' -marker <- FindAllMarkers(object = bulk, only.pos = TRUE, verbose = FALSE) -marker %>% - group_by(cluster) %>% - top_n(n = -5, wt = p_val) -> top5 -bulk <- ScaleData(bulk, features = top5$gene) -DoHeatmap(bulk, features = top5$gene) + NoLegend() ``` -```{r save.times, include=TRUE} -print(as.data.frame(all_times)) -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/ParseBio_sketch_integration.csv") +We do not necessarily expect to see a strong transcriptomic signature of diabetes in the blood, but our analyses reveals multiple genes that are up-regulated in diabetic patients, and are consistent across multiple individuals. Some of these genes, including the complement subcomponent C1R, have been [previously associated with diabetes](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6927818/). Others, including the transcription factor SPDEF and the receptor RAPSN, are also diabetic biomarkers in multiple cell types, including CD14 monocytes. + +```{r,height = 15, width=6} +# each dot represents a pseudobulk average from an individual + +p1 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') +p2 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') +p3 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') +p1+p2+p3 + ``` +
    **Session Info** ```{r} sessionInfo() ``` -
    +
    \ No newline at end of file From 5453561322e1962dee5cda865e5152fddff3f729 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 08:40:44 -0400 Subject: [PATCH 575/979] update sketch mouse features --- vignettes/seurat5_sketch_analysis.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 1fb322be9..61937b093 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -105,8 +105,8 @@ obj <- RunUMAP(obj, dims = 1:50, return.model = T) DimPlot(obj, label = T, reduction = 'umap') + NoLegend() ``` -```{r,fig.height = 7, fig.width = 15} -FeaturePlot(obj, c('Igfbp7', 'Neurod6', 'Dlx2', 'Eomes', 'Vim', 'Reln', 'Olig1', 'C1qa'), ncol = 4) +```{r,fig.height = 7, fig.width = 10} +FeaturePlot(obj, c('Igfbp7', 'Neurod6', 'Dlx2','Gad2', 'Eomes', 'Vim', 'Reln', 'Olig1', 'C1qa'), ncol = 3) ``` ## Extend results to the full datasets @@ -184,8 +184,8 @@ DimPlot(obj.sub, label = T) + NoLegend() Note that we can start to see distinct interneuron lineages emerging in this dataset. We can see a clear separation of interneuron precursors that originated from the medial ganglionic eminence (Lhx6) or caudal ganglionic eminence (Nr2f2). We can further see the emergence of Sst (Sst) and Pvalb (Mef2c)-committed interneurons, and a CGE-derived Meis2-expressing progenitor population. These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. -```{r,fig.height = 7, fig.width = 15} -FeaturePlot(obj.sub, c('Dlx2', 'Lhx6', 'Nr2f2', 'Sst', 'Pvalb', 'Meis2', 'Vip', 'Dlx6os1'), ncol = 4) +```{r,fig.height = 7, fig.width = 10} +FeaturePlot(obj.sub, c('Dlx2', 'Gad2', 'Lhx6', 'Nr2f2', 'Sst', 'Pvalb', 'Meis2', 'Vip', 'Dlx6os1'), ncol = 3) ``` ```{r save.times, include=FALSE, eval=FALSE} From 0b3f09a125d38bf5cb02b342513e8fd771cd4e9b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 08:45:32 -0400 Subject: [PATCH 576/979] update sketch inte --- vignettes/ParseBio_sketch_integration.Rmd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 83b40a64a..6bf4962c0 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -89,10 +89,12 @@ object <- IntegrateLayers(object, method = RPCAIntegration, orig = 'pca', object <- FindNeighbors(object, reduction = 'integrated.rpca', dims = 1:30) object <- FindClusters(object, resolution = 2) object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.model = T, verbose = F) +``` +```{r} # you can now rejoin the layers in the sketched assay # this is required to perform differential expression -object <- JoinLayers(object) +object[['sketch']] <- JoinLayers(object[['sketch']]) c10_markers <- FindMarkers(object = object, 10, max.cells.per.ident = 500, only.pos = TRUE) head(c10_markers) @@ -103,7 +105,7 @@ plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() plot.s2 <- DimPlot(object, group.by = 'celltype.manual', reduction = 'umap') + NoLegend() ``` -```{r} +```{r, fig.width=10, fig.height=5} plot.s1 + plot.s2 ``` @@ -126,7 +128,7 @@ object <- ProjectIntegration(object = object, object <- RunUMAP(object, reduction = 'integrated.rpca.full', dims = 1:30 , reduction.name = 'umap.full', reduction.key = 'UMAP_full_') ``` -```{r} +```{r,fig.width=10, fig.height=5} p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.5) + NoLegend() p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.manual', label = T, alpha = 0.5) + NoLegend() p1 + p2 From 38e2286e7af88a1c1408fbbbded359f04f86927e Mon Sep 17 00:00:00 2001 From: Gesmira Date: Sat, 25 Mar 2023 10:48:57 -0400 Subject: [PATCH 577/979] adding SeuratData:: to docu --- R/integration5.R | 6 +++--- man/HarmonyIntegration.Rd | 2 +- man/RPCAIntegration.Rd | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index bfc4ccae5..0e913c7eb 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -36,7 +36,7 @@ NULL #' @examples #' \dontrun{ #' # Preprocessing -#' obj <- LoadData("pbmcsca") +#' obj <- SeuratData::LoadData("pbmcsca") #' obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) #' obj <- NormalizeData(obj) #' obj <- FindVariableFeatures(obj) @@ -148,7 +148,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' @examples #' \dontrun{ #' # Preprocessing -#' obj <- LoadData("pbmcsca") +#' obj <- SeuratData::LoadData("pbmcsca") #' obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) #' obj <- NormalizeData(obj) #' obj <- FindVariableFeatures(obj) @@ -240,7 +240,7 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' @examples #' \dontrun{ #' # Preprocessing -#' obj <- LoadData("pbmcsca") +#' obj <- SeuratData::LoadData("pbmcsca") #' obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) #' obj <- NormalizeData(obj) #' obj <- FindVariableFeatures(obj) diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 54ff30e6b..82e382f1e 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -97,7 +97,7 @@ to be installed \examples{ \dontrun{ # Preprocessing -obj <- LoadData("pbmcsca") +obj <- SeuratData::LoadData("pbmcsca") obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) obj <- NormalizeData(obj) obj <- FindVariableFeatures(obj) diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index b13c22a4f..f7d6d60ea 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -50,7 +50,7 @@ Seurat-RPCA Integration \examples{ \dontrun{ # Preprocessing -obj <- LoadData("pbmcsca") +obj <- SeuratData::LoadData("pbmcsca") obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) obj <- NormalizeData(obj) obj <- FindVariableFeatures(obj) From 32d25d872c44aa5e2eed89f079b2d344e070c60d Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sat, 25 Mar 2023 13:28:26 -0400 Subject: [PATCH 578/979] update niche reference --- vignettes/seurat5_spatial_vignette_2.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_spatial_vignette_2.Rmd b/vignettes/seurat5_spatial_vignette_2.Rmd index f25391417..7a04b971f 100644 --- a/vignettes/seurat5_spatial_vignette_2.Rmd +++ b/vignettes/seurat5_spatial_vignette_2.Rmd @@ -356,7 +356,7 @@ keep.cells <- Cells(xenium.obj)[!is.na(xenium.obj$predicted.celltype)] xenium.obj <- subset(xenium.obj, cells = keep.cells) ``` -While the previous analyses consider each cell independently, spatial data enables cells to be defined not just by their neighborhood, but also by their broader spatial context. In Seurat v5, we introduce support for 'niche' analysis of spatial data, which demarcates regions of tissue ('niches'), each of which is defined by a different composition of spatially adjacent cell types. Inspired by the method in [He et al, NBT 2022](https://www.nature.com/articles/s41587-022-01483-z), we consider the 'local neighborhood' for each cell - consisting of its `k.neighbor` spatially closest neighbors, and count the occurrences of each cell type present in this neighborhood. We then use k-means clustering to group cells that have similar neighborhoods together, into spatial niches. +While the previous analyses consider each cell independently, spatial data enables cells to be defined not just by their neighborhood, but also by their broader spatial context. In Seurat v5, we introduce support for 'niche' analysis of spatial data, which demarcates regions of tissue ('niches'), each of which is defined by a different composition of spatially adjacent cell types. Inspired by methods in [Goltsev et al, Cell 2018](https://www.sciencedirect.com/science/article/pii/S0092867418309048) and [He et al, NBT 2022](https://www.nature.com/articles/s41587-022-01483-z), we consider the 'local neighborhood' for each cell - consisting of its `k.neighbor` spatially closest neighbors, and count the occurrences of each cell type present in this neighborhood. We then use k-means clustering to group cells that have similar neighborhoods together, into spatial niches. We call the `BuildNicheAssay` function from within Seurat to construct a new assay called `niche` containing the cell type composition spatially neighboring each cell. A metadata column called `niches` is also returned, which contains cluster assignments based on the niche assay. From b8e38e3d9d580ea14ef27a3b4d1b5468b94254a8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 13:55:58 -0400 Subject: [PATCH 579/979] fix mouse brain gene --- vignettes/seurat5_sketch_analysis.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 61937b093..4088a990e 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -185,7 +185,7 @@ Note that we can start to see distinct interneuron lineages emerging in this dat These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. ```{r,fig.height = 7, fig.width = 10} -FeaturePlot(obj.sub, c('Dlx2', 'Gad2', 'Lhx6', 'Nr2f2', 'Sst', 'Pvalb', 'Meis2', 'Vip', 'Dlx6os1'), ncol = 3) +FeaturePlot(obj.sub, c('Dlx2', 'Gad2', 'Lhx6', 'Nr2f2', 'Sst', 'Mef2c', 'Meis2', 'Id2', 'Dlx6os1'), ncol = 3) ``` ```{r save.times, include=FALSE, eval=FALSE} From d75c06b4d87eaef394d3628fdc473410e4c14480 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 17:22:17 -0400 Subject: [PATCH 580/979] parseBio inte edits --- vignettes/ParseBio_sketch_integration.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 6bf4962c0..f35cc2661 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -46,6 +46,7 @@ library(Seurat) library(BPCells) library(dplyr) library(ggplot2) +library(ggrepel) # set this option when analyzing large datasets options(future.globals.maxSize = 3e9) options(Seurat.object.assay.version = "v5") @@ -161,7 +162,7 @@ ggplot(de_markers, aes(avg_log2FC, -log10(p_val_adj))) + geom_point(size=0.5, al We do not necessarily expect to see a strong transcriptomic signature of diabetes in the blood, but our analyses reveals multiple genes that are up-regulated in diabetic patients, and are consistent across multiple individuals. Some of these genes, including the complement subcomponent C1R, have been [previously associated with diabetes](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6927818/). Others, including the transcription factor SPDEF and the receptor RAPSN, are also diabetic biomarkers in multiple cell types, including CD14 monocytes. -```{r,height = 15, width=6} +```{r,height = 25, width=6} # each dot represents a pseudobulk average from an individual p1 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') From 8a9c2348fe9ede65608288a3bae1f94d5ec19277 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 19:07:38 -0400 Subject: [PATCH 581/979] set sct v2 default --- R/preprocessing.R | 10 ++++++++++ vignettes/ParseBio_sketch_integration.Rmd | 11 +++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 8213a5d0f..1e23c6eb0 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2986,6 +2986,9 @@ SampleUMI <- function( #' @param do.center Whether to center residuals to have mean zero; default is TRUE #' @param clip.range Range to clip the residuals to; default is \code{c(-sqrt(n/30), sqrt(n/30))}, #' where n is the number of cells +#' @param vst.flavor When set to 'v2' sets method = glmGamPoi_offset, n_cells=2000, +#' and exclude_poisson = TRUE which causes the model to learn theta and intercept +#' only besides excluding poisson genes from learning and regularization #' @param conserve.memory If set to TRUE the residual matrix for all genes is never #' created in full; useful for large data sets, but will take longer to run; #' this will also set return.only.var.genes to TRUE; default is FALSE @@ -3024,6 +3027,7 @@ SCTransform.default <- function( do.scale = FALSE, do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = umi) / 30), sqrt(x = ncol(x = umi) / 30)), + vst.flavor = 'v2', conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -3082,6 +3086,8 @@ SCTransform.default <- function( immediate. = TRUE ) } + + vst.args[['vst.flavor']] <- vst.flavor vst.args[['umi']] <- umi vst.args[['cell_attr']] <- cell.attr vst.args[['verbosity']] <- as.numeric(x = verbose) * 2 @@ -3279,6 +3285,7 @@ SCTransform.Assay <- function( do.scale = FALSE, do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + vst.flavor = 'v2', conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -3305,6 +3312,7 @@ SCTransform.Assay <- function( do.scale = do.scale, do.center = do.center, clip.range = clip.range, + vst.flavor = vst.flavor, conserve.memory = conserve.memory, return.only.var.genes = return.only.var.genes, seed.use = seed.use, @@ -3369,6 +3377,7 @@ SCTransform.Seurat <- function( do.scale = FALSE, do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), + vst.flavor = "v2", conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -3393,6 +3402,7 @@ SCTransform.Seurat <- function( do.scale = do.scale, do.center = do.center, clip.range = clip.range, + vst.flavor = vst.flavor, conserve.memory = conserve.memory, return.only.var.genes = return.only.var.genes, seed.use = seed.use, diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index f35cc2661..17248cb6b 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -143,7 +143,7 @@ After we aggregate cells, we can perform celltype-specific differential expressi ```{r} bulk <- AggregateExpression(object, return.seurat = T, slot = 'counts', - assays = 'RNA', group.by = c("celltype.manual","sample")) + assays = 'RNA', group.by = c("celltype.manual","sample", 'disease')) # each sample is an individual-specific celltype-specific pseudobulk profile tail(Cells(bulk)) @@ -151,6 +151,8 @@ tail(Cells(bulk)) bulk$celltype <- sapply(strsplit(Cells(bulk), split = "_"), '[', 1) bulk$donor <- sapply(strsplit(Cells(bulk), split = "_"), '[', 2) bulk$disease <- sapply(strsplit(bulk$donor, split = "-"), '[', 1) +bulk$disease <- factor(x = bulk$disease, levels = c('H', 'D')) + cd14.bulk <- subset(bulk,celltype == "CD14 Mono") Idents(cd14.bulk) <- 'disease' @@ -162,13 +164,10 @@ ggplot(de_markers, aes(avg_log2FC, -log10(p_val_adj))) + geom_point(size=0.5, al We do not necessarily expect to see a strong transcriptomic signature of diabetes in the blood, but our analyses reveals multiple genes that are up-regulated in diabetic patients, and are consistent across multiple individuals. Some of these genes, including the complement subcomponent C1R, have been [previously associated with diabetes](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6927818/). Others, including the transcription factor SPDEF and the receptor RAPSN, are also diabetic biomarkers in multiple cell types, including CD14 monocytes. -```{r,height = 25, width=6} +```{r,height = 12, width=6} # each dot represents a pseudobulk average from an individual +VlnPlot(bulk, features = c("NAMPTP1"),group.by = 'celltype', split.by = 'disease', cols = c('#377eb8','#e41a1c')) -p1 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') -p2 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') -p3 <- VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease') -p1+p2+p3 ``` From cfa236c28d9c5539ea042da92f5617ef9336af75 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 19:18:21 -0400 Subject: [PATCH 582/979] error message multi layers findMarker --- R/differential_expression.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/differential_expression.R b/R/differential_expression.R index e206d01dd..2dd9033e5 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -641,6 +641,9 @@ FindMarkers.Assay <- function( yes = 'counts', no = slot ) + if (length(x = Layers(object = object, search = slot)) > 1) { + stop(slot, ' layers are not joined. Please run JoinLayers') + } data.use <- GetAssayData(object = object, slot = data.slot) counts <- switch( EXPR = data.slot, From 78e59e5f2b3b881d3d93c38a2e2ec95043ebc53c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 19:29:30 -0400 Subject: [PATCH 583/979] update pseudocount --- R/differential_expression.R | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 2dd9033e5..f46437266 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -505,12 +505,11 @@ FindMarkers.default <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, fc.results = NULL, densify = FALSE, ... ) { - pseudocount.use <- pseudocount.use %||% 1 ValidateCellGroups( object = object, cells.1 = cells.1, @@ -627,7 +626,7 @@ FindMarkers.Assay <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -635,7 +634,6 @@ FindMarkers.Assay <- function( norm.method = NULL, ... ) { - pseudocount.use <- pseudocount.use %||% 1 data.slot <- ifelse( test = test.use %in% DEmethods_counts(), yes = 'counts', @@ -717,7 +715,7 @@ FindMarkers.SCTAssay <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -725,7 +723,6 @@ FindMarkers.SCTAssay <- function( recorrect_umi = TRUE, ... ) { - pseudocount.use <- pseudocount.use %||% 1 data.slot <- ifelse( test = test.use %in% DEmethods_counts(), yes = 'counts', @@ -826,14 +823,13 @@ FindMarkers.DimReduc <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, mean.fxn = rowMeans, fc.name = NULL, densify = FALSE, ... ) { - pseudocount.use <- pseudocount.use %||% 1 if (test.use %in% DEmethods_counts()) { stop("The following tests cannot be used for differential expression on a reduction as they assume a count model: ", paste(DEmethods_counts(), collapse=", ")) @@ -1100,14 +1096,13 @@ FoldChange.Assay <- function( cells.2, features = NULL, slot = "data", - pseudocount.use = 1, + pseudocount.use = 0.1, fc.name = NULL, mean.fxn = NULL, base = 2, norm.method = NULL, ... ) { - pseudocount.use <- pseudocount.use %||% 1 data <- GetAssayData(object = object, slot = slot) default.mean.fxn <- function(x) { return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) @@ -1161,12 +1156,11 @@ FoldChange.DimReduc <- function( cells.2, features = NULL, slot = NULL, - pseudocount.use = 1, + pseudocount.use = 0.1, fc.name = NULL, mean.fxn = NULL, ... ) { - pseudocount.use <- pseudocount.use %||% 1 mean.fxn <- mean.fxn %||% rowMeans fc.name <- fc.name %||% "avg_diff" data <- t(x = Embeddings(object = object)) @@ -1214,7 +1208,7 @@ FoldChange.Seurat <- function( slot = 'data', reduction = NULL, features = NULL, - pseudocount.use = NULL, + pseudocount.use = 0.1, mean.fxn = NULL, base = 2, fc.name = NULL, From 5bf96d930de6fa243ccf37156cc23d2390096cde Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 19:39:38 -0400 Subject: [PATCH 584/979] update pseudo count use --- R/differential_expression.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index f46437266..b0333566d 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -474,7 +474,7 @@ FindConservedMarkers <- function( #' of the two groups, currently only used for poisson and negative binomial tests #' @param min.cells.group Minimum number of cells in one of the groups #' @param pseudocount.use Pseudocount to add to averaged expression values when -#' calculating logFC. 1 by default. +#' calculating logFC. 0.1 by default. #' @param fc.results data.frame from FoldChange #' @param densify Convert the sparse matrix to a dense form before running the DE test. This can provide speedups but might require higher memory; default is FALSE #' @@ -934,6 +934,7 @@ FindMarkers.Seurat <- function( reduction = NULL, features = NULL, logfc.threshold = 0.25, + pseudocount.use = 0.1, test.use = "wilcox", min.pct = 0.1, min.diff.pct = -Inf, @@ -1022,6 +1023,7 @@ FindMarkers.Seurat <- function( cells.2 = cells$cells.2, features = features, logfc.threshold = logfc.threshold, + pseudocount.use = pseudocount.use, test.use = test.use, min.pct = min.pct, min.diff.pct = min.diff.pct, From fee3d8b8630fe6c9c3b18b7409cf85a53a8e6af4 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 19:41:14 -0400 Subject: [PATCH 585/979] update docu --- man/FindMarkers.Rd | 11 ++++++----- man/FoldChange.Rd | 6 +++--- man/SCTransform.Rd | 7 +++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index 160c69c0d..b53f4087e 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -30,7 +30,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, fc.results = NULL, densify = FALSE, ... @@ -53,7 +53,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -79,7 +79,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -104,7 +104,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, + pseudocount.use = 0.1, mean.fxn = rowMeans, fc.name = NULL, densify = FALSE, @@ -122,6 +122,7 @@ FindMarkers(object, ...) reduction = NULL, features = NULL, logfc.threshold = 0.25, + pseudocount.use = 0.1, test.use = "wilcox", min.pct = 0.1, min.diff.pct = -Inf, @@ -227,7 +228,7 @@ of the two groups, currently only used for poisson and negative binomial tests} \item{min.cells.group}{Minimum number of cells in one of the groups} \item{pseudocount.use}{Pseudocount to add to averaged expression values when -calculating logFC. 1 by default.} +calculating logFC. 0.1 by default.} \item{fc.results}{data.frame from FoldChange} diff --git a/man/FoldChange.Rd b/man/FoldChange.Rd index edda396b5..ec8cc72b4 100644 --- a/man/FoldChange.Rd +++ b/man/FoldChange.Rd @@ -18,7 +18,7 @@ FoldChange(object, ...) cells.2, features = NULL, slot = "data", - pseudocount.use = 1, + pseudocount.use = 0.1, fc.name = NULL, mean.fxn = NULL, base = 2, @@ -32,7 +32,7 @@ FoldChange(object, ...) cells.2, features = NULL, slot = NULL, - pseudocount.use = 1, + pseudocount.use = 0.1, fc.name = NULL, mean.fxn = NULL, ... @@ -48,7 +48,7 @@ FoldChange(object, ...) slot = "data", reduction = NULL, features = NULL, - pseudocount.use = NULL, + pseudocount.use = 0.1, mean.fxn = NULL, base = 2, fc.name = NULL, diff --git a/man/SCTransform.Rd b/man/SCTransform.Rd index 849f51180..c4cd0a40b 100644 --- a/man/SCTransform.Rd +++ b/man/SCTransform.Rd @@ -22,6 +22,7 @@ SCTransform(object, ...) do.scale = FALSE, do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = umi)/30), sqrt(x = ncol(x = umi)/30)), + vst.flavor = "v2", conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -42,6 +43,7 @@ SCTransform(object, ...) do.scale = FALSE, do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = object)/30), sqrt(x = ncol(x = object)/30)), + vst.flavor = "v2", conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -64,6 +66,7 @@ SCTransform(object, ...) do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = object[[assay]])/30), sqrt(x = ncol(x = object[[assay]])/30)), + vst.flavor = "v2", conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -109,6 +112,10 @@ regression. For example, percent.mito. Default is NULL} \item{clip.range}{Range to clip the residuals to; default is \code{c(-sqrt(n/30), sqrt(n/30))}, where n is the number of cells} +\item{vst.flavor}{When set to 'v2' sets method = glmGamPoi_offset, n_cells=2000, +and exclude_poisson = TRUE which causes the model to learn theta and intercept +only besides excluding poisson genes from learning and regularization} + \item{conserve.memory}{If set to TRUE the residual matrix for all genes is never created in full; useful for large data sets, but will take longer to run; this will also set return.only.var.genes to TRUE; default is FALSE} From 7f56268c7a177883e99ce447cb8f85c629a71944 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 25 Mar 2023 21:46:48 -0400 Subject: [PATCH 586/979] update parseBio inte --- vignettes/ParseBio_sketch_integration.Rmd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 17248cb6b..0498478ee 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -96,14 +96,14 @@ object <- RunUMAP(object, reduction = 'integrated.rpca', dims = 1:30, return.mo # you can now rejoin the layers in the sketched assay # this is required to perform differential expression object[['sketch']] <- JoinLayers(object[['sketch']]) -c10_markers <- FindMarkers(object = object, 10, max.cells.per.ident = 500, only.pos = TRUE) +c10_markers <- FindMarkers(object = object, ident.1 = 10, max.cells.per.ident = 500, only.pos = TRUE) head(c10_markers) # You can now annotate clusters using marker genes. # We performed this step, and include the results in the 'sketch.celltype' metadata column -plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend() -plot.s2 <- DimPlot(object, group.by = 'celltype.manual', reduction = 'umap') + NoLegend() +plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') +plot.s2 <- DimPlot(object, group.by = 'celltype.manual', reduction = 'umap') ``` ```{r, fig.width=10, fig.height=5} @@ -130,8 +130,8 @@ object <- RunUMAP(object, reduction = 'integrated.rpca.full', dims = 1:30 , red ``` ```{r,fig.width=10, fig.height=5} -p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.5) + NoLegend() -p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.manual', label = T, alpha = 0.5) + NoLegend() +p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.1) + NoLegend() +p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.manual', label = T, alpha = 0.1) + NoLegend() p1 + p2 ``` @@ -166,7 +166,7 @@ We do not necessarily expect to see a strong transcriptomic signature of diabete ```{r,height = 12, width=6} # each dot represents a pseudobulk average from an individual -VlnPlot(bulk, features = c("NAMPTP1"),group.by = 'celltype', split.by = 'disease', cols = c('#377eb8','#e41a1c')) +VlnPlot(bulk, features = c("C1R"),group.by = 'celltype', split.by = 'disease', cols = c('#377eb8','#e41a1c')) ``` From fdfd673b01578d5f03786069851752e946cd2cf2 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Sun, 26 Mar 2023 16:13:41 -0400 Subject: [PATCH 587/979] add essential commands; update main page format --- _pkgdown.yaml | 4 +- index.md | 28 +- vignettes/get_started_v5.Rmd | 31 +-- vignettes/seurat5_essential_commands.Rmd | 327 ++++++----------------- vignettes/vignettes_v5.yaml | 81 +++--- 5 files changed, 163 insertions(+), 308 deletions(-) diff --git a/_pkgdown.yaml b/_pkgdown.yaml index bf124eaf5..da41f2eb6 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -15,10 +15,10 @@ navbar: left: - text: "Install" href: articles/install.html - - text: "Get started" - href: articles/get_started.html - text: "Seurat v5" href: articles/get_started_v5.html + - text: "Get started" + href: articles/get_started.html - text: "Vignettes" menu: - text: Introductory Vignettes diff --git a/index.md b/index.md index 6f2d34486..ad2694402 100644 --- a/index.md +++ b/index.md @@ -1,30 +1,36 @@ ![](articles/assets/seurat_banner.jpg) -# Beta release of Seurat v5 +## **Beta release of Seurat v5** We are excited to release an initial beta version of Seurat v5! This update brings the following new features and functionality: -* **Analysis of sequencing and imaging-based spatial datasets.** Spatially resolved datasets are redefining our understanding of cellular interactions and the organization of human tissues. Both sequencing-based(i.e. Visium, SLIDE-seq, etc.), and imaging-based (MERFISH/Vizgen, Xenium, CosMX, etc.) technologies have unique advantages, and require tailored analytical methods and software infrastructure. In Seurat v5, we introduce flexible and diverse support for a wide variety of spatially resolved data types, and support for analytical techniqiues for scRNA-seq integration, deconvolution, and niche identification. +* **Analysis of sequencing and imaging-based spatial datasets:** Spatially resolved datasets are redefining our understanding of cellular interactions and the organization of human tissues. Both sequencing-based(i.e. Visium, SLIDE-seq, etc.), and imaging-based (MERFISH/Vizgen, Xenium, CosMX, etc.) technologies have unique advantages, and require tailored analytical methods and software infrastructure. In Seurat v5, we introduce flexible and diverse support for a wide variety of spatially resolved data types, and support for analytical techniqiues for scRNA-seq integration, deconvolution, and niche identification. - Vignette: [Analysis of spatial datasets (Sequencing-based)](articles/seurat5_spatial_vignette.html) - Vignette: [Analysis of spatial datasets (Imaging-based)](articles/seurat5_spatial_vignette_2.html)\ - -* **Integrative multimodal analysis.** The cellular transcriptome is just one aspect of cellular identity, and recent technologies enable routine profiling of chromatin accessibility, histone modifications, and protein levels from single cells. In Seurat v5, we introduce 'bridge integration', a statistical method to integrate experiments measuring different modalities (i.e. separate scRNA-seq and scATAC-seq datasets), using a separate multiomic dataset as a molecular 'bridge'. For example, we demonstrate how to map scATAC-seq datasets onto scRNA-seq datasets, to assist users in interpreting and annotating data from new modalities.\ - We recognize that while the goal of matching shared cell types across datasets may be important for many problems, users may also be concerned about which method to use, or that integration could result in a loss of biological resolution. In Seurat v5, we also introduce flexible and streamlined workflows for the integration of multiple scRNA-seq datasets. This makes it easier to explore the results of different integration methods, and to compare these results to a workflow that excludes integration steps. +\ +* **Integrative multimodal analysis:** The cellular transcriptome is just one aspect of cellular identity, and recent technologies enable routine profiling of chromatin accessibility, histone modifications, and protein levels from single cells. In Seurat v5, we introduce 'bridge integration', a statistical method to integrate experiments measuring different modalities (i.e. separate scRNA-seq and scATAC-seq datasets), using a separate multiomic dataset as a molecular 'bridge'. For example, we demonstrate how to map scATAC-seq datasets onto scRNA-seq datasets, to assist users in interpreting and annotating data from new modalities.\ +\ +We recognize that while the goal of matching shared cell types across datasets may be important for many problems, users may also be concerned about which method to use, or that integration could result in a loss of biological resolution. In Seurat v5, we also introduce flexible and streamlined workflows for the integration of multiple scRNA-seq datasets. This makes it easier to explore the results of different integration methods, and to compare these results to a workflow that excludes integration steps. - Paper: [Dictionary learning for integrative, multimodal, and scalable single-cell analysis](https://doi.org/10.1101/2022.02.24.481684) - - Vignette: [Cross-modality Bridge Integration](articles/seurat5_integration_bridge.html) + - Vignette: [Streamlined integration of scRNA-seq data](articles/seurat5_integration.html) + - Vignette: [Cross-modality bridge integration](articles/seurat5_integration_bridge.html) - Website: [Azimuth-ATAC, reference-mapping for scATAC-seq datasets](https://azimuth.hubmapconsortium.org/references/)\ - -* **Flexible, interactive, and highly scalable analsyis.** The size and scale of single-cell sequencing datasets is rapidly increasing, outpacing even Moore's law. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore exciting datasets spanning millions of cells, even if they cannot be fully loaded into memory. We introduce support for 'sketch'-based analysis, where representative subsamples of a large dataset are stored in-memory to enable rapid and iterative analysis - while the full dataset remains accessible via on-disk storage.\ - We enable high-performance via the BPCells package, developed by Ben Parks in the Greenleaf Lab. The BPCells package enables high-performance analysis via innovative bit-packing compression techniques, optimized C++ code, and use of streamlined and lazy operations. +\ +* **Flexible, interactive, and highly scalable analsyis:** The size and scale of single-cell sequencing datasets is rapidly increasing, outpacing even Moore's law. In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore exciting datasets spanning millions of cells, even if they cannot be fully loaded into memory. We introduce support for 'sketch'-based analysis, where representative subsamples of a large dataset are stored in-memory to enable rapid and iterative analysis - while the full dataset remains accessible via on-disk storage.\ +\ +We enable high-performance via the BPCells package, developed by Ben Parks in the Greenleaf Lab. The BPCells package enables high-performance analysis via innovative bit-packing compression techniques, optimized C++ code, and use of streamlined and lazy operations. - Vignette: [Sketch-based clustering of 1.3M brain cells (10x Genomics)](articles/seurat5_sketch_analysis.html) - Vignette: [Sketch-based integration of 1M healthy and diabetic PBMC (Parse Biosciences)](articles/ParseBio_sketch_integration.html) - - BPCells Documentation: [Scaling Single Cell Analysis to Milllions of Cells](https://bnprks.github.io/BPCells/) + - Vignette: [Mapping 1.5M cells from multiple studies to an Azimuth reference](articles/COVID_SCTMapping.html) - Vignette: [Interacting with BPCell matrices in Seurat v5](articles/seurat5_bpcells_interaction_vignette.html) + - BPCells R Package: [Scaling Single Cell Analysis to Millions of Cells](https://bnprks.github.io/BPCells/)\ +\ +* **Backwards compatibility:** While Seurat v5 introduces new functionality, we have ensured that the software is backwards-compatible with previous versions, so that users will continue to be able to re-run existing workflows. As v5 is still in beta, the CRAN installation (`install.packages("Seurat")`) will continue to install Seurat v4, but users can opt-in to test Seurat v5 by following the instructions in our [install page](install.html).\ -# About Seurat +## **About Seurat** Seurat is an R package designed for QC, analysis, and exploration of single-cell RNA-seq data. Seurat aims to enable users to identify and interpret sources of heterogeneity from single-cell transcriptomic measurements, and to integrate diverse types of single-cell data. diff --git a/vignettes/get_started_v5.Rmd b/vignettes/get_started_v5.Rmd index 7f18dc10a..3643560d5 100644 --- a/vignettes/get_started_v5.Rmd +++ b/vignettes/get_started_v5.Rmd @@ -1,5 +1,5 @@ --- -title: "Getting started with Seurat 5" +title: "Introduction to Seurat v5" output: html_document: theme: united @@ -102,41 +102,38 @@ vdat <- read_yaml(file = "vignettes_v5.yaml") ``` -# Introduction to Seurat 5 +We provide a series of vignettes, tutorials, and analysis walkthroughs to help users get started with Seurat v5. These vignettes are meant to highlight new functions and features supported by Seurat v5. Seurat v5 is backwards compatible with previous versions, so existing user workflows (as well as [previously released Seurat vignettes](get_started.html)) will continue to work even when using Seurat v5. -The following vignettes outline how to install, create, and interact with Seurat 5 objects. +# Spatial analysis + +These vignettes will help introduce users to the analysis of spatial datasets in Seurat v5, including technologies that leverage sequencing-based readouts, as well as technologies that leverage in-situ imaging-based readouts. The vignettes introduce data from multiple platforms including 10x Visium, SLIDE-seq, Vizgen MERSCOPE, 10x Xenium, Nanostring CosMx, and Akoya CODEX. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 1) ``` -# Analysis in Seurat 5 - -Below we demonstrate new functionality introduced in Seurat 5 including: - -* Sketch-based workflows to efficiently process large datasets while preserving rare and abundant cell types -* Bridge integration to annotate cells from complementary technologies with a scRNA-seq reference -* Spatial infrastructure to analyze public datasets from multiple technologies -* Integration using CCA, RPCA, scVI, and Harmony in a common framework - -## Large Dataset Analysis +# Streamlined and multimodal integration +Performing integrative analysis in order to identify shared cell types across multiple datasets is an increasingly important analytical step in single-cell workflows. These vignettes demonstrate new methods and infrastructure for integrative analysis in Seurat v5. They include a streamlined analytical workflow to integrate scRNA-seq datasets, and the use of 'bridge integration' for harmonizing datasets across modalities. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 2) ``` -## Integration +# Flexible analysis of massively scalable datasets -Perform integration across conditions using Seurat-based and community methods. +In Seurat v5, we introduce new infrastructure and methods to analyze, interpret, and explore datasets that extend to millions of cells. We introduce support for 'sketch-based' techniques, where a subset of representative cells are stored in memory to enable rapid and iterative exploration, while the remaining cells are stored on-disk. Users can flexibly switch between both data representations, and we leverage the [BPCells package](https://bnprks.github.io/BPCells/) from Ben Parks in the Greenleaf lab to enable high-performance analysis of disk-backed data. +\ +\ +The vignettes below demonstrate three scalable analyses in Seurat v5: Unsupervised clustering analysis of a large dataset (1.3M neurons), Unsupervised integration and comparison of 1M PBMC from healthy and diabetic patients, and Supervised mapping of 1.5M immune cells from healthy and COVID donors. In all cases, the vignettes perform these analyses without ever loading the full datasets into memory. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 3) ``` -## Spatial Analysis +# References and additional documentation -Use Seurat to analyze data from imaging-based and sequencing-based spatial technologies. +We include brief vignettes describing install instructions, a 'cheat sheet' of commands for interacting with Seurat v5 assays, and additional documentation for using the BPCells package together with Seurat. ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 4) diff --git a/vignettes/seurat5_essential_commands.Rmd b/vignettes/seurat5_essential_commands.Rmd index 66ea4d068..19b90fd79 100644 --- a/vignettes/seurat5_essential_commands.Rmd +++ b/vignettes/seurat5_essential_commands.Rmd @@ -1,285 +1,132 @@ --- -title: "Seurat Command List" +title: "Seurat 5 Essential Commands" output: html_document: theme: united df_print: kable pdf_document: default -date: 'Compiled: `r Sys.Date()`' +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' --- - -```{r setup, include=TRUE} +*** +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) knitr::opts_chunk$set( - echo = TRUE, tidy = TRUE, - tidy.opts = list(width.cutoff = 120), - message = FALSE, + tidy.opts = list(width.cutoff = 95), warning = FALSE, - results = 'hold', - eval = FALSE, - error = TRUE + error = TRUE, + message = FALSE, + fig.width = 8, + time_it = TRUE ) ``` -```{r load-data, echo=FALSE} -library(Seurat) -library(ggplot2) -pbmc <- readRDS(file = '~/Downloads/pbmc3k/pbmc3k_final.rds') -pbmc <- UpdateSeuratObject(pbmc) -cbmc.rna <- as.sparse(x = read.csv("~/Downloads/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz", sep = ",", header = TRUE, row.names = 1)) +Here, we describe important commands and functions to store, access, and process data using Seurat v5. -# To make life a bit easier going forward, we're going to discard all but the top 100 most highly expressed mouse genes, and remove the "HUMAN_" from the CITE-seq prefix -cbmc.rna <- CollapseSpeciesExpressionMatrix(object = cbmc.rna) - -# Load in the ADT UMI matrix -cbmc.adt <- as.sparse(x = read.csv("~/Downloads/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz", sep = "," ,header = TRUE, row.names = 1)) - -# When adding multimodal data to Seurat, it's okay to have duplicate feature names. Each set of modal data (eg. RNA, ADT, etc.) is stored in its own Assay object. -# One of these Assay objects is called the "default assay", meaning it's used for all analyses and visualization. -# To pull data from an assay that isn't the default, you can specify a key that's linked to an assay for feature pulling. -# To see all keys for all objects, use the Key function. -# Lastly, we observed poor enrichments for CCR5, CCR7, and CD10 - and therefore remove them from the matrix (optional) -cbmc.adt <- cbmc.adt[setdiff(x = rownames(x = cbmc.adt), y = c('CCR5', 'CCR7', 'CD10')), ] -``` - -# Seurat Standard Worflow - -The standard Seurat workflow takes raw single-cell expression data and aims to find clusters within the data. For full details, please read our tutorial. This process consists of data normalization and variable feature selection, data scaling, a PCA on variable features, construction of a shared-nearest-neighbors graph, and clustering using a modularity optimizer. Finally, we use a t-SNE to visualize our clusters in a two-dimensional space. - -```{r seurat-standard-workflow} -pbmc.counts <- Read10X(data.dir = "~/Downloads/pbmc3k/filtered_gene_bc_matrices/hg19/") -pbmc <- CreateSeuratObject(counts = pbmc.counts) -pbmc <- NormalizeData(object = pbmc) -pbmc <- FindVariableFeatures(object = pbmc) -pbmc <- ScaleData(object = pbmc) -pbmc <- RunPCA(object = pbmc) -pbmc <- FindNeighbors(object = pbmc) -pbmc <- FindClusters(object = pbmc) -pbmc <- RunTSNE(object = pbmc) -DimPlot(object = pbmc, reduction = 'tsne') +```{r loaddata} +library(Seurat) +library(SeuratData) +library(BPCells) +options(Seurat.object.assay.version = "v5") ``` -# `Seurat` Object Interaction +The Assay5 object is able to support different matrix data structures to best suit particular analyses. The PBMC 3k dataset contains ~3,000 cells, which can easily be stored in-memory as a sparse `dgCMatrix`. On the other hand, it can be impractical in terms of memory and computation to store and concomitantly analyze millions of cells in-memory. Thus, the mouse brain dataset stores counts on-disk in an Assay5 object. -Since Seurat v3.0, we’ve made improvements to the Seurat object, and added new methods for user interaction. We also introduce simple functions for common tasks, like subsetting and merging, that mirror standard R functions. +```{r matrix} +pbmc3k <- LoadData("pbmc3k") +mousebrain1m <- readRDS("/brahms/hartmana/vignette_data/1p3_million_mouse_brain.rds") - +# Both are Assay5's +print(class(pbmc3k[["RNA"]])) +print(class(mousebrain1m[["RNA"]])) -```{r features-and-cells} -# Get cell and feature names, and total numbers -colnames(x = pbmc) -Cells(object = pbmc) -rownames(x = pbmc) -ncol(x = pbmc) -nrow(x = pbmc) +# But different underlying data structures storing counts +print(class(LayerData(pbmc3k[["RNA"]], layer = "counts"))) +print(class(LayerData(mousebrain1m[["RNA"]], layer = "counts"))) ``` -```{r idents} -# Get cell identity classes -Idents(object = pbmc) -levels(x = pbmc) - -# Stash cell identity classes -pbmc[['old.ident']] <- Idents(object = pbmc) -pbmc <- StashIdent(object = pbmc, save.name = 'old.ident') - -# Set identity classes -Idents(object = pbmc) <- 'CD4 T cells' -Idents(object = pbmc, cells = 1:10) <- 'CD4 T cells' +Despite the drastic difference in dataset size, the 1.3M cell dataset occupies a small memory footprint thanks to on-disk storage. -# Set identity classes to an existing column in meta data -Idents(object = pbmc, cells = 1:10) <- 'orig.ident' -Idents(object = pbmc) <- 'orig.ident' +```{r} +print(paste("PBMC 3k contains", length(Cells(pbmc3k)), "cells")) +print(paste("Mouse brain 1.3M contains", length(Cells(mousebrain1m)), "cells")) -# Rename identity classes -pbmc <- RenameIdents(object = pbmc, 'CD4 T cells' = 'T Helper cells') +# Despite the mouse brain dataset containing 1.3 million cells, the assay is under 350Mbs in size due to on-disk storage +print(paste("PBMC 3k assay size:", format(object.size(pbmc3k[["RNA"]]), units = "Mb"))) +print(paste("Mouse brain 1.3M assay size:", format(object.size(mousebrain1m[["RNA"]]), units = "Mb"))) ``` -```{r subsetting} -# Subset Seurat object based on identity class, also see ?SubsetData -subset(x = pbmc, idents = 'B cells') -subset(x = pbmc, idents = c('CD4 T cells', 'CD8 T cells'), invert = TRUE) - -# Subset on the expression level of a gene/feature -subset(x = pbmc, subset = MS4A1 > 3) - -# Subset on a combination of criteria -subset(x = pbmc, subset = MS4A1 > 3 & PC1 > 5) -subset(x = pbmc, subset = MS4A1 > 3, idents = 'B cells') - -# Subset on a value in the object meta data -subset(x = pbmc, subset = orig.ident == "Replicate1") +We can also cast between `Assay` and `Assay5` objects with `as()`. Note that the `RNA` assay is an `Assay5` object. -# Downsample the number of cells per identity class -subset(x = pbmc, downsample = 100) -``` +```{r} +pbmc3k[["RNA3"]] <- as(object = pbmc3k[["RNA"]], Class = "Assay") -```{r merging, eval=FALSE} -# Merge two Seurat objects -merge(x = pbmc1, y = pbmc2) -# Merge more than two Seurat objects -merge(x = pbmc1, y = list(pbmc2, pbmc3)) +pbmc3k[["RNA5"]] <- as(object = pbmc3k[["RNA3"]], Class = "Assay5") ``` -# Data Access - -Accessing data in Seurat is simple, using clearly defined accessors and setters to quickly find the data needed. +The `Seurat.object.assay.version` option can also be set to create `Assay5` or `Assay` objects when new Assays or Seurat objects are created. -```{r metadata} -# View metadata data frame, stored in object@meta.data -pbmc[[]] - -# Retrieve specific values from the metadata -pbmc$nCount_RNA -pbmc[[c('percent.mito', 'nFeature_RNA')]] - -# Add metadata, see ?AddMetaData -random_group_labels <- sample(x = c('g1', 'g2'), size = ncol(x = pbmc), replace = TRUE) -pbmc$groups <- random_group_labels -``` - -```{r expression-matrices, eval=FALSE} -# Retrieve or set data in an expression matrix ('counts', 'data', and 'scale.data') -GetAssayData(object = pbmc, slot = 'counts') -pbmc <- SetAssayData(object = pbmc, slot = 'scale.data', new.data = new.data) +```{r} +# create v3 assays +options(Seurat.object.assay.version = "v3") +pbmc.data <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") +pbmc <- CreateSeuratObject(counts = pbmc.data) +print(class(pbmc[["RNA"]])) + +# create v5 assays +options(Seurat.object.assay.version = "v5") +pbmc.data <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") +pbmc <- CreateSeuratObject(counts = pbmc.data) +print(class(pbmc[["RNA"]])) ``` -```{r embeddings-loadings} -# Get cell embeddings and feature loadings -Embeddings(object = pbmc, reduction = 'pca') -Loadings(object = pbmc, reduction = 'pca') -Loadings(object = pbmc, reduction = 'pca', projected = TRUE) -``` +Layers can be split based on metadata. -```{r fetchdata} -# FetchData can pull anything from expression matrices, cell embeddings, or metadata -FetchData(object = pbmc, vars = c('PC_1', 'percent.mito', 'MS4A1')) +```{r} +pbmc$split.data <- sample(c("A", "B", "C"), size = ncol(pbmc), replace = TRUE) +pbmc[["RNA"]] <- split(x = pbmc[["RNA"]], f = pbmc$split.data) +Layers(pbmc[["RNA"]]) ``` -# Visualization in Seurat - -Seurat has a vast, ggplot2-based plotting library. All plotting functions will return a ggplot2 plot by default, allowing easy customization with ggplot2. - -```{r visualization} - -# Dimensional reduction plot for PCA or tSNE -DimPlot(object = pbmc, reduction = 'tsne') -DimPlot(object = pbmc, reduction = 'pca') - -# Dimensional reduction plot, with cells colored by a quantitative feature -FeaturePlot(object = pbmc, features = "MS4A1") - -# Scatter plot across single cells, replaces GenePlot -FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "PC_1") -FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "CD3D") - -# Scatter plot across individual features, repleaces CellPlot -CellScatter(object = pbmc, cell1 = "AGTCTACTAGGGTG", cell2 = "CACAGATGGTTTCT") +And then joined back together in a single layer. -VariableFeaturePlot(object = pbmc) - -#Violin and Ridge plots -VlnPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32")) -RidgePlot(object = pbmc, feature = c("LYZ", "CCL5", "IL32")) - -# Heatmaps -DoHeatmap(object = pbmc,features = heatmap_markers) -DimHeatmap(object = pbmc,reduction = 'pca', cells = 200) - -# New things to try! -# Note that plotting functions now return ggplot2 objects, so you can add themes, titles, and options onto them -VlnPlot(object = pbmc, features = "MS4A1", split.by = "groups") -DotPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"), split.by = "groups") -FeaturePlot(object = pbmc, features = c("MS4A1", "CD79A"), blend = TRUE) -DimPlot(object = pbmc) + DarkTheme() -DimPlot(object = pbmc) + labs(title = '2,700 PBMCs clustered using Seurat and viewed\non a two-dimensional tSNE') +```{r} +pbmc[["RNA"]] <- JoinLayers(pbmc[["RNA"]]) +Layers(pbmc[["RNA"]]) ``` -Seurat provides many prebuilt themes that can be added to ggplot2 plots for quick customization - -| Theme | Function | -| ----- | -------- | -| `DarkTheme` | Set a black background with white text | -| `FontSize` | Set font sizes for various elements of a plot | -| `NoAxes` | Remove axes and axis text | -| `NoLegend` | Remove all legend elements | -| `RestoreLegend` | Restores a legend after removal | -| `RotatedAxis` | Rotates x-axis labels | - -```{r helper-functions} +New layers can be added to assays with the `$` symbol. -# Plotting helper functions work with ggplot2-based scatter plots, such as DimPlot, FeaturePlot, CellScatter, and FeatureScatter -plot <- DimPlot(object = pbmc) + NoLegend() - -# HoverLocator replaces the former `do.hover` argument -# It can also show extra data throught the `information` argument, designed to work smoothly with FetchData -HoverLocator(plot = plot, information = FetchData(object = pbmc, vars = c("ident", "PC_1", "nFeature_RNA"))) - -# FeatureLocator replaces the former `do.identify` -select.cells <- FeatureLocator(plot = plot) - -# Label points on a ggplot object -LabelPoints(plot = plot, points = TopCells(object = pbmc[["pca"]]), repel = TRUE) +```{r} +Layers(pbmc[["RNA"]]) +# create a new layer with a subset of cells and normalize +data.ss <- NormalizeData(pbmc[["RNA"]]$counts[, Cells(pbmc)[1:10]]) +pbmc[["RNA"]]$data.subset <- data.ss +Layers(pbmc[["RNA"]]) ``` -# Multi-Assay Features - -With Seurat, you can easily switch between different assays at the single cell level (such as ADT counts from CITE-seq, or integrated/batch-corrected data). Most functions now take an assay parameter, but you can set a Default Assay to avoid repetitive statements. - -```{r multi-assay} -cbmc <- CreateSeuratObject(counts = cbmc.rna) -# Add ADT data -cbmc[['ADT']] <- CreateAssayObject(counts = cbmc.adt) -# Run analyses by specifying the assay to use -NormalizeData(object = cbmc, assay = 'RNA') -NormalizeData(object = cbmc, assay = 'ADT', method = 'CLR') +Assay data can be accessed with `LayerData` or `FetchData` depending on the use case. -# Retrieve and set the default assay -DefaultAssay(object = cbmc) -DefaultAssay(object = cbmc) <- 'ADT' -DefaultAssay(object = cbmc) - -# Pull feature expression from both assays by using keys -FetchData(object = cbmc, vars = c('rna_CD3E', 'adt_CD3')) - -# Plot data from multiple assays using keys -FeatureScatter(object = cbmc, feature1 = "rna_CD3E", feature2 = "adt_CD3") +```{r} +# returns information in whichever format the layer is stored +# in this example, the counts layer is stored as a dgCMatrix +b.marker.counts <- LayerData(object = pbmc, layer = "counts")[c("MS4A1", "CD79A"), ] +b.marker.counts[, 1:30] + +# returns information as a data.frame +b.marker.counts.df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "rna_CD79A")) +head(b.marker.counts.df) ``` -# Seurat v2.X vs v3.X -| Seurat v2.X | Seurat v3.X | -| ----------- | ----------- | -| `object@data` | `GetAssayData(object = object)` | -| `object@raw.data` | `GetAssayData(object = object, slot = "counts")` | -| `object@scale.data` | `GetAssayData(object = object, slot = "scale.data")` | -| `object@cell.names` | `colnames(x = object)` | -| `rownames(x = object@data)` | `rownames(x = object)` | -| `object@var.genes` | `VariableFeatures(object = object)` | -| `object@hvg.info` | `HVFInfo(object = object)` | -| `object@assays$assay.name` | `object[["assay.name"]]` | -| `object@dr$pca` | `object[["pca"]]` | -| `GetCellEmbeddings(object = object, reduction.type = "pca")` | `Embeddings(object = object, reduction = "pca")` | -| `GetGeneLoadings(object = object, reduction.type = "pca")` | `Loadings(object = object, reduction = "pca")` | -| `AddMetaData(object = object, metadata = vector, col.name = "name")` | `object$name <- vector` | -| `object@meta.data$name` | `object$name` | -| `object@idents` | `Idents(object = object)` | -| `SetIdent(object = object, ident.use = "new.idents")` | `Idents(object = object) <- "new.idents"` | -| `SetIdent(object = object, cells.use = 1:10, ident.use = "new.idents")` | `Idents(object = object, cells = 1:10) <- "new.idents"` | -| `StashIdent(object = object, save.name = "saved.idents")` | `object$saved.idents <- Idents(object = object)` | -| `levels(x = object@idents)` | `levels(x = object)` | -| `RenameIdent(object = object, old.ident.name = "old.ident", new.ident.name = "new.ident")` | `RenameIdents(object = object, "old.ident" = "new.ident")` | -| `WhichCells(object = object, ident = "ident.keep")` | `WhichCells(object = object, idents = "ident.keep")` | -| `WhichCells(object = object, ident.remove = "ident.remove")` | `WhichCells(object = object, idents = "ident.remove", invert = TRUE)` | -| `WhichCells(object = object, max.cells.per.ident = 500)` | `WhichCells(object = object, downsample = 500)` | -| `WhichCells(object = object, subset.name = "name", low.threshold = low, high.threshold = high)` | `WhichCells(object = object, expression = name > low & name < high)` | -| `FilterCells(object = object, subset.names = "name", low.threshold = low, high.threshold = high)` | `subset(x = object, subset = name > low & name < high)` | -| `SubsetData(object = object, subset.name = "name", low.threshold = low, high.threshold = high)` | `subset(x = object, subset = name > low & name < high)` | -| `MergeSeurat(object1 = object1, object2 = object2)` | `merge(x = object1, y = object2)` | - -
    - **Session Info** -```{r} -sessionInfo() -``` -
    diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index c5ba1e16e..bd8656767 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -1,62 +1,67 @@ -- category: Introduction +- category: Spatial analysis vignettes: - - title: Seurat 5 Assay - name: seurat5_assay + - title: Analysis of spatial datasets (Imaging-based) + name: seurat5_spatial_vignette_2 summary: | - Explore the new assay structure introduced in Seurat v5. - image: assay.png + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. + image: spatial_vignette_2.jpg - - title: BPCells Interaction - name: seurat5_bpcells_interaction_vignette + - title: Analysis of spatial datasets (Sequencing-based) + name: spatial_vignette summary: | - Load and save large on-disk matrices using BPCells. - image: bpcells.png + Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. + image: spatial_vignette_ttr.jpg -- category: Large dataset analysis +- category: Streamlined and multimodal integration vignettes: - - - title: COVID Mapping - name: COVID_SCTMapping + - title: scRNA-seq Integration + name: seurat5_integration summary: | - Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. - image: COVID_SCTMapping.png + Integrate scRNA-seq datasets using a variety of computational methods. + image: integration_seurat5.jpg - - title: Sketch Clustering - name: seurat5_sketch_analysis + - title: Cross-modality Bridge Integration + name: seurat5_integration_bridge summary: | - Analyze a 1.3 million cell mouse brain dataset using on-disk capabilities powered by BPCells. - image: sketch.png + Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. + image: bridge_integration.png -- category: Integration +- category: Flexible analysis of massively scalable datasets vignettes: - - title: Integration - name: seurat5_integration + - title: Unsupervised clustering of 1.3M neurons + name: seurat5_sketch_analysis summary: | - Integrate scRNA-seq datasets using a variety of computational methods. - image: integration_seurat5.jpg + Analyze a 1.3 million cell mouse brain dataset using on-disk capabilities powered by BPCells. + image: sketch_1p3.png - - title: Sketch Integration + - title: Integrating/comparing healthy and diabetic samples name: ParseBio_sketch_integration summary: | Perform sketch integration on a large dataset from Parse Biosciences. image: sketch.png - - title: Bridge Integration - name: seurat5_integration_bridge + - title: Supervised mapping of 1.5M immune cells + name: COVID_SCTMapping summary: | - Map scATAC-seq onto an scRNA-seq reference using a multi-omic bridge dataset. - image: bridge_integration.png + Map PBMC datasets from COVID-19 patients to a healthy PBMC reference. + image: COVID_SCTMapping.png -- category: Spatial +- category: References and additional documentation vignettes: - - title: Analysis of spatial datasets (Imaging-based) - name: seurat5_spatial_vignette_2 + - title: Seurat 5 Essential Commands + name: seurat5_essential_commands summary: | - Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERSCOPE, Xenium, CosMx SMI, and CODEX. - image: spatial_vignette_2.jpg + Explore the new assay structure introduced in Seurat v5. + image: assay.png - - title: Analysis of spatial datasets (Sequencing-based) - name: spatial_vignette + - title: BPCells Interaction + name: seurat5_bpcells_interaction_vignette summary: | - Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. - image: spatial_vignette_ttr.jpg + Load and save large on-disk matrices using BPCells. + image: bpcells.png + + - title: Seurat v5 Installation + name: install + summary: | + Install Seurat v5 and the required dependencies. + image: SeuratV5.png From 34fe0e07a0a210c5c8247c8e738ee3179ac66958 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 26 Mar 2023 17:32:02 -0400 Subject: [PATCH 588/979] update parsebio vig --- vignettes/ParseBio_sketch_integration.Rmd | 31 ++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 0498478ee..40d7dff75 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -47,6 +47,7 @@ library(BPCells) library(dplyr) library(ggplot2) library(ggrepel) +library(patchwork) # set this option when analyzing large datasets options(future.globals.maxSize = 3e9) options(Seurat.object.assay.version = "v5") @@ -106,12 +107,12 @@ plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') plot.s2 <- DimPlot(object, group.by = 'celltype.manual', reduction = 'umap') ``` -```{r, fig.width=10, fig.height=5} -plot.s1 + plot.s2 +```{r, fig.width=10, fig.height=10} +plot.s1 + plot.s2 + plot_layout(ncol = 1) ``` ## Integrate the full datasets -Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `ProjectIntegration` function to integrate all cells. After this function is run, the `integrated.rpca.full` space now embeds all cells in the dataset.Even though all cells in the dataset have been integrated together, the non-sketched cells are not loaded into memory. Users can still switch between the `sketch` (sketched cells, in-memory) and `RNA` (full dataset, on disk) for analysis. +Now that we have integrated the subset of atoms of each dataset, placing them each in an integrated low-dimensional space, we can now place each cell from each dataset in this space as well. We load the full datasets back in individually, and use the `ProjectIntegration` function to integrate all cells. After this function is run, the `integrated.rpca.full` space now embeds all cells in the dataset.Even though all cells in the dataset have been integrated together, the non-sketched cells are not loaded into memory. Users can still switch between the `sketch` (sketched cells, in-memory) and `RNA` (full dataset, on disk) for analysis. After integration, we can also project cell type labels from the sketched cells onto the full dataset using `ProjectData`. ```{r} @@ -122,17 +123,29 @@ object[['sketch']] <- split(object[['sketch']], f = object$sample) object <- ProjectIntegration(object = object, sketched.assay = 'sketch', assay = 'RNA', - reduction = 'integrated.rpca') + reduction = 'integrated.rpca' + ) + + +object <- ProjectData(object = object, + sketched.assay = 'sketch', + assay = 'RNA', + sketched.reduction = 'integrated.rpca.full', + full.reduction = 'integrated.rpca.full', + dims = 1:30, + refdata = list(celltype.full = 'celltype.manual') + ) + ``` ```{r} object <- RunUMAP(object, reduction = 'integrated.rpca.full', dims = 1:30 , reduction.name = 'umap.full', reduction.key = 'UMAP_full_') ``` -```{r,fig.width=10, fig.height=5} -p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.1) + NoLegend() -p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.manual', label = T, alpha = 0.1) + NoLegend() -p1 + p2 +```{r, fig.width=10, fig.height=10} +p1 <- DimPlot(object, reduction = 'umap.full', group.by = 'sample',alpha = 0.1) +p2 <- DimPlot(object, reduction = 'umap.full', group.by = 'celltype.full', alpha = 0.1) +p1 + p2 + plot_layout(ncol = 1) ``` ## Compare healthy and diabetic samples @@ -143,7 +156,7 @@ After we aggregate cells, we can perform celltype-specific differential expressi ```{r} bulk <- AggregateExpression(object, return.seurat = T, slot = 'counts', - assays = 'RNA', group.by = c("celltype.manual","sample", 'disease')) + assays = 'RNA', group.by = c("celltype.full","sample", 'disease')) # each sample is an individual-specific celltype-specific pseudobulk profile tail(Cells(bulk)) From a5fbbb3e3a84167f6d59c443d39a8b36e737f35f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 26 Mar 2023 17:41:55 -0400 Subject: [PATCH 589/979] fix legend alpha --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index dc4c0d94c..217e1bce0 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -8179,7 +8179,7 @@ SingleDimPlot <- function( ) } plot <- plot + - guides(color = guide_legend(override.aes = list(size = 3))) + + guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) + labs(color = NULL, title = col.by) + CenterTitle() if (label && !is.null(x = col.by)) { From 8c91a9344790558e325849217023673328f5015c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sun, 26 Mar 2023 20:09:03 -0400 Subject: [PATCH 590/979] update parBio big --- vignettes/ParseBio_sketch_integration.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 40d7dff75..08d127d1e 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -171,7 +171,7 @@ cd14.bulk <- subset(bulk,celltype == "CD14 Mono") Idents(cd14.bulk) <- 'disease' de_markers <- FindMarkers(cd14.bulk, ident.1 = 'D',ident.2 = 'H', slot = 'counts', test.use = 'DESeq2', verbose = F ) de_markers$gene <- rownames(de_markers) -ggplot(de_markers, aes(avg_log2FC, -log10(p_val_adj))) + geom_point(size=0.5, alpha=0.5) + theme_bw() + ylab("-log10(p-value)")+geom_text_repel(aes(label = ifelse(p_val_adj<0.01, gene, "")),colour = 'red', size = 3) +ggplot(de_markers, aes(avg_log2FC, -log10(p_val))) + geom_point(size=0.5, alpha=0.5) + theme_bw() + ylab("-log10(unadjusted p-value)")+geom_text_repel(aes(label = ifelse(p_val_adj<0.01, gene, "")),colour = 'red', size = 3) ``` From 367f677f8fb23ebe0faeddcb30a0f25f9bbcbf02 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 12:56:47 -0400 Subject: [PATCH 591/979] update s5 essential --- vignettes/seurat5_essential_commands.Rmd | 78 +++++++++++++++--------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/vignettes/seurat5_essential_commands.Rmd b/vignettes/seurat5_essential_commands.Rmd index 19b90fd79..040e9078c 100644 --- a/vignettes/seurat5_essential_commands.Rmd +++ b/vignettes/seurat5_essential_commands.Rmd @@ -38,6 +38,7 @@ Here, we describe important commands and functions to store, access, and process library(Seurat) library(SeuratData) library(BPCells) +library(dplyr) options(Seurat.object.assay.version = "v5") ``` @@ -46,36 +47,54 @@ The Assay5 object is able to support different matrix data structures to best su ```{r matrix} pbmc3k <- LoadData("pbmc3k") mousebrain1m <- readRDS("/brahms/hartmana/vignette_data/1p3_million_mouse_brain.rds") - + # Both are Assay5's print(class(pbmc3k[["RNA"]])) print(class(mousebrain1m[["RNA"]])) +# Get counts layer by "$" or "[[" symbols. +pbmc3k[["RNA"]]$counts +pbmc3k[["RNA"]][["counts"]] + +# New layers can be added or deleted to assays with the "$" or "[[" symbols. +pbmc3k[["RNA"]]$data <- log1p(pbmc3k[["RNA"]]$counts) +pbmc3k[["RNA"]]$data <- NULL + + # But different underlying data structures storing counts -print(class(LayerData(pbmc3k[["RNA"]], layer = "counts"))) -print(class(LayerData(mousebrain1m[["RNA"]], layer = "counts"))) -``` +print(class(pbmc3k[["RNA"]]$counts)) +print(class(mousebrain1m[["RNA"]]$counts)) +``` Despite the drastic difference in dataset size, the 1.3M cell dataset occupies a small memory footprint thanks to on-disk storage. ```{r} -print(paste("PBMC 3k contains", length(Cells(pbmc3k)), "cells")) -print(paste("Mouse brain 1.3M contains", length(Cells(mousebrain1m)), "cells")) +print(paste("PBMC 3k contains", length(colnames(pbmc3k)), "cells")) +print(paste("Mouse brain 1.3M contains", length(colnames(mousebrain1m)), "cells")) # Despite the mouse brain dataset containing 1.3 million cells, the assay is under 350Mbs in size due to on-disk storage print(paste("PBMC 3k assay size:", format(object.size(pbmc3k[["RNA"]]), units = "Mb"))) print(paste("Mouse brain 1.3M assay size:", format(object.size(mousebrain1m[["RNA"]]), units = "Mb"))) ``` +Get cell names. Since Seurat v5 object doesn't require all assays have the same cells, `Cells()` is designed to get cell names of the default assay and `colnames()` is deigned to get cell names of the entire object +```{r} +pbmc3k[["RNAsub"]] <- subset(pbmc3k[["RNA"]], cells = colnames(pbmc3k)[1:100]) +DefaultAssay(pbmc3k) <- 'RNAsub' +print(length(Cells(pbmc3k))) +print(length(colnames(pbmc3k))) + +``` + + We can also cast between `Assay` and `Assay5` objects with `as()`. Note that the `RNA` assay is an `Assay5` object. ```{r} pbmc3k[["RNA3"]] <- as(object = pbmc3k[["RNA"]], Class = "Assay") - pbmc3k[["RNA5"]] <- as(object = pbmc3k[["RNA3"]], Class = "Assay5") ``` -The `Seurat.object.assay.version` option can also be set to create `Assay5` or `Assay` objects when new Assays or Seurat objects are created. +The `Seurat.object.assay.version` option can also be set to create `Assay5` or `Assay` objects when new Assays or Seurat objects are created. ```{r} # create v3 assays @@ -88,45 +107,48 @@ print(class(pbmc[["RNA"]])) options(Seurat.object.assay.version = "v5") pbmc.data <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") pbmc <- CreateSeuratObject(counts = pbmc.data) +pbmc <- NormalizeData(pbmc) print(class(pbmc[["RNA"]])) ``` -Layers can be split based on metadata. +`CreateAssayObject()` and `CreateAssay5Object()` can be used to create v3 and v5 assay regardless of the setting in `Seurat.object.assay.version` +```{r} +assay.v3 <- CreateAssayObject(counts = pbmc.data) +assay.v5 <- CreateAssay5Object(counts = pbmc.data) +print(class(assay.v3)) +print(class(assay.v5)) +``` +Layers can be split based on metadata. ```{r} pbmc$split.data <- sample(c("A", "B", "C"), size = ncol(pbmc), replace = TRUE) pbmc[["RNA"]] <- split(x = pbmc[["RNA"]], f = pbmc$split.data) Layers(pbmc[["RNA"]]) ``` +Search and get Layers name +```{r} +Layers(object = pbmc[['RNA']], search = 'counts') +Layers(object = pbmc[['RNA']], search = 'counts.B') -And then joined back together in a single layer. +# it will return the exact match first. If no exact match, it will return layer names start with search input +pbmc[['RNA']]$counts <- pbmc[['RNA']]$counts.A +Layers(object = pbmc[['RNA']], search = 'counts') -```{r} -pbmc[["RNA"]] <- JoinLayers(pbmc[["RNA"]]) -Layers(pbmc[["RNA"]]) +pbmc[['RNA']]$counts <- NULL ``` - -New layers can be added to assays with the `$` symbol. +And then joined back together in a single layer. ```{r} -Layers(pbmc[["RNA"]]) -# create a new layer with a subset of cells and normalize -data.ss <- NormalizeData(pbmc[["RNA"]]$counts[, Cells(pbmc)[1:10]]) -pbmc[["RNA"]]$data.subset <- data.ss +pbmc[["RNA"]] <- JoinLayers(pbmc[["RNA"]]) Layers(pbmc[["RNA"]]) ``` -Assay data can be accessed with `LayerData` or `FetchData` depending on the use case. +Assay data can be accessed with `FetchData` depending on the use case. ```{r} -# returns information in whichever format the layer is stored -# in this example, the counts layer is stored as a dgCMatrix -b.marker.counts <- LayerData(object = pbmc, layer = "counts")[c("MS4A1", "CD79A"), ] -b.marker.counts[, 1:30] - -# returns information as a data.frame -b.marker.counts.df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "rna_CD79A")) -head(b.marker.counts.df) +# returns information from both assay and meta.data as a data.frame +fetch_df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "rna_CD79A", 'split.data')) +head(fetch_df) ``` From fa2392c1d9f7e219da56f83cd37e391944859e04 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 13:15:05 -0400 Subject: [PATCH 592/979] fix BPCells VST --- R/preprocessing.R | 2 +- R/preprocessing5.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 1e23c6eb0..c69982656 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -4456,7 +4456,7 @@ ScaleData.IterableMatrix <- function( features.sd <- 1 } if (scale.max != Inf) { - scaled.data <- BPCells::min_by_row(mat = scaled.data, vals = scale.max*feature.sd + feature.mean) + object <- BPCells::min_by_row(mat = object, vals = scale.max*feature.sd + feature.mean) } scaled.data <- (object - features.mean) / features.sd return(scaled.data) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 47ab4baa3..1f4b62a56 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -813,11 +813,11 @@ VST.IterableMatrix <-function( span = span ) hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted - data.standard <- (data - hvf.info$mean) / sqrt(x = hvf.info$variance.expected) - data.standard <- min_scalar( - mat = data.standard, - val = clip %||% sqrt(x = ncol(x = data)) - ) + feature.mean <- hvf.info$mean + feature.sd <- sqrt(x = hvf.info$variance.expected) + standard.max <- clip %||% sqrt(x = ncol(x = data)) + data <- BPCells::min_by_row(mat = data, vals = standard.max*feature.sd + feature.mean) + data.standard <- (data - feature.mean) /feature.sd hvf.info$variance.standardized <- matrix_stats(matrix = data.standard, row_stats = 'variance')$row_stats['variance',] # Set variable features hvf.info$variable <- FALSE From 80c4dad0ebcc85d78a0efbd4d877ba3395db95ff Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 14:55:40 -0400 Subject: [PATCH 593/979] update covid vig --- vignettes/COVID_SCTMapping.Rmd | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index e5162ca2a..776f875b3 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -96,14 +96,14 @@ We can now visualize the 1.5 million query cells from three studies, which have ```{r, fig.width=10, fig.height=6} -DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3) + NoLegend() +DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3, label.size = 3) + NoLegend() ``` ```{r, fig.width=10, fig.height=6} -DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3) + NoLegend() +DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3, label.size = 3) + NoLegend() ``` ## Differential composition analysis - +We now have annotations for all cells from this object. This enables us to determine the proportion of different cell types for each donor and explore variations in cell type composition between healthy individuals and COVID-19 patients. For example, we noticed a reduction in MAIT cells and an increase in plasmablasts among COVID-19 patients. ```{r} df_comp <- as.data.frame.matrix(table(object$donor_id, object$predicted.celltype.l2)) @@ -124,6 +124,8 @@ VlnPlot(obj.comp, features = c("MAIT",'Plasmablast'), group.by = 'disease', slo ``` ## Differential expression analysis +Except for the composition analysis, we use an aggregation-based (pseudobulk) workflow to explore differential genes between healthy and COVID donors. We aggregate all cells within the same cell type and sample using the AggregateExpression function. This returns a Seurat object where each ‘cell’ represents the pseudobulk profile of one cell type in one individual. + ```{r} bulk <- AverageExpression(object, @@ -146,6 +148,8 @@ bulk$disease <- factor(bulk$disease, levels = c('normal', 'COVID-19')) ``` +Once cells are aggregated into individuals, we can execute cell type-specific differential expression analysis between healthy and COVID-19 samples. Here we only visualize certain interferon-stimulated genes across all cell types when comparing healthy and COVID-19 donors. + ```{r, fig.width=10, fig.height=8} VlnPlot(bulk, features = c('IFI6','ISG15', 'IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"), ncol = 1) ``` From 659ca5c0b62ccec3115c8e323ea5411e3842fe45 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 14:58:54 -0400 Subject: [PATCH 594/979] mouse 1.3M dim label size --- vignettes/seurat5_sketch_analysis.Rmd | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 4088a990e..1e5da9245 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -102,7 +102,7 @@ obj <- RunPCA(obj) obj <- FindNeighbors(obj, dims = 1:50) obj <- FindClusters(obj, resolution = 2) obj <- RunUMAP(obj, dims = 1:50, return.model = T) -DimPlot(obj, label = T, reduction = 'umap') + NoLegend() +DimPlot(obj, label = T, label.size = 3, reduction = 'umap') + NoLegend() ``` ```{r,fig.height = 7, fig.width = 10} @@ -130,13 +130,12 @@ DefaultAssay(obj) <- 'RNA' ``` ```{r save.img, include = FALSE, eval=TRUE} -library(ggplot2) -p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "cluster_full", alpha = 0.1) + NoLegend() +p <- DimPlot(obj, label = T, label.size = 3, reduction = "ref.umap", group.by = "cluster_full", alpha = 0.1) + NoLegend() ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) ``` ```{r, fig.width=5, fig.height=5} -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, label.size = 3, reduction = 'ref.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() ``` ```{r, fig.width=10, fig.height=5} @@ -178,7 +177,7 @@ obj.sub <- FindClusters(obj.sub) ``` ```{r, fig.width=5, fig.height=5} -DimPlot(obj.sub, label = T) + NoLegend() +DimPlot(obj.sub, label = T, label.size = 3) + NoLegend() ``` Note that we can start to see distinct interneuron lineages emerging in this dataset. We can see a clear separation of interneuron precursors that originated from the medial ganglionic eminence (Lhx6) or caudal ganglionic eminence (Nr2f2). We can further see the emergence of Sst (Sst) and Pvalb (Mef2c)-committed interneurons, and a CGE-derived Meis2-expressing progenitor population. From 4cd4d159acc67dbd7e5e919ea9eee1ec956c779d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 15:03:28 -0400 Subject: [PATCH 595/979] fix bugs in Inte vig --- vignettes/seurat5_integration.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index ffe95fd49..9f7027d90 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -75,7 +75,7 @@ obj <- RunPCA(obj) We can now visualize the results of a standard analysis without integration. Note that cells are grouping both by cell type and by underlying method. While a UMAP analysis is just a visualization of this, clustering this dataset would return predominantly batch-specific clusters. Especially if previous cell-type annotations were not available, this would make downstream analysis extremely challenging. ```{r unintegratedUMAP, fig.height=5, fig.width=14} obj <- FindNeighbors(obj, dims=1:30, reduction = 'pca') -obj <- FindNeighbors(obj, resolution = 2, cluster.name = "unintegrated_clusters") +obj <- FindClusters(obj, resolution = 2, cluster.name = "unintegrated_clusters") obj <- RunUMAP(obj, dims = 1:30, reduction = 'pca', reduction.name = 'umap.unintegrated') # visualize by batch and cell type annotation # cell type annotations were previously added by Azimuth @@ -142,7 +142,7 @@ wrap_plots(c(p1, p2), ncol=2) We hope that by simplifying the process of performing integrative analysis, users can more carefully evaluate the biological information retained in the integrated dataset. For example, users can compare the expression of biological markers based on different clustering solutions, or visualize one method's clustering solution on different UMAP visualizations. ```{r vlnplots, fig.height=5, fig.width=16, warning=FALSE} -p1 <- VlnPlot(obj, features = "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - Unintegrated Clusters") +p1 <- VlnPlot(obj, features = "rna_CD8A", group.by = 'unintegrated_clusters') + NoLegend() + ggtitle("CD8A - Unintegrated Clusters") p2 <- VlnPlot(obj, "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - CCA Clusters") p3 <- VlnPlot(obj, "rna_CD8A", group.by = 'scvi_clusters') + NoLegend() + ggtitle("CD8A - scVI Clusters") p1 | p2 | p3 From 7ce391b8d560747dbab0e8ce9ddf251eae97e27c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 16:36:48 -0400 Subject: [PATCH 596/979] update covid vig --- vignettes/COVID_SCTMapping.Rmd | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 776f875b3..d226dfa3e 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -58,7 +58,7 @@ To run this vignette please install Seurat v5 as shown [here](articles/install.h -## Load the Multimodal PBMC Reference Dataset and Query Datasets +## Load the PBMC Reference Dataset and Query Datasets We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](https://doi.org/10.1016/j.cell.2021.04.048). We will use the query datasets prepared in BPCells interaction vignette containing scRNA-seq data from three different studies. ```{r load.data} @@ -123,6 +123,7 @@ obj.comp <- subset(obj.comp, subset = disease %in% c('normal','COVID-19')) VlnPlot(obj.comp, features = c("MAIT",'Plasmablast'), group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') ``` + ## Differential expression analysis Except for the composition analysis, we use an aggregation-based (pseudobulk) workflow to explore differential genes between healthy and COVID donors. We aggregate all cells within the same cell type and sample using the AggregateExpression function. This returns a Seurat object where each ‘cell’ represents the pseudobulk profile of one cell type in one individual. @@ -150,17 +151,12 @@ bulk$disease <- factor(bulk$disease, levels = c('normal', 'COVID-19')) Once cells are aggregated into individuals, we can execute cell type-specific differential expression analysis between healthy and COVID-19 samples. Here we only visualize certain interferon-stimulated genes across all cell types when comparing healthy and COVID-19 donors. -```{r, fig.width=10, fig.height=8} +```{r, fig.width=10, fig.height=15} VlnPlot(bulk, features = c('IFI6','ISG15', 'IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"), ncol = 1) ``` - -```{r save.img, include=TRUE} -library(ggplot2) -ggsave(filename = "../output/images/COVID_SCTMapping.jpg", height = 7, width = 8, plot = p3, quality = 50) -``` + ```{r save.times, include=TRUE} -print(as.data.frame(all_times)) write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/COVID_SCTMapping.csv") ``` From 463ed99f684cc1fbdc1937717211736f1083a5d1 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 17:12:21 -0400 Subject: [PATCH 597/979] update covid vig --- vignettes/COVID_SCTMapping.Rmd | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index d226dfa3e..05b7cc266 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -28,6 +28,8 @@ knitr::opts_chunk$set( message = FALSE, warning = FALSE, fig.width = 10, + cache = TRUE, + cache.lazy = FALSE, time_it = TRUE, error = TRUE ) @@ -37,6 +39,7 @@ knitr::opts_chunk$set( library(Seurat) library(BPCells) library(dplyr) +library(patchwork) library(ggplot2) options(future.globals.maxSize = 1e9) ``` @@ -120,8 +123,9 @@ obj.comp <- CreateSeuratObject(counts = t(df_comp_ratio), meta.data = df_disease obj.comp <- subset(obj.comp, subset = disease %in% c('normal','COVID-19')) ``` ```{r, fig.width=10, fig.height=6} -VlnPlot(obj.comp, features = c("MAIT",'Plasmablast'), group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') - +p1 <- VlnPlot(obj.comp, features = "MAIT", group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') +p2 <- VlnPlot(obj.comp, features = 'Plasmablast', group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') +p1 + p2 + plot_layout(ncol = 2) ``` ## Differential expression analysis @@ -155,11 +159,6 @@ Once cells are aggregated into individuals, we can execute cell type-specific di VlnPlot(bulk, features = c('IFI6','ISG15', 'IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"), ncol = 1) ``` - -```{r save.times, include=TRUE} -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/COVID_SCTMapping.csv") -``` -
    **Session Info** ```{r} From 72b4ffe022689438be5822820c7e080f382469f0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 17:26:20 -0400 Subject: [PATCH 598/979] remove cache --- vignettes/COVID_SCTMapping.Rmd | 2 -- 1 file changed, 2 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 05b7cc266..a0c99af9d 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -28,8 +28,6 @@ knitr::opts_chunk$set( message = FALSE, warning = FALSE, fig.width = 10, - cache = TRUE, - cache.lazy = FALSE, time_it = TRUE, error = TRUE ) From 26160fcd4f9d7645aa305d382280c9d1d8caec5d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 17:55:49 -0400 Subject: [PATCH 599/979] fix BPCells VST --- R/preprocessing5.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 1f4b62a56..631b958f1 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -816,9 +816,13 @@ VST.IterableMatrix <-function( feature.mean <- hvf.info$mean feature.sd <- sqrt(x = hvf.info$variance.expected) standard.max <- clip %||% sqrt(x = ncol(x = data)) + feature.mean[feature.mean == 0] <- 0.1 data <- BPCells::min_by_row(mat = data, vals = standard.max*feature.sd + feature.mean) - data.standard <- (data - feature.mean) /feature.sd - hvf.info$variance.standardized <- matrix_stats(matrix = data.standard, row_stats = 'variance')$row_stats['variance',] + data.standard <- (data - feature.mean)/feature.sd + hvf.info$variance.standardized <- BPCells::matrix_stats( + matrix = data.standard, + row_stats = 'variance' + )$row_stats['variance',] # Set variable features hvf.info$variable <- FALSE hvf.info$rank <- NA From 08aa9ce4449c8cc8722a12490c16cd1464625456 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 27 Mar 2023 21:14:05 -0400 Subject: [PATCH 600/979] update BPCells in COVID --- vignettes/COVID_SCTMapping.Rmd | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index a0c99af9d..4d0e54e9f 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -23,8 +23,7 @@ knitr::knit_hooks$set(time_it = local({ } })) knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), + tidy = 'styler', message = FALSE, warning = FALSE, fig.width = 10, @@ -46,7 +45,7 @@ options(future.globals.maxSize = 1e9) ## Introduction: Reference mapping analysis in Seurat v5 In numerous studies profiling multiple tissues across hundreds of individuals and millions of cells, the single-cell reference mapping approach offers a robust and consistent method for annotating all those publicly available single-cell datasets. This vignette demonstrates the process of mapping multiple query scRNA PBMC datasets onto [our annotated CITE-seq reference of 162,000 PBMC measured with 228 antibodies](https://doi.org/10.1016/j.cell.2021.04.048) -We download three datasets (1,498,064 cells and 277 donors) from [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). We demonstrate how to create a Seurat object with BPCells matrix in our BPCells interaction vignette. +We download three datasets (1,498,064 cells and 277 donors) from [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Prior to running this vignette, please [install Seurat v5](https://satijalab.org/seurat/articles/install.html), as well as the [BPCells](https://github.com/bnprks/BPCells) package, which we use for on-disk storage. You can read more about using BPCells in Seurat v5 [here](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html). We have previously demonstrated the use of the reference-mapping approach for annotating cell labels in a single query dataset. With Seurat v5, we have substantially improved the speed, memory efficiency, and user-friendliness for mapping a large number of query datasets to the same reference. @@ -57,10 +56,8 @@ In this vignette, we demonstrate how to use a previously established reference t To run this vignette please install Seurat v5 as shown [here](articles/install.html). - - ## Load the PBMC Reference Dataset and Query Datasets -We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](https://doi.org/10.1016/j.cell.2021.04.048). We will use the query datasets prepared in BPCells interaction vignette containing scRNA-seq data from three different studies. +We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](https://doi.org/10.1016/j.cell.2021.04.048). We will use the query Seurat object prepared in [BPCells interaction vignette](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html). This query Seurat object is generated for datasets from three different studies by utilizing the `CreateSeuratObject` function, which also accepts a list of BPCells matrices as input. As a result, the three datasets are created as three layers and stored on disk. ```{r load.data} reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") @@ -69,7 +66,6 @@ object <- NormalizeData(object, verbose = FALSE) ``` - ## Mapping Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). The distinction between the v4 reference mapping vignette and this method is that we simultaneously map three datasets calling of `FindTransferAnchors` and `MapQuery` once. From de027cec84600be10e9ff92f429a7aee8e9a6b68 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 28 Mar 2023 08:30:22 -0400 Subject: [PATCH 601/979] update essential vig --- vignettes/seurat5_essential_commands.Rmd | 65 ++++++++++++++++++------ 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/vignettes/seurat5_essential_commands.Rmd b/vignettes/seurat5_essential_commands.Rmd index 040e9078c..84d975180 100644 --- a/vignettes/seurat5_essential_commands.Rmd +++ b/vignettes/seurat5_essential_commands.Rmd @@ -22,8 +22,7 @@ knitr::knit_hooks$set(time_it = local({ } })) knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), + tidy = 'styler', warning = FALSE, error = TRUE, message = FALSE, @@ -99,29 +98,50 @@ The `Seurat.object.assay.version` option can also be set to create `Assay5` or ` ```{r} # create v3 assays options(Seurat.object.assay.version = "v3") -pbmc.data <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") -pbmc <- CreateSeuratObject(counts = pbmc.data) +pbmc.counts <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") +pbmc <- CreateSeuratObject(counts = pbmc.counts) print(class(pbmc[["RNA"]])) # create v5 assays options(Seurat.object.assay.version = "v5") -pbmc.data <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") -pbmc <- CreateSeuratObject(counts = pbmc.data) +pbmc.counts <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") +pbmc <- CreateSeuratObject(counts = pbmc.counts) pbmc <- NormalizeData(pbmc) print(class(pbmc[["RNA"]])) ``` `CreateAssayObject()` and `CreateAssay5Object()` can be used to create v3 and v5 assay regardless of the setting in `Seurat.object.assay.version` ```{r} -assay.v3 <- CreateAssayObject(counts = pbmc.data) -assay.v5 <- CreateAssay5Object(counts = pbmc.data) +assay.v3 <- CreateAssayObject(counts = pbmc.counts) +assay.v5 <- CreateAssay5Object(counts = pbmc.counts) print(class(assay.v3)) print(class(assay.v5)) + +# Normalized data can also be used to create Assay5 +assay.v5 <- CreateAssay5Object(data = log1p(pbmc.counts)) +Layers(assay.v5) + +# Simultaneous setting of counts and normalized data is possible, +# but the cells need to be the same. +assay.v5 <- CreateAssay5Object(counts = pbmc.counts, data = log1p(pbmc.counts)) +Layers(assay.v5) + + +# A list of matrices can be used to generate multi-layer assay +split.data <- sample(c("A", "B", "C"), size = ncol(pbmc), replace = TRUE) +cells.split <- split(x = colnames(pbmc.counts), f = split.data) +pbmc.counts.list <- lapply(X = cells.split,FUN = function(x) pbmc.counts[,x]) + +assay.v5 <- CreateAssay5Object(counts = pbmc.counts.list) +Layers(assay.v5) + ``` -Layers can be split based on metadata. +Counts Layers can be split based on metadata in the assay level ```{r} -pbmc$split.data <- sample(c("A", "B", "C"), size = ncol(pbmc), replace = TRUE) +pbmc <- CreateSeuratObject(counts = pbmc.counts) +pbmc <- NormalizeData(pbmc) +pbmc$split.data <- split.data pbmc[["RNA"]] <- split(x = pbmc[["RNA"]], f = pbmc$split.data) Layers(pbmc[["RNA"]]) ``` @@ -141,14 +161,29 @@ And then joined back together in a single layer. ```{r} pbmc[["RNA"]] <- JoinLayers(pbmc[["RNA"]]) Layers(pbmc[["RNA"]]) -``` -Assay data can be accessed with `FetchData` depending on the use case. +``` +Dimensionality reduction data and metadata can be retrieved through the use of `FetchData`, `[[`, or V4 functions, depending on the use case. ```{r} -# returns information from both assay and meta.data as a data.frame -fetch_df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "rna_CD79A", 'split.data')) +pbmc <- FindVariableFeatures(pbmc) +pbmc <- ScaleData(pbmc) +pbmc <- RunPCA(pbmc) + +# returns information from both assay, cell embeddings and meta.data as a data.frame +fetch_df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "PC_1", "nCount_RNA")) head(fetch_df) -``` + +# get cell embeddings +head(Embeddings(object = pbmc[['pca']])[,1:5]) +head(pbmc[['pca']][[]][,1:5]) + +# get feature loadings +head(Loadings(object = pbmc[['pca']])[,1:5]) +head(pbmc[['pca']][][,1:5]) + +# get meta.data +head(pbmc[[]]) +``` \ No newline at end of file From 0e6147509c529b2b40bdbdfb4ff1da849e1a07a0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 28 Mar 2023 11:49:52 -0400 Subject: [PATCH 602/979] update covid vig --- vignettes/COVID_SCTMapping.Rmd | 55 +++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 4d0e54e9f..51bed2dd8 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -104,21 +104,45 @@ We now have annotations for all cells from this object. This enables us to deter ```{r} df_comp <- as.data.frame.matrix(table(object$donor_id, object$predicted.celltype.l2)) -df_comp <- df_comp[rowSums(df_comp)> 50,] -df_comp_ratio <- sweep(x = df_comp, MARGIN = 1, STATS = rowSums(df_comp), FUN = '/') +select.donors <- rownames(df_comp)[rowSums(df_comp)> 50] +df_comp <- df_comp[select.donors, ] +df_comp_relative <- sweep(x = df_comp, MARGIN = 1, STATS = rowSums(df_comp), FUN = '/') -df_disease <- as.data.frame.matrix(table(object$donor_id, object$disease)) -df_disease$disease <- 'other' -df_disease$disease[df_disease$normal!=0] <- 'normal' -df_disease$disease[df_disease$`COVID-19`!=0] <- 'COVID-19' -df_disease$disease <- factor(df_disease$disease, levels = c('normal','COVID-19','other')) +df_disease <- as.data.frame.matrix(table(object$donor_id, object$disease))[select.donors, ] -obj.comp <- CreateSeuratObject(counts = t(df_comp_ratio), meta.data = df_disease) -obj.comp <- subset(obj.comp, subset = disease %in% c('normal','COVID-19')) +df_comp_relative$disease <- 'other' +df_comp_relative$disease[df_disease$normal!=0] <- 'normal' +df_comp_relative$disease[df_disease$`COVID-19`!=0] <- 'COVID-19' +df_comp_relative$disease <- factor(df_comp_relative$disease, levels = c('normal','COVID-19','other')) +df_comp_relative <- df_comp_relative[df_comp_relative$disease %in% c('normal','COVID-19'),] ``` -```{r, fig.width=10, fig.height=6} -p1 <- VlnPlot(obj.comp, features = "MAIT", group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') -p2 <- VlnPlot(obj.comp, features = 'Plasmablast', group.by = 'disease', slot = 'counts', cols = c("#377eb8", "#e41a1c")) + ylab('relative abundance') + +```{r, fig.width=10, fig.height=4} +p1 <- ggplot(data = df_comp_relative, mapping = aes(x = disease, y = MAIT, fill = disease)) + + geom_boxplot(outlier.shape = NA) + + scale_fill_manual(values = c("#377eb8", "#e41a1c")) + + xlab("") + ylab('relative abundance') + + ggtitle('MAIT') + + geom_jitter(color="black", size=0.4, alpha=0.9 ) + + theme_bw() + + theme( axis.title = element_text(size = 12), + axis.text = element_text(size = 12), + plot.title = element_text(size = 15, hjust = 0.5, face = "bold") + ) + +p2 <- ggplot(data = df_comp_relative, mapping = aes(x = disease, y = Plasmablast, fill = disease)) + + geom_boxplot(outlier.shape = NA) + + scale_fill_manual(values = c("#377eb8", "#e41a1c")) + + xlab("") + ylab('relative abundance') + + ggtitle('Plasmablast') + + geom_jitter(color="black", size=0.4, alpha=0.9 ) + + theme_bw() + + theme( axis.title = element_text(size = 12), + axis.text = element_text(size = 12), + plot.title = element_text(size = 15, hjust = 0.5, face = "bold") + ) + + p1 + p2 + plot_layout(ncol = 2) ``` @@ -149,8 +173,11 @@ bulk$disease <- factor(bulk$disease, levels = c('normal', 'COVID-19')) Once cells are aggregated into individuals, we can execute cell type-specific differential expression analysis between healthy and COVID-19 samples. Here we only visualize certain interferon-stimulated genes across all cell types when comparing healthy and COVID-19 donors. -```{r, fig.width=10, fig.height=15} -VlnPlot(bulk, features = c('IFI6','ISG15', 'IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c"), ncol = 1) +```{r, fig.width=10, fig.height=12} +p1 <- VlnPlot(bulk, features = c('IFI6'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +p2 <- VlnPlot(bulk, features = c('ISG15'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +p3 <- VlnPlot(bulk, features = c('IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +p1 + p2 + p3 + plot_layout(ncol = 1) ```
    From 1a7a8a9bc1a114d9c9cfac01d6a5bc6d880105e0 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Mar 2023 16:03:45 -0400 Subject: [PATCH 603/979] update vignette rmd --- _pkgdown.yaml | 4 -- vignettes/COVID_SCTMapping.Rmd | 71 +++++++++---------- vignettes/install.Rmd | 8 +-- .../seurat5_bpcells_interaction_vignette.Rmd | 45 ++++++++---- vignettes/seurat5_integration.Rmd | 60 ++++++++++++---- vignettes/seurat5_integration_bridge.Rmd | 70 ++++++++++-------- vignettes/seurat5_sketch_analysis.Rmd | 44 ++++++++---- vignettes/spatial_vignette_2.Rmd | 2 +- vignettes/vignettes_v5.yaml | 2 +- 9 files changed, 185 insertions(+), 121 deletions(-) diff --git a/_pkgdown.yaml b/_pkgdown.yaml index da41f2eb6..43030a5e0 100644 --- a/_pkgdown.yaml +++ b/_pkgdown.yaml @@ -26,10 +26,6 @@ navbar: href: articles/pbmc3k_tutorial.html - text: "Using Seurat with multi-modal data" href: articles/multimodal_vignette.html - - text: "Analysis of spatial datasets (Sequencing-based)" - href: articles/spatial_vignette.html - - text: "Analysis of spatial datasets (Imaging-based)" - href: articles/spatial_vignette_2.html - text: ------- - text: Data Integration - text: "Introduction to scRNA-seq integration" diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 51bed2dd8..1a2bedeb6 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -43,38 +43,32 @@ options(future.globals.maxSize = 1e9) ## Introduction: Reference mapping analysis in Seurat v5 -In numerous studies profiling multiple tissues across hundreds of individuals and millions of cells, the single-cell reference mapping approach offers a robust and consistent method for annotating all those publicly available single-cell datasets. This vignette demonstrates the process of mapping multiple query scRNA PBMC datasets onto [our annotated CITE-seq reference of 162,000 PBMC measured with 228 antibodies](https://doi.org/10.1016/j.cell.2021.04.048) -We download three datasets (1,498,064 cells and 277 donors) from [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Prior to running this vignette, please [install Seurat v5](https://satijalab.org/seurat/articles/install.html), as well as the [BPCells](https://github.com/bnprks/BPCells) package, which we use for on-disk storage. You can read more about using BPCells in Seurat v5 [here](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html). - -We have previously demonstrated the use of the reference-mapping approach for annotating cell labels in a single query dataset. With Seurat v5, we have substantially improved the speed, memory efficiency, and user-friendliness for mapping a large number of query datasets to the same reference. - -In this vignette, we demonstrate how to use a previously established reference to interpret three scRNA-seq query datasets: - -* Annotate all query cells from multiple studies based on a set of reference-defined cell states -* Aggregate the annotated cells into individuals for performing Pseudo-bulk differential analysis +In Seurat v5, we introduce a scalable approach for reference mapping datasets from separate studies or individuals. Reference mapping is a powerful approach to identify consistent labels across studies and perform cross-dataset analysis. We emphasize that while individual datasets are manageable in size, the aggregate of many datasets often amounts to millions of cell which do not fit in-memory. Importantly, we never simultaneously load all of the cells in-memory to maintain low memory usage. Cross-dataset analysis is often challenged by disparate or unique cell type labels. Thus, reference mapping annotates cells using a common reference for consistent cell type labels. + +In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from healthy and COVID-19 donors. Using the harmonized annotations, we demonstrate how to perform differential expression across disease within cell types using pseudobulk expression. -To run this vignette please install Seurat v5 as shown [here](articles/install.html). +Prior to running this vignette, please [install Seurat v5](articles/install.html) and see the [BPCells vignette](articles/seurat5_bpcells_interaction_vignette.html) to construct the on-disk object used in this vignette. Additionally, we map to our annotated CITE-seq reference containing 162,000 cells and 228 antibodies ([Hao*, Hao*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)) which is available for download [here](https://zenodo.org/record/7779017#.ZCMojezMJqs). ## Load the PBMC Reference Dataset and Query Datasets -We load the CITE-seq reference (download [here]()) from our Seurat v4 [paper](https://doi.org/10.1016/j.cell.2021.04.048). We will use the query Seurat object prepared in [BPCells interaction vignette](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html). This query Seurat object is generated for datasets from three different studies by utilizing the `CreateSeuratObject` function, which also accepts a list of BPCells matrices as input. As a result, the three datasets are created as three layers and stored on disk. +We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](articles/seurat5_bpcells_interaction_vignette.html). The query object is generated for datasets from three different studies using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. As a result, the three datasets reside in three separate `layers` stored on-disk. ```{r load.data} reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") object <- readRDS("/brahms/hartmana/vignette_data/merged_covid_object.rds") object <- NormalizeData(object, verbose = FALSE) - ``` ## Mapping -Using the same code from in our v4 reference mapping vignette, we find anchors between reference and query in the reference precomputed supervised PCA (spca) space. We recommend the use of supervised PCA for CITE-seq reference dataset, and demonstrate how to compute this transformation in [v4 mapping vignette](). The distinction between the v4 reference mapping vignette and this method is that we simultaneously map three datasets calling of `FindTransferAnchors` and `MapQuery` once. +Using the same code from the [v4 reference mapping vignette](articles/multimodal_reference_mapping.html), we find anchors between reference and query in the precomputed supervised PCA. We recommend the use of supervised PCA for CITE-seq reference datasets, and demonstrate how to compute this transformation in [v4 reference mapping vignette](articles/multimodal_reference_mapping.html). In Seurat v5, all three datasets are mapped by calling `FindTransferAnchors` and `MapQuery` once. ```{r} -anchor <- FindTransferAnchors(reference = reference, - query = object, - reference.reduction = 'spca', - normalization.method = 'SCT', - dims = 1:50) +anchor <- FindTransferAnchors( + reference = reference, + query = object, + reference.reduction = 'spca', + normalization.method = 'SCT', + dims = 1:50) object <- MapQuery( anchorset = anchor, query = object, @@ -88,9 +82,7 @@ object <- MapQuery( ``` ## Explore the mapping results - -We can now visualize the 1.5 million query cells from three studies, which have been projected into a UMAP visualization defined by the reference. Each cell has received consistent annotations at two levels of granularity (level 1 and level 2). The incoherence of their original annotations (cell_type) prevents us from directly performing integrative analysis across these studies. However, our mapping results annotate the various datasets using a single set of curated annotations (predicted.celltype.l1 and predicted.celltype.l2), enabling cross-study differential compositional and transcriptome analyses. - +Next, we visualize all cells from the three studies which have been projected into a UMAP-space defined by the reference. Each cell is annotated at two levels of granularity (`predicted.celltype.l1` and `predicted.celltype.l2`). The differing ontologies used in the original annotations (`cell_type`) prevent us from directly performing integrative analysis across studies. ```{r, fig.width=10, fig.height=6} DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3, label.size = 3) + NoLegend() @@ -100,7 +92,7 @@ DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha ``` ## Differential composition analysis -We now have annotations for all cells from this object. This enables us to determine the proportion of different cell types for each donor and explore variations in cell type composition between healthy individuals and COVID-19 patients. For example, we noticed a reduction in MAIT cells and an increase in plasmablasts among COVID-19 patients. +We utilize our annotations to identify differences in the proportion of different cell types between healthy individuals and COVID-19 patients. For example, we noticed a reduction in MAIT cells as well as an increase in plasmablasts among COVID-19 patients. ```{r} df_comp <- as.data.frame.matrix(table(object$donor_id, object$predicted.celltype.l2)) @@ -121,34 +113,32 @@ df_comp_relative <- df_comp_relative[df_comp_relative$disease %in% c('normal','C p1 <- ggplot(data = df_comp_relative, mapping = aes(x = disease, y = MAIT, fill = disease)) + geom_boxplot(outlier.shape = NA) + scale_fill_manual(values = c("#377eb8", "#e41a1c")) + - xlab("") + ylab('relative abundance') + - ggtitle('MAIT') + + xlab("") + ylab('relative abundance') + + ggtitle('MAIT') + geom_jitter(color="black", size=0.4, alpha=0.9 ) + theme_bw() + theme( axis.title = element_text(size = 12), - axis.text = element_text(size = 12), + axis.text = element_text(size = 12), plot.title = element_text(size = 15, hjust = 0.5, face = "bold") - ) + ) p2 <- ggplot(data = df_comp_relative, mapping = aes(x = disease, y = Plasmablast, fill = disease)) + geom_boxplot(outlier.shape = NA) + scale_fill_manual(values = c("#377eb8", "#e41a1c")) + xlab("") + ylab('relative abundance') + - ggtitle('Plasmablast') + + ggtitle('Plasmablast') + geom_jitter(color="black", size=0.4, alpha=0.9 ) + theme_bw() + theme( axis.title = element_text(size = 12), - axis.text = element_text(size = 12), + axis.text = element_text(size = 12), plot.title = element_text(size = 15, hjust = 0.5, face = "bold") - ) - - + ) + p1 + p2 + plot_layout(ncol = 2) ``` ## Differential expression analysis -Except for the composition analysis, we use an aggregation-based (pseudobulk) workflow to explore differential genes between healthy and COVID donors. We aggregate all cells within the same cell type and sample using the AggregateExpression function. This returns a Seurat object where each ‘cell’ represents the pseudobulk profile of one cell type in one individual. - +In addition to composition analysis, we use an aggregation-based (pseudobulk) workflow to explore differential genes between healthy individuals and COVID-19 donors. We aggregate all cells within the same cell type and donor using the `AggregateExpression` function. This returns a Seurat object where each ‘cell’ represents the pseudobulk profile of one cell type in one individual. ```{r} bulk <- AverageExpression(object, @@ -168,15 +158,20 @@ bulk$disease <- sapply(strsplit(Cells(bulk), split = "_"), '[', 3) bulk <- subset(bulk, subset = disease %in% c('normal', 'COVID-19') ) bulk <- subset(bulk, subset = celltype != c('Doublet') ) bulk$disease <- factor(bulk$disease, levels = c('normal', 'COVID-19')) - ``` -Once cells are aggregated into individuals, we can execute cell type-specific differential expression analysis between healthy and COVID-19 samples. Here we only visualize certain interferon-stimulated genes across all cell types when comparing healthy and COVID-19 donors. +Once a pseudobulk object is created, we can perform cell type-specific differential expression analysis between healthy individuals and COVID-19 donors. Here, we only visualize certain interferon-stimulated genes which are often upregulated during viral infection. ```{r, fig.width=10, fig.height=12} -p1 <- VlnPlot(bulk, features = c('IFI6'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) -p2 <- VlnPlot(bulk, features = c('ISG15'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) -p3 <- VlnPlot(bulk, features = c('IFIT5'), group.by = 'celltype', split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +p1 <- VlnPlot( + object = bulk, features = 'IFI6', group.by = 'celltype', + split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +p2 <- VlnPlot( + object = bulk, features = c('ISG15'), group.by = 'celltype', + split.by = 'disease', cols = c("#377eb8", "#e41a1c")) +p3 <- VlnPlot( + object = bulk, features = c('IFIT5'), group.by = 'celltype', + split.by = 'disease', cols = c("#377eb8", "#e41a1c")) p1 + p2 + p3 + plot_layout(ncol = 1) ``` diff --git a/vignettes/install.Rmd b/vignettes/install.Rmd index 030542d29..7cc7d55c2 100644 --- a/vignettes/install.Rmd +++ b/vignettes/install.Rmd @@ -5,15 +5,15 @@ output: html_document To install Seurat, [R](https://www.r-project.org/) version 4.0 or greater is required. We also recommend installing [R Studio](https://www.rstudio.com/). -# ![Seurat 5:](../output/images/SeuratV5.png){#id .class width=60 height=60} Seurat 5: Install from GitHub +# ![Seurat v5:](../output/images/SeuratV5.png){#id .class width=60 height=60} Seurat 5: Install from GitHub -Copy the code below to install Seurat 5: +Copy the code below to install Seurat v5: ```{r required, eval=FALSE} remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE) ``` -The following packages are not required but are used in many Seurat 5 vignettes: +The following packages are not required but are used in many Seurat v5 vignettes: * SeuratData: automatically load datasets pre-packaged as Seurat objects * Azimuth: local annotation of scRNA-seq and scATAC-seq queries across multiple organs and tissues @@ -27,7 +27,7 @@ remotes::install_github("satijalab/seurat-wrappers", "seurat5", quiet = TRUE) remotes::install_github("stuart-lab/signac", "seurat5", quiet = TRUE) ``` -Seurat 5 utilizes BPCells to support analysis of extremely large datasets: +Seurat v5 utilizes BPCells to support analysis of extremely large datasets: ```{r bpcells, eval=FALSE} remotes::install_github("bnprks/BPCells", quiet = TRUE) diff --git a/vignettes/seurat5_bpcells_interaction_vignette.Rmd b/vignettes/seurat5_bpcells_interaction_vignette.Rmd index 02acacb02..280418d7b 100644 --- a/vignettes/seurat5_bpcells_interaction_vignette.Rmd +++ b/vignettes/seurat5_bpcells_interaction_vignette.Rmd @@ -22,13 +22,13 @@ knitr::knit_hooks$set(time_it = local({ } })) knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), + tidy = 'styler', warning = FALSE, error = TRUE, message = FALSE, fig.width = 8, - time_it = TRUE + time_it = TRUE, + cache = TRUE ) ``` @@ -55,14 +55,18 @@ We use BPCells functionality to both load in our data and write the counts layer # Load Data ## Load Data from one h5 file -In this section, we will load a dataset of mouse brain cells freely available from 10X Genomics. This includes 1.3 Million single cells that were sequenced on the Illumina NovaSeq 6000. The raw data can be found [here](https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.3.0/1M_neurons?). +In this section, we will load a dataset of mouse brain cells freely available from 10x Genomics. This includes 1.3 Million single cells that were sequenced on the Illumina NovaSeq 6000. The raw data can be found [here](https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.3.0/1M_neurons?). To read in the file, we will use open_matrix_10x_hdf5, a BPCells function written to read in feature matrices from 10x. We then write a matrix directory, load the matrix, and create a Seurat object. ```{r} -brain.data <- open_matrix_10x_hdf5(path = "/brahms/hartmana/vignette_data/1M_neurons_filtered_gene_bc_matrices_h5.h5") +brain.data <- open_matrix_10x_hdf5( + path = "/brahms/hartmana/vignette_data/1M_neurons_filtered_gene_bc_matrices_h5.h5") # Write the matrix to a directory -write_matrix_dir(mat = brain.data, dir = '/brahms/hartmana/vignette_data/bpcells/brain_counts', overwrite = TRUE) +write_matrix_dir( + mat = brain.data, + dir = '/brahms/hartmana/vignette_data/bpcells/brain_counts', + overwrite = TRUE) # Now that we have the matrix on disk, we can load it brain.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/brain_counts") brain.mat <- Azimuth:::ConvertEnsembleToSymbol(mat = brain.mat, species = "mouse") @@ -107,7 +111,10 @@ If you save your object and load it in in the future, Seurat will access the on- This also makes it easy to share your Seurat objects with BPCells matrices by sharing a folder that contains both the object and the BPCells directory. ```{r} -saveRDS(brain, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/brain_object") +saveRDS( + object = brain, + file = "obj.Rds", + destdir = "/brahms/hartmana/vignette_data/bpcells/brain_object") ``` @@ -134,8 +141,11 @@ metadata.list <- c() for (i in 1:length(files.set)) { path <- paste0(file.dir, files.set[i]) data <- open_matrix_anndata_hdf5(path) - write_matrix_dir(mat = data, dir = paste0(gsub(".h5ad", "", path), - "_BP"), overwrite = TRUE) + write_matrix_dir( + mat = data, + dir = paste0(gsub(".h5ad", "", path), "_BP"), + overwrite = TRUE + ) # Load in BP matrices mat <- open_matrix_dir(dir = paste0(gsub(".h5ad", "", path), "_BP")) mat <- Azimuth:::ConvertEnsembleToSymbol(mat = mat, species = "human") @@ -161,12 +171,15 @@ When we create the Seurat object with the list of matrices from each publication ```{r} options(Seurat.object.assay.version = "v5") -merged.object <- CreateSeuratObject(counts = data.list, meta.data = metadata) +merged.object <- CreateSeuratObject(counts = data.list, meta.data = metadata) merged.object ``` ```{r save_merged, eval=FALSE} -saveRDS(merged.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/merged_object") +saveRDS( + object = merged.object, + file = "obj.Rds", + destdir = "/brahms/hartmana/vignette_data/bpcells/merged_object") ``` ## Parse Biosciences @@ -174,7 +187,8 @@ saveRDS(merged.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_da Here, we show how to load a 1 million cell data set from Parse Biosciences and create a Seurat Object. The data is available for download [here](https://support.parsebiosciences.com/hc/en-us/articles/7704577188500-How-to-analyze-a-1-million-cell-data-set-using-Scanpy-and-Harmony) ```{r} -parse.data <- open_matrix_anndata_hdf5("/brahms/hartmana/vignette_data/h5ad_files/ParseBio_PBMC.h5ad") +parse.data <- open_matrix_anndata_hdf5( + "/brahms/hartmana/vignette_data/h5ad_files/ParseBio_PBMC.h5ad") ``` ```{r, eval=FALSE} @@ -183,15 +197,16 @@ write_matrix_dir(mat = parse.data, dir = "/brahms/hartmana/vignette_data/bpcells ```{r} parse.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") - metadata <- readRDS("/brahms/hartmana/vignette_data/ParseBio_PBMC_meta.rds") metadata$disease <- sapply(strsplit(x = metadata$sample, split = "_"), "[", 1) - parse.object <- CreateSeuratObject(counts = parse.mat, meta.data = metadata) ``` ```{r save_parse, eval=FALSE} -saveRDS(parse.object, file = "obj.Rds", destdir = "/brahms/hartmana/vignette_data/bpcells/parse_object") +saveRDS( + object = parse.object, + file = "obj.Rds", + destdir = "/brahms/hartmana/vignette_data/bpcells/parse_object") ``` diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 9f7027d90..74a090485 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -21,8 +21,7 @@ knitr::knit_hooks$set(time_it = local({ } })) knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), + tidy = 'styler', fig.width = 10, message = FALSE, warning = FALSE, @@ -97,24 +96,38 @@ Note that you can find more detail on each method, and any installation prerequi Each of the following lines perform a new integration using a single line of code: ```{r integratelayerscca} -obj <- IntegrateLayers(object = obj, method = CCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.cca', verbose = FALSE) +obj <- IntegrateLayers( + object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.cca', + verbose = FALSE) ``` ```{r integratelayersrpca} -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.rpca', verbose = FALSE) +obj <- IntegrateLayers( + object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', + verbose = FALSE) ``` ```{r integratelayersharmony} -obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', verbose = FALSE) +obj <- IntegrateLayers( + object = obj, method = HarmonyIntegration, + orig.reduction = "pca", new.reduction = 'harmony', + verbose = FALSE) ``` ```{r integratelayersfastmnn} -obj <- IntegrateLayers(object = obj, method = FastMNNIntegration, new.reduction = 'integrated.mnn', verbose = FALSE) +obj <- IntegrateLayers( + object = obj, method = FastMNNIntegration, + new.reduction = 'integrated.mnn', + verbose = FALSE) ``` ```{r integratelayersscvi, eval=FALSE} -obj <- IntegrateLayers(object = obj, method = scVIIntegration, new.reduction = 'integrated.scvi', - conda_env = '../miniconda3/envs/scvi-env', verbose = FALSE) +obj <- IntegrateLayers( + object = obj, method = scVIIntegration, + new.reduction = 'integrated.scvi', + conda_env = '../miniconda3/envs/scvi-env', verbose = FALSE) ``` ```{r addscvi, include=FALSE} @@ -129,22 +142,34 @@ For any of the methods, we can now visualize and cluster the datasets. We show t obj <- FindNeighbors(obj, reduction = 'integrated.cca', dims = 1:30) obj <- FindClusters(obj,resolution = 2, cluster.name = 'cca_clusters') obj <- RunUMAP(obj, reduction = "integrated.cca", dims = 1:30, reduction.name = 'umap.cca') -p1 <- DimPlot(obj, reduction="umap.cca", group.by=c("Method", "predicted.celltype.l2", "cca_clusters"), combine=FALSE) +p1 <- DimPlot( + obj, reduction = "umap.cca", + group.by = c("Method", "predicted.celltype.l2", "cca_clusters"), + combine = FALSE) obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:10) obj <- FindClusters(obj,resolution = 2, cluster.name = 'scvi_clusters') obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:10, reduction.name = 'umap.scvi') -p2 <- DimPlot(obj, reduction="umap.scvi", group.by=c("Method", "predicted.celltype.l2", "scvi_clusters"), combine = FALSE) +p2 <- DimPlot( + obj, reduction = "umap.scvi", + group.by = c("Method", "predicted.celltype.l2", "scvi_clusters"), + combine = FALSE) -wrap_plots(c(p1, p2), ncol=2) +wrap_plots(c(p1, p2), ncol = 2) ``` We hope that by simplifying the process of performing integrative analysis, users can more carefully evaluate the biological information retained in the integrated dataset. For example, users can compare the expression of biological markers based on different clustering solutions, or visualize one method's clustering solution on different UMAP visualizations. ```{r vlnplots, fig.height=5, fig.width=16, warning=FALSE} -p1 <- VlnPlot(obj, features = "rna_CD8A", group.by = 'unintegrated_clusters') + NoLegend() + ggtitle("CD8A - Unintegrated Clusters") -p2 <- VlnPlot(obj, "rna_CD8A", group.by = 'cca_clusters') + NoLegend() + ggtitle("CD8A - CCA Clusters") -p3 <- VlnPlot(obj, "rna_CD8A", group.by = 'scvi_clusters') + NoLegend() + ggtitle("CD8A - scVI Clusters") +p1 <- VlnPlot( + obj, features = "rna_CD8A", group.by = 'unintegrated_clusters' +) + NoLegend() + ggtitle("CD8A - Unintegrated Clusters") +p2 <- VlnPlot( + obj, "rna_CD8A", group.by = 'cca_clusters' +) + NoLegend() + ggtitle("CD8A - CCA Clusters") +p3 <- VlnPlot( + obj, "rna_CD8A", group.by = 'scvi_clusters' +) + NoLegend() + ggtitle("CD8A - scVI Clusters") p1 | p2 | p3 ``` @@ -162,3 +187,10 @@ Once integrative analysis is complete, you can rejoin the layers - which collaps obj <- JoinLayers(obj) obj ``` + +
    + **Session Info** +```{r} +sessionInfo() +``` +
    diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 1b4eb2b98..941b39b21 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -22,7 +22,7 @@ knitr::knit_hooks$set(time_it = local({ } })) knitr::opts_chunk$set( - tidy = TRUE, + tidy = "styler", tidy.opts = list(width.cutoff = 95), warning = FALSE, error = TRUE, @@ -163,7 +163,9 @@ obj.rna <- readRDS("/brahms/haoy/seurat4_pbmc/pbmc_multimodal_2023.rds") As an alternative to using a pre-built reference, you can also use your own reference. To demonstrate, you can download a scRNA-seq dataset of 23,837 human PBMC [here](https://www.dropbox.com/s/x8mu9ye2w3a63hf/20k_PBMC_scRNA.rds?dl=0), which we have already annotated. ```{r, message=FALSE, warning=FALSE, eval=FALSE} obj.rna = readRDS("/path/to/reference.rds") -obj.rna = SCTransform(object = obj.rna) %>% RunPCA() %>% RunUMAP(dims = 1:50, return.model = TRUE) +obj.rna = SCTransform(object = obj.rna) %>% + RunPCA() %>% + RunUMAP(dims = 1:50, return.model = TRUE) ``` When using your own reference, set `reference.reduction = "pca"` in the `PrepareBridgeReference` function. @@ -178,7 +180,7 @@ Prior to performing bridge integration, we normalize and pre-process each of the ```{r, message=FALSE, warning=FALSE} # normalize multiome RNA DefaultAssay(obj.multi) <- "RNA" -obj.multi <- SCTransform(obj.multi, verbose = FALSE) +obj.multi <- SCTransform(obj.multi, verbose = FALSE) # normalize multiome ATAC DefaultAssay(obj.multi) <- "ATAC" obj.multi <- RunTFIDF(obj.multi) @@ -198,22 +200,18 @@ dims.atac <- 2:50 dims.rna <- 1:50 DefaultAssay(obj.multi) <- "RNA" DefaultAssay(obj.rna) <- "SCT" -obj.rna.ext <- PrepareBridgeReference(reference = obj.rna, - bridge = obj.multi, - reference.reduction = "spca", - reference.dims = dims.rna, - normalization.method = "SCT" -) +obj.rna.ext <- PrepareBridgeReference( + reference = obj.rna, bridge = obj.multi, + reference.reduction = "spca", reference.dims = dims.rna, + normalization.method = "SCT") ``` Now, we can directly find anchors between the extended reference and query objects. We use the `FindBridgeTransferAnchors` function, which translates the query dataset using the same dictionary as was used to translate the reference, and then identifies anchors in this space. The function is meant to mimic our `FindTransferAnchors` function, but to identify correspondences across modalities. ```{r, message=FALSE, warning=FALSE} -bridge.anchor <- FindBridgeTransferAnchors(extended.reference = obj.rna.ext, - query = obj.atac, - reduction = "lsiproject", - dims = dims.atac -) +bridge.anchor <- FindBridgeTransferAnchors( + extended.reference = obj.rna.ext, query = obj.atac, + reduction = "lsiproject", dims = dims.atac) ``` @@ -221,21 +219,23 @@ Once we have identified anchors, we can map the query dataset onto the reference ```{r, message=FALSE, warning=FALSE} -obj.atac <- MapQuery(anchorset = bridge.anchor, - reference = obj.rna.ext, - query = obj.atac, - refdata = list( - l1 = "celltype.l1", - l2 = "celltype.l2", - l3 = "celltype.l3"), - reduction.model = "wnn.umap" -) +obj.atac <- MapQuery( + anchorset = bridge.anchor, reference = obj.rna.ext, + query = obj.atac, + refdata = list( + l1 = "celltype.l1", + l2 = "celltype.l2", + l3 = "celltype.l3"), + reduction.model = "wnn.umap") ``` Now we can visualize the results, plotting the scATAC-seq cells based on their predicted annotations, on the reference UMAP embedding. You can see that each scATAC-seq cell has been assigned a cell name based on the scRNA-seq defined cell ontology. ```{r, message=FALSE, warning=FALSE} -DimPlot(obj.atac, group.by = "predicted.l2", reduction = "ref.umap", label = TRUE) + ggtitle("ATAC") + NoLegend() +DimPlot( + obj.atac, group.by = "predicted.l2", + reduction = "ref.umap", label = TRUE +) + ggtitle("ATAC") + NoLegend() ``` ## Assessing the mapping @@ -251,16 +251,28 @@ obj.atac <- RunUMAP(obj.atac, reduction = "lsi", dims = 2:50) Now, we visualize the predicted cluster labels on the unsupervised UMAP emebdding. We can see that predicted cluster labels (from the scRNA-seq reference) are concordant with the structure of the scATAC-seq data. However, there are some cell types (i.e. Treg), that do not appear to separate in unsupervised analysis. These may be prediction errors, or cases where the reference mapping provides additional resolution. ```{r, pbmcdimplots, message=FALSE, warning=FALSE} -DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) +DimPlot(obj.atac, group.by = "predicted.l2", reduction = "umap", label = FALSE) ``` Lastly, we validate the predicted cell types for the scATAC-seq data by examining their chromatin accessibility profiles at canonical loci. We use the `CoveragePlot` function to visualize accessibility patterns at the CD8A, FOXP3, and RORC, after grouping cells by their predicted labels. We see expected patterns in each case. For example, the PAX5 locus exhibits peaks that are accessible exclusively in B cells, and the CD8A locus shows the same in CD8 T cell subsets. Similarly, the accessibility of FOXP3, a canonical marker of regulatory T cells (Tregs), in predicted Tregs provides strong support for the accuracy of our prediction. ```{r, message=FALSE, warning=FALSE} -CoveragePlot(obj.atac, region = "PAX5", group.by = "predicted.l1", idents = c("B", "CD4 T", "Mono", "NK"), window = 200, extend.upstream = -150000) -CoveragePlot(obj.atac, region = "CD8A", group.by = "predicted.l2", idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), extend.downstream = 5000, extend.upstream = 5000) -CoveragePlot(obj.atac, region = "FOXP3", group.by = "predicted.l2", idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), extend.downstream = 0, extend.upstream = 0) -CoveragePlot(obj.atac, region = "RORC", group.by = "predicted.l2", idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), extend.downstream = 5000, extend.upstream = 5000) +CoveragePlot( + obj.atac, region = "PAX5", group.by = "predicted.l1", + idents = c("B", "CD4 T", "Mono", "NK"), window = 200, + extend.upstream = -150000) +CoveragePlot( + obj.atac, region = "CD8A", group.by = "predicted.l2", + idents = c("CD8 Naive", "CD4 Naive", "CD4 TCM", "CD8 TCM"), + extend.downstream = 5000, extend.upstream = 5000) +CoveragePlot( + obj.atac, region = "FOXP3", group.by = "predicted.l2", + idents = c( "CD4 Naive", "CD4 TCM", "CD4 TEM", "Treg"), + extend.downstream = 0, extend.upstream = 0) +CoveragePlot( + obj.atac, region = "RORC", group.by = "predicted.l2", + idents = c("CD8 Naive", "CD8 TEM", "CD8 TCM", "MAIT"), + extend.downstream = 5000, extend.upstream = 5000) ```
    diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 1e5da9245..c96f24570 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -22,8 +22,7 @@ knitr::knit_hooks$set(time_it = local({ } })) knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), + tidy = 'styler', message = FALSE, warning = FALSE, fig.width = 10, @@ -76,12 +75,16 @@ format(object.size(obj), units = 'Mb') ## 'Sketch' a subset of cells, and load these into memory We select a subset ('sketch') of 50,000 cells (out of 1.3M). Rather than sampling all cells with uniform probability, we compute and sample based off a 'leverage score' for each cell, which reflects the magnitude of its contribution to the gene-covariance matrix, and its importance to the overall dataset. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1.full), we demonstrate that the leverage score is highest for rare populations in a dataset. Therefore, our sketched set of 50,000 cells will oversample rare populations, retaining the biological complexity of the sample while drastically compressing the dataset. -The function `SketchData` takes a normalized single-cell dataset (stored either on-disk or in-memory), and a set of variable features. It returns a Seurat object with a new assay (`sketch`), consisting of 50,000 cells, but these cells are now stored in-memory. Users can now easily switch between the in-memory and on-desk representation just by changing the default assay. +The function `SketchData` takes a normalized single-cell dataset (stored either on-disk or in-memory), and a set of variable features. It returns a Seurat object with a new assay (`sketch`), consisting of 50,000 cells, but these cells are now stored in-memory. Users can now easily switch between the in-memory and on-disk representation just by changing the default assay. ```{r, warning=FALSE, message=FALSE} obj <- NormalizeData(obj) obj <- FindVariableFeatures(obj) -obj <- SketchData(object = obj, ncells = 50000, method = 'LeverageScore', sketched.assay = 'sketch') +obj <- SketchData( + object = obj, + ncells = 50000, + method = 'LeverageScore', + sketched.assay = 'sketch') obj # switch to analyzing the full dataset (on-disk) DefaultAssay(obj) <- 'RNA' @@ -106,7 +109,12 @@ DimPlot(obj, label = T, label.size = 3, reduction = 'umap') + NoLegend() ``` ```{r,fig.height = 7, fig.width = 10} -FeaturePlot(obj, c('Igfbp7', 'Neurod6', 'Dlx2','Gad2', 'Eomes', 'Vim', 'Reln', 'Olig1', 'C1qa'), ncol = 3) +FeaturePlot( + object = obj, + features = c( + 'Igfbp7', 'Neurod6', 'Dlx2', 'Gad2', + 'Eomes', 'Vim', 'Reln', 'Olig1', 'C1qa'), + ncol = 3) ``` ## Extend results to the full datasets @@ -117,19 +125,20 @@ We can now extend the cluster labels and dimensional reductions learned on the s * Cluster labels: The `cluster_full` column in the object metadata now labels all cells in the dataset with one of the cluster labels derived from the sketched cells ```{r, warning=FALSE, message=FALSE} -obj <- ProjectData(object = obj, - assay = 'RNA', - full.reduction = 'pca.full', - sketched.assay = 'sketch', - sketched.reduction = 'pca', - umap.model = 'umap', - dims = 1:50, - refdata = list(cluster_full = 'seurat_clusters')) +obj <- ProjectData( + object = obj, + assay = 'RNA', + full.reduction = 'pca.full', + sketched.assay = 'sketch', + sketched.reduction = 'pca', + umap.model = 'umap', + dims = 1:50, + refdata = list(cluster_full = 'seurat_clusters')) # now that we have projected the full dataset, switch back to analyzing all cells DefaultAssay(obj) <- 'RNA' ``` -```{r save.img, include = FALSE, eval=TRUE} +```{r save.img, include = FALSE, eval = FALSE} p <- DimPlot(obj, label = T, label.size = 3, reduction = "ref.umap", group.by = "cluster_full", alpha = 0.1) + NoLegend() ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) ``` @@ -184,7 +193,12 @@ Note that we can start to see distinct interneuron lineages emerging in this dat These results closely mirror our findings from [Mayer*, Hafemeister*, Bandler* et al, Nature 2018](https://www.nature.com/articles/nature25999), where we enriched for interneuron precursors using a Dlx6a-cre fate-mapping strategy. Here, we obtain similar results using only computational enrichment, enabled by the large size of the original dataset. ```{r,fig.height = 7, fig.width = 10} -FeaturePlot(obj.sub, c('Dlx2', 'Gad2', 'Lhx6', 'Nr2f2', 'Sst', 'Mef2c', 'Meis2', 'Id2', 'Dlx6os1'), ncol = 3) +FeaturePlot( + object = obj.sub, + features = c( + 'Dlx2', 'Gad2', 'Lhx6', 'Nr2f2', 'Sst', + 'Mef2c', 'Meis2', 'Id2', 'Dlx6os1'), + ncol = 3) ``` ```{r save.times, include=FALSE, eval=FALSE} diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd index 8613dfe9d..be7da9654 100644 --- a/vignettes/spatial_vignette_2.Rmd +++ b/vignettes/spatial_vignette_2.Rmd @@ -558,7 +558,7 @@ sessionInfo() ```
    -```{r save.times, include=TRUE} +```{r save.times, include=FALSE} write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/spatial_vignette_2.csv") ``` diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index bd8656767..11fdf2be7 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -48,7 +48,7 @@ - category: References and additional documentation vignettes: - - title: Seurat 5 Essential Commands + - title: Seurat v5 Essential Commands name: seurat5_essential_commands summary: | Explore the new assay structure introduced in Seurat v5. From 5047275074f37a522aad1bd1ba4b1b7fa5ddb1a4 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Mar 2023 17:05:41 -0400 Subject: [PATCH 604/979] update citations --- vignettes/COVID_SCTMapping.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 1a2bedeb6..b9f9299ba 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -46,12 +46,12 @@ options(future.globals.maxSize = 1e9) In Seurat v5, we introduce a scalable approach for reference mapping datasets from separate studies or individuals. Reference mapping is a powerful approach to identify consistent labels across studies and perform cross-dataset analysis. We emphasize that while individual datasets are manageable in size, the aggregate of many datasets often amounts to millions of cell which do not fit in-memory. Importantly, we never simultaneously load all of the cells in-memory to maintain low memory usage. Cross-dataset analysis is often challenged by disparate or unique cell type labels. Thus, reference mapping annotates cells using a common reference for consistent cell type labels. -In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections):[Ahern et al. (2022) Nature](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin et al. (2021) Science](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida et al. (2022) Nature](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from healthy and COVID-19 donors. Using the harmonized annotations, we demonstrate how to perform differential expression across disease within cell types using pseudobulk expression. +In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections): [Ahern, et al., Nature 2022](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin, et al., Science 2021](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida, et al., Nature 2022](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from healthy and COVID-19 donors. Using the harmonized annotations, we demonstrate how to perform differential expression across disease within cell types using pseudobulk expression. Prior to running this vignette, please [install Seurat v5](articles/install.html) and see the [BPCells vignette](articles/seurat5_bpcells_interaction_vignette.html) to construct the on-disk object used in this vignette. Additionally, we map to our annotated CITE-seq reference containing 162,000 cells and 228 antibodies ([Hao*, Hao*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)) which is available for download [here](https://zenodo.org/record/7779017#.ZCMojezMJqs). ## Load the PBMC Reference Dataset and Query Datasets -We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](articles/seurat5_bpcells_interaction_vignette.html). The query object is generated for datasets from three different studies using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. As a result, the three datasets reside in three separate `layers` stored on-disk. +We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](articles/seurat5_bpcells_interaction_vignette.html). The query object consists of datasets from three different studies constructed using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. Within the Seurat object, the three datasets reside the `RNA` assay in three separate `layers` on-disk. ```{r load.data} reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") From 364ac6e4c963ce593937ae05808f8e38a524b5fd Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 28 Mar 2023 18:07:12 -0400 Subject: [PATCH 605/979] edits to covid vignette --- vignettes/COVID_SCTMapping.Rmd | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index b9f9299ba..3cbf824bc 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -44,14 +44,15 @@ options(future.globals.maxSize = 1e9) ## Introduction: Reference mapping analysis in Seurat v5 -In Seurat v5, we introduce a scalable approach for reference mapping datasets from separate studies or individuals. Reference mapping is a powerful approach to identify consistent labels across studies and perform cross-dataset analysis. We emphasize that while individual datasets are manageable in size, the aggregate of many datasets often amounts to millions of cell which do not fit in-memory. Importantly, we never simultaneously load all of the cells in-memory to maintain low memory usage. Cross-dataset analysis is often challenged by disparate or unique cell type labels. Thus, reference mapping annotates cells using a common reference for consistent cell type labels. +In Seurat v5, we introduce a scalable approach for reference mapping datasets from separate studies or individuals. Reference mapping is a powerful approach to identify consistent labels across studies and perform cross-dataset analysis. We emphasize that while individual datasets are manageable in size, the aggregate of many datasets often amounts to millions of cell which do not fit in-memory. Importantly, we never simultaneously load all of the cells in-memory to maintain low memory usage. Furthermore, cross-dataset analysis is often challenged by disparate or unique cell type labels. Through reference mapping, we annotate all cells with a common reference for consistent cell type labels. -In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections): [Ahern, et al., Nature 2022](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin, et al., Science 2021](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida, et al., Nature 2022](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from healthy and COVID-19 donors. Using the harmonized annotations, we demonstrate how to perform differential expression across disease within cell types using pseudobulk expression. +In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections): [Ahern, et al., Nature 2022](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin, et al., Science 2021](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida, et al., Nature 2022](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from both healthy donors and donors diagnosed with COVID-19. Using the harmonized annotations, we demonstrate how to prepare a pseudobulk object to perform differential expression analysis across disease within cell types. Prior to running this vignette, please [install Seurat v5](articles/install.html) and see the [BPCells vignette](articles/seurat5_bpcells_interaction_vignette.html) to construct the on-disk object used in this vignette. Additionally, we map to our annotated CITE-seq reference containing 162,000 cells and 228 antibodies ([Hao*, Hao*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)) which is available for download [here](https://zenodo.org/record/7779017#.ZCMojezMJqs). ## Load the PBMC Reference Dataset and Query Datasets -We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](articles/seurat5_bpcells_interaction_vignette.html). The query object consists of datasets from three different studies constructed using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. Within the Seurat object, the three datasets reside the `RNA` assay in three separate `layers` on-disk. +We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](articles/seurat5_bpcells_interaction_vignette.html). The query object consists of datasets from three different studies constructed using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. Within the Seurat object, the three datasets reside in the `RNA` assay in three separate `layers` on-disk. + ```{r load.data} reference <- readRDS("/brahms/hartmana/vignette_data/pbmc_multimodal_2023.rds") @@ -60,7 +61,7 @@ object <- NormalizeData(object, verbose = FALSE) ``` ## Mapping -Using the same code from the [v4 reference mapping vignette](articles/multimodal_reference_mapping.html), we find anchors between reference and query in the precomputed supervised PCA. We recommend the use of supervised PCA for CITE-seq reference datasets, and demonstrate how to compute this transformation in [v4 reference mapping vignette](articles/multimodal_reference_mapping.html). In Seurat v5, all three datasets are mapped by calling `FindTransferAnchors` and `MapQuery` once. +Using the same code from the [v4 reference mapping vignette](articles/multimodal_reference_mapping.html), we find anchors between the reference and query in the precomputed supervised PCA. We recommend the use of supervised PCA for CITE-seq reference datasets, and demonstrate how to compute this transformation in [v4 reference mapping vignette](articles/multimodal_reference_mapping.html). In Seurat v5, we only need to call `FindTransferAnchors` and `MapQuery` once to map all three datasets as they are all contained within the query object. Furthermore, utilizing the on-disk capabilities of BPCells(https://github.com/bnprks/BPCells) , we map 1.5 million cells without ever loading them all into memory. ```{r} anchor <- FindTransferAnchors( @@ -82,7 +83,7 @@ object <- MapQuery( ``` ## Explore the mapping results -Next, we visualize all cells from the three studies which have been projected into a UMAP-space defined by the reference. Each cell is annotated at two levels of granularity (`predicted.celltype.l1` and `predicted.celltype.l2`). The differing ontologies used in the original annotations (`cell_type`) prevent us from directly performing integrative analysis across studies. +Next, we visualize all cells from the three studies which have been projected into a UMAP-space defined by the reference. Each cell is annotated at two levels of granularity (`predicted.celltype.l1` and `predicted.celltype.l2`). We can compare the the differing ontologies used in the original annotations (`cell_type`) to a level of the now harmonized annotations (`predicted.celltype.l2`) that were predicted from reference-mapping. Previously, the lack of standardization prevented us from directly performing integrative analysis across studies, but now we can easily compare. ```{r, fig.width=10, fig.height=6} DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3, label.size = 3) + NoLegend() @@ -92,7 +93,7 @@ DimPlot(object, reduction = 'ref.umap', group.by = 'predicted.celltype.l2',alpha ``` ## Differential composition analysis -We utilize our annotations to identify differences in the proportion of different cell types between healthy individuals and COVID-19 patients. For example, we noticed a reduction in MAIT cells as well as an increase in plasmablasts among COVID-19 patients. +We utilize our harmonized annotations to identify differences in the proportion of different cell types between healthy individuals and COVID-19 patients. For example, we noticed a reduction in MAIT cells as well as an increase in plasmablasts among COVID-19 patients. ```{r} df_comp <- as.data.frame.matrix(table(object$donor_id, object$predicted.celltype.l2)) From 27d3c306f6ef0c2bc180dfbc52988835b2d001a9 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 28 Mar 2023 18:11:53 -0400 Subject: [PATCH 606/979] small edits to covid vignette --- vignettes/COVID_SCTMapping.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 3cbf824bc..dde5bd326 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -83,7 +83,7 @@ object <- MapQuery( ``` ## Explore the mapping results -Next, we visualize all cells from the three studies which have been projected into a UMAP-space defined by the reference. Each cell is annotated at two levels of granularity (`predicted.celltype.l1` and `predicted.celltype.l2`). We can compare the the differing ontologies used in the original annotations (`cell_type`) to a level of the now harmonized annotations (`predicted.celltype.l2`) that were predicted from reference-mapping. Previously, the lack of standardization prevented us from directly performing integrative analysis across studies, but now we can easily compare. +Next, we visualize all cells from the three studies which have been projected into a UMAP-space defined by the reference. Each cell is annotated at two levels of granularity (`predicted.celltype.l1` and `predicted.celltype.l2`). We can compare the differing ontologies used in the original annotations (`cell_type`) to the now harmonized annotations (`predicted.celltype.l2`, for example) that were predicted from reference-mapping. Previously, the lack of standardization prevented us from directly performing integrative analysis across studies, but now we can easily compare. ```{r, fig.width=10, fig.height=6} DimPlot(object, reduction = 'ref.umap', group.by = 'cell_type',alpha = 0.1, label = TRUE, split.by = 'publication', ncol = 3, label.size = 3) + NoLegend() From ec4f7f11e3caf3dfc1789dc8f376e0644823afc0 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 28 Mar 2023 20:21:30 -0400 Subject: [PATCH 607/979] bump version and add author --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9a3677c27..b5a990923 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9039 -Date: 2023-03-18 +Version: 4.9.9.9040 +Date: 2023-03-28 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -14,6 +14,7 @@ Authors@R: c( person(given = "Paul", family = "Hoffman", email = "seurat@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7693-8957")), person(given = "Jaison", family = "Jain", email = "jjain@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-9478-5018")), person(given = "Madeline", family = "Kowalski", email = "mkowalski@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-5655-7620")), + person(given = "Gesmira", family = "Molla", email = 'gmolla@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0002-8628-5056')), person(given = "Efthymia", family = "Papalexi", email = "epapalexi@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-5898-694X")), person(given = "Patrick", family = "Roelli", email = "proelli@nygenome.org", role = "ctb"), person(given = "Rahul", family = "Satija", email = "rsatija@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-9448-8833")), From cba15a8337a3847bc8101a67f42c982fe814032b Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Mar 2023 20:25:00 -0400 Subject: [PATCH 608/979] update rmd --- vignettes/COVID_SCTMapping.Rmd | 6 +-- vignettes/ParseBio_sketch_integration.Rmd | 10 ++-- vignettes/seurat5_essential_commands.Rmd | 58 ++++++++++------------- vignettes/seurat5_sketch_analysis.Rmd | 4 +- 4 files changed, 35 insertions(+), 43 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index dde5bd326..197720366 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -48,10 +48,10 @@ In Seurat v5, we introduce a scalable approach for reference mapping datasets fr In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections): [Ahern, et al., Nature 2022](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin, et al., Science 2021](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida, et al., Nature 2022](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from both healthy donors and donors diagnosed with COVID-19. Using the harmonized annotations, we demonstrate how to prepare a pseudobulk object to perform differential expression analysis across disease within cell types. -Prior to running this vignette, please [install Seurat v5](articles/install.html) and see the [BPCells vignette](articles/seurat5_bpcells_interaction_vignette.html) to construct the on-disk object used in this vignette. Additionally, we map to our annotated CITE-seq reference containing 162,000 cells and 228 antibodies ([Hao*, Hao*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)) which is available for download [here](https://zenodo.org/record/7779017#.ZCMojezMJqs). +Prior to running this vignette, please [install Seurat v5](install.html) and see the [BPCells vignette](seurat5_bpcells_interaction_vignette.html) to construct the on-disk object used in this vignette. Additionally, we map to our annotated CITE-seq reference containing 162,000 cells and 228 antibodies ([Hao*, Hao*, et al., Cell 2021](https://doi.org/10.1016/j.cell.2021.04.048)) which is available for download [here](https://zenodo.org/record/7779017#.ZCMojezMJqs). ## Load the PBMC Reference Dataset and Query Datasets -We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](articles/seurat5_bpcells_interaction_vignette.html). The query object consists of datasets from three different studies constructed using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. Within the Seurat object, the three datasets reside in the `RNA` assay in three separate `layers` on-disk. +We first load the reference (available [here](https://zenodo.org/record/7779017#.ZCMojezMJqs)) and normalize the query Seurat object prepared in the [BPCells interaction vignette](seurat5_bpcells_interaction_vignette.html). The query object consists of datasets from three different studies constructed using the `CreateSeuratObject` function, which accepts a list of BPCells matrices as input. Within the Seurat object, the three datasets reside in the `RNA` assay in three separate `layers` on-disk. ```{r load.data} @@ -61,7 +61,7 @@ object <- NormalizeData(object, verbose = FALSE) ``` ## Mapping -Using the same code from the [v4 reference mapping vignette](articles/multimodal_reference_mapping.html), we find anchors between the reference and query in the precomputed supervised PCA. We recommend the use of supervised PCA for CITE-seq reference datasets, and demonstrate how to compute this transformation in [v4 reference mapping vignette](articles/multimodal_reference_mapping.html). In Seurat v5, we only need to call `FindTransferAnchors` and `MapQuery` once to map all three datasets as they are all contained within the query object. Furthermore, utilizing the on-disk capabilities of BPCells(https://github.com/bnprks/BPCells) , we map 1.5 million cells without ever loading them all into memory. +Using the same code from the [v4 reference mapping vignette](articles/multimodal_reference_mapping.html), we find anchors between the reference and query in the precomputed supervised PCA. We recommend the use of supervised PCA for CITE-seq reference datasets, and demonstrate how to compute this transformation in [v4 reference mapping vignette](articles/multimodal_reference_mapping.html). In Seurat v5, we only need to call `FindTransferAnchors` and `MapQuery` once to map all three datasets as they are all contained within the query object. Furthermore, utilizing the on-disk capabilities of [BPCells](https://github.com/bnprks/BPCells), we map 1.5 million cells without ever loading them all into memory. ```{r} anchor <- FindTransferAnchors( diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 08d127d1e..181e1b007 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -40,7 +40,7 @@ In this vignette, we demonstrate how to use atomic sketch integration to harmoni * Reconstruct (integrate) the full datasets, based on the atoms * Annotate all cells in the full datasets * Identify cell-type specific differences between healthy and diabetic patients -Prior to running this vignette, please [install Seurat v5](https://satijalab.org/seurat/articles/install.html), as well as the [BPCells](https://github.com/bnprks/BPCells) package, which we use for on-disk storage. You can read more about using BPCells in Seurat v5 [here](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html). We also recommend reading the [Sketch-based analysis in Seurat v5](https://satijalab.org/seurat/articles/seurat5_sketch_analysis.html) vignette, which introduces the concept of on-disk and in-memory storage in Seurat v5. +Prior to running this vignette, please [install Seurat v5](install.html), as well as the [BPCells](https://github.com/bnprks/BPCells) package, which we use for on-disk storage. You can read more about using BPCells in Seurat v5 [here](seurat5_bpcells_interaction_vignette.html). We also recommend reading the [Sketch-based analysis in Seurat v5](seurat5_sketch_analysis.html) vignette, which introduces the concept of on-disk and in-memory storage in Seurat v5. ```{r, warning=F, message=F} library(Seurat) library(BPCells) @@ -53,8 +53,8 @@ options(future.globals.maxSize = 3e9) options(Seurat.object.assay.version = "v5") ``` ## Create a Seurat object containing data from 24 patients -We downloaded the original dataset and donor metadata from [Parse Biosciences](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes), as an h5ad file. While the BPCells package can work directly with h5ad files, for optimal performance, we converted the dataset to the compressed sparse format used by BPCells, as described [here](LINKCONVERSIONVIGNETTE). -We create a Seurat object for this dataset. Since the input to `CreateSeuratObject` is a BPCells matrix, the data remains on-disk and is not loaded into memory. After creating the object, we split the dataset into 24 [layers](LINKTOVIGNETTE), one for each sample (i.e. patient), to facilitate integration. +We downloaded the original dataset and donor metadata from [Parse Biosciences](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes), as an h5ad file. While the BPCells package can work directly with h5ad files, for optimal performance, we converted the dataset to the compressed sparse format used by BPCells, as described [here](seurat5_bpcells_interaction_vignette.html). +We create a Seurat object for this dataset. Since the input to `CreateSeuratObject` is a BPCells matrix, the data remains on-disk and is not loaded into memory. After creating the object, we split the dataset into 24 [layers](seurat5_essential_commands.html), one for each sample (i.e. patient), to facilitate integration. ```{r, warning=F, message=F} parse.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") # need to move @@ -75,8 +75,8 @@ object ``` ## Perform integration on the sketched cells across samples -Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we perform integration using the streamlined [Seurat v5 integration worfklow](https://satijalab.org/seurat/articles/seurat5_integration.html), and utilize the reference-based `RPCAIntegration` method. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated.rpca`). -However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. This includes CCA Integration, Harmony, and scVI. We demonstrate how to use these tools in Seurat v5 [here](https://satijalab.org/seurat/articles/seurat5_integration.html). +Next we perform integrative analysis on the 'atoms' from each of the datasets. Here, we perform integration using the streamlined [Seurat v5 integration worfklow](seurat5_integration.html), and utilize the reference-based `RPCAIntegration` method. The function performs all corrections in low-dimensional space (rather than on the expression values themselves) to further improve speed and memory usage, and outputs a merged Seurat object where all cells have been placed in an integrated low-dimensional space (stored as `integrated.rpca`). +However, we emphasize that you can perform integration here using any analysis technique that places cells across datasets into a shared space. This includes CCA Integration, Harmony, and scVI. We demonstrate how to use these tools in Seurat v5 [here](seurat5_integration.html). ```{r} DefaultAssay(object) <- 'sketch' object <- FindVariableFeatures(object, verbose = F) diff --git a/vignettes/seurat5_essential_commands.Rmd b/vignettes/seurat5_essential_commands.Rmd index 84d975180..529212406 100644 --- a/vignettes/seurat5_essential_commands.Rmd +++ b/vignettes/seurat5_essential_commands.Rmd @@ -1,5 +1,5 @@ --- -title: "Seurat 5 Essential Commands" +title: "Seurat v5 Essential Commands" output: html_document: theme: united @@ -41,48 +41,45 @@ library(dplyr) options(Seurat.object.assay.version = "v5") ``` -The Assay5 object is able to support different matrix data structures to best suit particular analyses. The PBMC 3k dataset contains ~3,000 cells, which can easily be stored in-memory as a sparse `dgCMatrix`. On the other hand, it can be impractical in terms of memory and computation to store and concomitantly analyze millions of cells in-memory. Thus, the mouse brain dataset stores counts on-disk in an Assay5 object. +The Assay5 object is able to support different matrix data structures to best suit particular analyses. The PBMC 3k dataset contains ~3,000 cells, which can easily be stored in-memory as a sparse `dgCMatrix`. On the other hand, it can be impractical in terms of memory and computation to store and concomitantly analyze millions of cells in-memory. Thus, the mouse brain dataset stores counts on-disk in an Assay5 object. The mouse brain 1 million cell object is constructed as described in the [BPCells vignette](seurat5_bpcells_interaction_vignette.html). ```{r matrix} pbmc3k <- LoadData("pbmc3k") mousebrain1m <- readRDS("/brahms/hartmana/vignette_data/1p3_million_mouse_brain.rds") - + # Both are Assay5's -print(class(pbmc3k[["RNA"]])) -print(class(mousebrain1m[["RNA"]])) +class(pbmc3k[["RNA"]]) +class(mousebrain1m[["RNA"]]) # Get counts layer by "$" or "[[" symbols. -pbmc3k[["RNA"]]$counts -pbmc3k[["RNA"]][["counts"]] +head(pbmc3k[["RNA"]]$counts[, Cells(pbmc3k)[1:20]]) +head(pbmc3k[["RNA"]][["counts"]][, Cells(pbmc3k)[1:20]]) # New layers can be added or deleted to assays with the "$" or "[[" symbols. pbmc3k[["RNA"]]$data <- log1p(pbmc3k[["RNA"]]$counts) pbmc3k[["RNA"]]$data <- NULL - - -# But different underlying data structures storing counts -print(class(pbmc3k[["RNA"]]$counts)) -print(class(mousebrain1m[["RNA"]]$counts)) +# But different underlying data structures storing counts +class(pbmc3k[["RNA"]]$counts) +class(mousebrain1m[["RNA"]]$counts) ``` Despite the drastic difference in dataset size, the 1.3M cell dataset occupies a small memory footprint thanks to on-disk storage. ```{r} -print(paste("PBMC 3k contains", length(colnames(pbmc3k)), "cells")) -print(paste("Mouse brain 1.3M contains", length(colnames(mousebrain1m)), "cells")) +paste("PBMC 3k contains", length(colnames(pbmc3k)), "cells") +paste("Mouse brain 1.3M contains", length(colnames(mousebrain1m)), "cells") # Despite the mouse brain dataset containing 1.3 million cells, the assay is under 350Mbs in size due to on-disk storage -print(paste("PBMC 3k assay size:", format(object.size(pbmc3k[["RNA"]]), units = "Mb"))) -print(paste("Mouse brain 1.3M assay size:", format(object.size(mousebrain1m[["RNA"]]), units = "Mb"))) +paste("PBMC 3k assay size:", format(object.size(pbmc3k[["RNA"]]), units = "Mb")) +paste("Mouse brain 1.3M assay size:", format(object.size(mousebrain1m[["RNA"]]), units = "Mb")) ``` Get cell names. Since Seurat v5 object doesn't require all assays have the same cells, `Cells()` is designed to get cell names of the default assay and `colnames()` is deigned to get cell names of the entire object ```{r} pbmc3k[["RNAsub"]] <- subset(pbmc3k[["RNA"]], cells = colnames(pbmc3k)[1:100]) DefaultAssay(pbmc3k) <- 'RNAsub' -print(length(Cells(pbmc3k))) -print(length(colnames(pbmc3k))) - +length(Cells(pbmc3k)) +length(colnames(pbmc3k)) ``` @@ -100,22 +97,22 @@ The `Seurat.object.assay.version` option can also be set to create `Assay5` or ` options(Seurat.object.assay.version = "v3") pbmc.counts <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") pbmc <- CreateSeuratObject(counts = pbmc.counts) -print(class(pbmc[["RNA"]])) +class(pbmc[["RNA"]]) # create v5 assays options(Seurat.object.assay.version = "v5") pbmc.counts <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") pbmc <- CreateSeuratObject(counts = pbmc.counts) pbmc <- NormalizeData(pbmc) -print(class(pbmc[["RNA"]])) +class(pbmc[["RNA"]]) ``` `CreateAssayObject()` and `CreateAssay5Object()` can be used to create v3 and v5 assay regardless of the setting in `Seurat.object.assay.version` ```{r} assay.v3 <- CreateAssayObject(counts = pbmc.counts) assay.v5 <- CreateAssay5Object(counts = pbmc.counts) -print(class(assay.v3)) -print(class(assay.v5)) +class(assay.v3) +class(assay.v5) # Normalized data can also be used to create Assay5 assay.v5 <- CreateAssay5Object(data = log1p(pbmc.counts)) @@ -126,15 +123,13 @@ Layers(assay.v5) assay.v5 <- CreateAssay5Object(counts = pbmc.counts, data = log1p(pbmc.counts)) Layers(assay.v5) - # A list of matrices can be used to generate multi-layer assay split.data <- sample(c("A", "B", "C"), size = ncol(pbmc), replace = TRUE) cells.split <- split(x = colnames(pbmc.counts), f = split.data) -pbmc.counts.list <- lapply(X = cells.split,FUN = function(x) pbmc.counts[,x]) +pbmc.counts.list <- lapply(X = cells.split, FUN = function(x) pbmc.counts[, x]) assay.v5 <- CreateAssay5Object(counts = pbmc.counts.list) Layers(assay.v5) - ``` Counts Layers can be split based on metadata in the assay level @@ -161,7 +156,6 @@ And then joined back together in a single layer. ```{r} pbmc[["RNA"]] <- JoinLayers(pbmc[["RNA"]]) Layers(pbmc[["RNA"]]) - ``` Dimensionality reduction data and metadata can be retrieved through the use of `FetchData`, `[[`, or V4 functions, depending on the use case. @@ -175,15 +169,13 @@ fetch_df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "PC head(fetch_df) # get cell embeddings -head(Embeddings(object = pbmc[['pca']])[,1:5]) -head(pbmc[['pca']][[]][,1:5]) +head(Embeddings(object = pbmc[['pca']])[, 1:5]) +head(pbmc[['pca']][[]][, 1:5]) # get feature loadings -head(Loadings(object = pbmc[['pca']])[,1:5]) -head(pbmc[['pca']][][,1:5]) +head(Loadings(object = pbmc[['pca']])[, 1:5]) +head(pbmc[['pca']][][, 1:5]) # get meta.data head(pbmc[[]]) - - ``` \ No newline at end of file diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index c96f24570..03d156d1d 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -45,7 +45,7 @@ We store sketched cells (in-memory) and the full dataset (on-disk) as two assays * Support for 'bit-packing' compression and infrastructure We demonstrate the on-disk capabilities in Seurat v5 using the [BPCells package](https://github.com/bnprks/BPCells) developed by Ben Parks in the Greenleaf Lab. This package utilizes bit-packing compression and optimized, streaming-compatible C++ code to substantially improve I/O and computational performance when working with on-disk data. -To run this vignette please install Seurat v5, using the installation instructions found [here](https://satijalab.org/seurat/articles/install.html). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](https://bnprks.github.io/BPCells/#installation). +To run this vignette please install Seurat v5, using the installation instructions found [here](install.html). Additionally, you will need to install the `BPcells` package, using the installation instructions found [here](https://bnprks.github.io/BPCells/#installation). ```{r, warning=FALSE, message=FALSE} library(Seurat) @@ -57,7 +57,7 @@ options(future.globals.maxSize = 1e9) ## Create a Seurat object with a v5 assay for on-disk storage -We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_dir` function from `BPCells`. Note that this function does not load the dataset into memory, but instead, creates a connection to the data stored on-disk. We then store this on-disk representation in the Seurat object. Note that in our [Introduction to on-disk storage vignette](https://satijalab.org/seurat/articles/seurat5_bpcells_interaction_vignette.html), we demonstrate how to create this on-disk representation. +We start by loading the 1.3M dataset from 10x Genomics using the `open_matrix_dir` function from `BPCells`. Note that this function does not load the dataset into memory, but instead, creates a connection to the data stored on-disk. We then store this on-disk representation in the Seurat object. Note that in our [Introduction to on-disk storage vignette](seurat5_bpcells_interaction_vignette.html), we demonstrate how to create this on-disk representation. ```{r} # specify that you would like to create a Seurat v5 assay From 37f95960f869ddeec8c22cbb9502b41ff889f0ec Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Mar 2023 20:29:24 -0400 Subject: [PATCH 609/979] update sentence order --- vignettes/COVID_SCTMapping.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 197720366..7bc9f2313 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -44,7 +44,7 @@ options(future.globals.maxSize = 1e9) ## Introduction: Reference mapping analysis in Seurat v5 -In Seurat v5, we introduce a scalable approach for reference mapping datasets from separate studies or individuals. Reference mapping is a powerful approach to identify consistent labels across studies and perform cross-dataset analysis. We emphasize that while individual datasets are manageable in size, the aggregate of many datasets often amounts to millions of cell which do not fit in-memory. Importantly, we never simultaneously load all of the cells in-memory to maintain low memory usage. Furthermore, cross-dataset analysis is often challenged by disparate or unique cell type labels. Through reference mapping, we annotate all cells with a common reference for consistent cell type labels. +In Seurat v5, we introduce a scalable approach for reference mapping datasets from separate studies or individuals. Reference mapping is a powerful approach to identify consistent labels across studies and perform cross-dataset analysis. We emphasize that while individual datasets are manageable in size, the aggregate of many datasets often amounts to millions of cell which do not fit in-memory. Furthermore, cross-dataset analysis is often challenged by disparate or unique cell type labels. Through reference mapping, we annotate all cells with a common reference for consistent cell type labels. Importantly, we never simultaneously load all of the cells in-memory to maintain low memory usage. In this vignette, we reference map three publicly available datasets totaling 1,498,064 cells and 277 donors which are available through [CZI cellxgene collections](https://cellxgene.cziscience.com/collections): [Ahern, et al., Nature 2022](https://cellxgene.cziscience.com/collections/8f126edf-5405-4731-8374-b5ce11f53e82), [Jin, et al., Science 2021](https://cellxgene.cziscience.com/collections/b9fc3d70-5a72-4479-a046-c2cc1ab19efc), and [Yoshida, et al., Nature 2022](https://cellxgene.cziscience.com/collections/03f821b4-87be-4ff4-b65a-b5fc00061da7). Each dataset consists of PBMCs from both healthy donors and donors diagnosed with COVID-19. Using the harmonized annotations, we demonstrate how to prepare a pseudobulk object to perform differential expression analysis across disease within cell types. From 722ecf61a0dc6ae0044cf2a8cc4dde85edf48960 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Tue, 28 Mar 2023 21:59:26 -0400 Subject: [PATCH 610/979] vignette updates --- vignettes/ParseBio_sketch_integration.Rmd | 2 + vignettes/seurat5_essential_commands.Rmd | 175 ++++++++++++++-------- 2 files changed, 118 insertions(+), 59 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 181e1b007..8e28c5461 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -35,11 +35,13 @@ knitr::opts_chunk$set( The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments 1M cells, though we have used this procedure to integrate datasets of 10M+ cells as well. We analyze a dataset from Parse Biosciences, in which PBMC from 24 human samples (12 healthy donors, 12 Type-1 diabetes donors), which is available [here](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes). + * Sample a representative subset of cells ('atoms') from each dataset * Integrate the atoms from each dataset, and define a set of cell states * Reconstruct (integrate) the full datasets, based on the atoms * Annotate all cells in the full datasets * Identify cell-type specific differences between healthy and diabetic patients + Prior to running this vignette, please [install Seurat v5](install.html), as well as the [BPCells](https://github.com/bnprks/BPCells) package, which we use for on-disk storage. You can read more about using BPCells in Seurat v5 [here](seurat5_bpcells_interaction_vignette.html). We also recommend reading the [Sketch-based analysis in Seurat v5](seurat5_sketch_analysis.html) vignette, which introduces the concept of on-disk and in-memory storage in Seurat v5. ```{r, warning=F, message=F} library(Seurat) diff --git a/vignettes/seurat5_essential_commands.Rmd b/vignettes/seurat5_essential_commands.Rmd index 529212406..09d2121a9 100644 --- a/vignettes/seurat5_essential_commands.Rmd +++ b/vignettes/seurat5_essential_commands.Rmd @@ -1,5 +1,5 @@ --- -title: "Seurat v5 Essential Commands" +title: "Seurat v5 Command Cheat Sheet" output: html_document: theme: united @@ -31,7 +31,7 @@ knitr::opts_chunk$set( ) ``` -Here, we describe important commands and functions to store, access, and process data using Seurat v5. +Here, we describe important commands and functions to store, access, and process data using Seurat v5. To demonstrate commamnds, we use a dataset of 3,000 PBMC (stored in-memory), and a dataset of 1.3M E18 mouse neurons (stored on-disk), which we constructed as described in the [BPCells vignette](seurat5_bpcells_interaction_vignette.html). ```{r loaddata} library(Seurat) @@ -41,28 +41,40 @@ library(dplyr) options(Seurat.object.assay.version = "v5") ``` -The Assay5 object is able to support different matrix data structures to best suit particular analyses. The PBMC 3k dataset contains ~3,000 cells, which can easily be stored in-memory as a sparse `dgCMatrix`. On the other hand, it can be impractical in terms of memory and computation to store and concomitantly analyze millions of cells in-memory. Thus, the mouse brain dataset stores counts on-disk in an Assay5 object. The mouse brain 1 million cell object is constructed as described in the [BPCells vignette](seurat5_bpcells_interaction_vignette.html). +## Load datasets ```{r matrix} pbmc3k <- LoadData("pbmc3k") mousebrain1m <- readRDS("/brahms/hartmana/vignette_data/1p3_million_mouse_brain.rds") -# Both are Assay5's +# RNA assay is of the Assay5 class class(pbmc3k[["RNA"]]) class(mousebrain1m[["RNA"]]) +``` + +## Access and store expression data + +The `$` and double-bracket `[[]]` symbols can be used as efficient accessor functions for Seurat5 assays. -# Get counts layer by "$" or "[[" symbols. -head(pbmc3k[["RNA"]]$counts[, Cells(pbmc3k)[1:20]]) -head(pbmc3k[["RNA"]][["counts"]][, Cells(pbmc3k)[1:20]]) +```{r } -# New layers can be added or deleted to assays with the "$" or "[[" symbols. -pbmc3k[["RNA"]]$data <- log1p(pbmc3k[["RNA"]]$counts) +# access the counts matrix from the RNA assay +counts_matrix <- pbmc3k[["RNA"]]$counts + +# Add a layer +# Equivalent to running pbmc3k <-NormalizeData(pbmc3k) +pbmc3k[["RNA"]]$data <- NormalizeData(pbmc3k[["RNA"]]$counts) + +# Delete a layer pbmc3k[["RNA"]]$data <- NULL -# But different underlying data structures storing counts +# pbmc3k counts matrix is stored in-memory class(pbmc3k[["RNA"]]$counts) + +# 1.3M cell dataset counts matrix is stored on-disk class(mousebrain1m[["RNA"]]$counts) ``` + Despite the drastic difference in dataset size, the 1.3M cell dataset occupies a small memory footprint thanks to on-disk storage. ```{r} @@ -74,7 +86,10 @@ paste("PBMC 3k assay size:", format(object.size(pbmc3k[["RNA"]]), units = "Mb")) paste("Mouse brain 1.3M assay size:", format(object.size(mousebrain1m[["RNA"]]), units = "Mb")) ``` +## Access cell names and metadata + Get cell names. Since Seurat v5 object doesn't require all assays have the same cells, `Cells()` is designed to get cell names of the default assay and `colnames()` is deigned to get cell names of the entire object + ```{r} pbmc3k[["RNAsub"]] <- subset(pbmc3k[["RNA"]], cells = colnames(pbmc3k)[1:100]) DefaultAssay(pbmc3k) <- 'RNAsub' @@ -82,17 +97,26 @@ length(Cells(pbmc3k)) length(colnames(pbmc3k)) ``` +Access object metadata -We can also cast between `Assay` and `Assay5` objects with `as()`. Note that the `RNA` assay is an `Assay5` object. +```{r meta} + +# get all object metadata +pbmc_metadata <- pbmc3k[[]] + +# get list of metadata columns +colnames(pbmc_metadata) + +# get annotations stored in metadata +annotations <- pbmc3k$seurat_annotations -```{r} -pbmc3k[["RNA3"]] <- as(object = pbmc3k[["RNA"]], Class = "Assay") -pbmc3k[["RNA5"]] <- as(object = pbmc3k[["RNA3"]], Class = "Assay5") ``` -The `Seurat.object.assay.version` option can also be set to create `Assay5` or `Assay` objects when new Assays or Seurat objects are created. +## Create Seurat or Assay objects -```{r} +By setting a global option (`Seurat.object.assay.version`), you can default to creating either Seurat v3 assays, or Seurat v5 assays. The use of v3 assays is set by default upon package loading, which ensures backwards compatibiltiy with existing workflows. + +```{r create} # create v3 assays options(Seurat.object.assay.version = "v3") pbmc.counts <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") @@ -103,79 +127,112 @@ class(pbmc[["RNA"]]) options(Seurat.object.assay.version = "v5") pbmc.counts <- Read10X(data.dir = "/brahms/hartmana/vignette_data/pbmc3k/filtered_gene_bc_matrices/hg19/") pbmc <- CreateSeuratObject(counts = pbmc.counts) -pbmc <- NormalizeData(pbmc) class(pbmc[["RNA"]]) ``` `CreateAssayObject()` and `CreateAssay5Object()` can be used to create v3 and v5 assay regardless of the setting in `Seurat.object.assay.version` + ```{r} +#create a v3 assay assay.v3 <- CreateAssayObject(counts = pbmc.counts) + +#create a v5 assay assay.v5 <- CreateAssay5Object(counts = pbmc.counts) + class(assay.v3) class(assay.v5) +``` + +Assay5 objects are more flexible, and can be used to store only a data layer, with no counts data. This can be used to create Seurat objects that require less space -# Normalized data can also be used to create Assay5 +```{r} +# create an assay using only normalized data assay.v5 <- CreateAssay5Object(data = log1p(pbmc.counts)) -Layers(assay.v5) -# Simultaneous setting of counts and normalized data is possible, -# but the cells need to be the same. -assay.v5 <- CreateAssay5Object(counts = pbmc.counts, data = log1p(pbmc.counts)) -Layers(assay.v5) +# create a Seurat object based on this assay +pbmc3k_slim <- CreateSeuratObject(assay.v5) +pbmc3k_slim +``` + +We can also convert (cast) between `Assay` and `Assay5` objects with `as()`. -# A list of matrices can be used to generate multi-layer assay -split.data <- sample(c("A", "B", "C"), size = ncol(pbmc), replace = TRUE) -cells.split <- split(x = colnames(pbmc.counts), f = split.data) -pbmc.counts.list <- lapply(X = cells.split, FUN = function(x) pbmc.counts[, x]) +```{r} +# convert a v5 assay to a v3 assay +pbmc3k[["RNA3"]] <- as(object = pbmc3k[["RNA"]], Class = "Assay") -assay.v5 <- CreateAssay5Object(counts = pbmc.counts.list) -Layers(assay.v5) +# convert a v3 assay to a v5 assay +pbmc3k[["RNA5"]] <- as(object = pbmc3k[["RNA3"]], Class = "Assay5") ``` -Counts Layers can be split based on metadata in the assay level +## Working with layers + +Seurat v5 assays store data in layers. These layers can store raw, un-normalized counts (`layer='counts'`), normalized data (`layer='data'`), or z-scored/variance-stabilized data (`layer='scale.data'`). + ```{r} -pbmc <- CreateSeuratObject(counts = pbmc.counts) -pbmc <- NormalizeData(pbmc) -pbmc$split.data <- split.data -pbmc[["RNA"]] <- split(x = pbmc[["RNA"]], f = pbmc$split.data) -Layers(pbmc[["RNA"]]) +# by default, creates an RNA assay with a counts layer +obj <- CreateSeuratObject(counts = pbmc.counts) +obj + +# creates a normalized data layer +obj <- NormalizeData(obj,verbose = FALSE) +obj + +#extract only the layer names from an assay +Layers(obj[["RNA"]]) ``` -Search and get Layers name -```{r} -Layers(object = pbmc[['RNA']], search = 'counts') -Layers(object = pbmc[['RNA']], search = 'counts.B') -# it will return the exact match first. If no exact match, it will return layer names start with search input -pbmc[['RNA']]$counts <- pbmc[['RNA']]$counts.A -Layers(object = pbmc[['RNA']], search = 'counts') +Prior to performing integration analysis in Seurat v5, we can split the layers into groups. The `IntegrateLayers` function, described in [our vignette](seurat5_integration.html), will then align shared cell types across these layers. After performing integration, you can rejoin the layers. + +```{r joinsplit} +# create random batches +pbmc3k$batch <- sample(c("batchA","batchB","batchC"),ncol(pbmc3k),replace = TRUE) + +# split layers +pbmc3k[["RNA"]] <- split(pbmc3k[["RNA"]], f=pbmc3k$batch) +Layers(pbmc3k[["RNA"]]) -pbmc[['RNA']]$counts <- NULL +# rejoin layers +pbmc3k[["RNA"]] <- JoinLayers(pbmc3k[["RNA"]]) +Layers(pbmc3k[["RNA"]]) ``` -And then joined back together in a single layer. -```{r} -pbmc[["RNA"]] <- JoinLayers(pbmc[["RNA"]]) -Layers(pbmc[["RNA"]]) +If you have multiple counts matrices, you can also create a Seurat object that is initialized with multiple layers. + +```{r multilayer} +batchA_counts <- pbmc.counts[,1:200] +batchB_counts <- pbmc.counts[,201:400] +batchC_counts <- pbmc.counts[,401:600] +count_list <- list(batchA_counts,batchB_counts,batchC_counts) +names(count_list) <- c('batchA','batchB','batchC') + +# create a Seurat object initialized with multiple layers +obj <- CreateSeuratObject(counts = count_list) +Layers(obj[["RNA"]]) ``` -Dimensionality reduction data and metadata can be retrieved through the use of `FetchData`, `[[`, or V4 functions, depending on the use case. +## Accessing additional data + ```{r} -pbmc <- FindVariableFeatures(pbmc) -pbmc <- ScaleData(pbmc) -pbmc <- RunPCA(pbmc) +pbmc3k <- FindVariableFeatures(pbmc3k, verbose = FALSE) +pbmc3k <- ScaleData(pbmc3k,verbose = FALSE) +pbmc3k <- RunPCA(pbmc3k,verbose = FALSE) + +# return variable features # returns information from both assay, cell embeddings and meta.data as a data.frame -fetch_df <- FetchData(object = pbmc, layer = "counts", vars = c("rna_MS4A1", "PC_1", "nCount_RNA")) +fetch_df <- FetchData(object = pbmc3k, layer = "counts", vars = c("rna_MS4A1", "PC_1", "nCount_RNA")) head(fetch_df) # get cell embeddings -head(Embeddings(object = pbmc[['pca']])[, 1:5]) -head(pbmc[['pca']][[]][, 1:5]) +head(Embeddings(object = pbmc3k[['pca']])[, 1:5]) # get feature loadings -head(Loadings(object = pbmc[['pca']])[, 1:5]) -head(pbmc[['pca']][][, 1:5]) +head(Loadings(object = pbmc3k[['pca']])[, 1:5]) +``` -# get meta.data -head(pbmc[[]]) -``` \ No newline at end of file +
    + **Session Info** +```{r} +sessionInfo() +``` +
    From a2677648017e2253bef7a2463676805fa270f3c3 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Wed, 29 Mar 2023 13:40:43 -0400 Subject: [PATCH 611/979] update name and reference --- vignettes/ParseBio_sketch_integration.Rmd | 2 +- vignettes/vignettes_v5.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 8e28c5461..04ea2dfad 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -152,7 +152,7 @@ p1 + p2 + plot_layout(ncol = 1) ## Compare healthy and diabetic samples -By integrating all samples together, we can now compare healthy and diabetic cells in matched cell states. To maximize statistical power, we want to use all cells - not just the sketched cells - to perform this analysis. As recommended by [Soneson et al.](https://www.nature.com/articles/s41467-020-19894-4), we use an aggregation-based (pseudobulk) workflow. We aggregate all cells within the same cell type and sample using the `AggregateExpression` function. This returns a Seurat object where each 'cell' represents the pseudobulk profile of one cell type in one individual. +By integrating all samples together, we can now compare healthy and diabetic cells in matched cell states. To maximize statistical power, we want to use all cells - not just the sketched cells - to perform this analysis. As recommended by [Soneson et all.](https://www.nature.com/articles/nmeth.4612) and [Crowell et al.](https://www.nature.com/articles/s41467-020-19894-4), we use an aggregation-based (pseudobulk) workflow. We aggregate all cells within the same cell type and sample using the `AggregateExpression` function. This returns a Seurat object where each 'cell' represents the pseudobulk profile of one cell type in one individual. After we aggregate cells, we can perform celltype-specific differential expression between healthy and diabetic samples using DESeq2. We demonstrate this for CD14 monocytes. diff --git a/vignettes/vignettes_v5.yaml b/vignettes/vignettes_v5.yaml index 11fdf2be7..7167da282 100644 --- a/vignettes/vignettes_v5.yaml +++ b/vignettes/vignettes_v5.yaml @@ -48,7 +48,7 @@ - category: References and additional documentation vignettes: - - title: Seurat v5 Essential Commands + - title: Seurat v5 Command Cheat Sheet name: seurat5_essential_commands summary: | Explore the new assay structure introduced in Seurat v5. From e3ae922bc494fae87449bc88768ba69b130b774a Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 30 Mar 2023 13:13:45 -0400 Subject: [PATCH 612/979] Bump version; fix scale max --- DESCRIPTION | 4 ++-- R/preprocessing.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b5a990923..01239acdc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9040 -Date: 2023-03-28 +Version: 4.9.9.9041 +Date: 2023-03-30 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/R/preprocessing.R b/R/preprocessing.R index c69982656..a72b0a55d 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -4456,7 +4456,7 @@ ScaleData.IterableMatrix <- function( features.sd <- 1 } if (scale.max != Inf) { - object <- BPCells::min_by_row(mat = object, vals = scale.max*feature.sd + feature.mean) + object <- BPCells::min_by_row(mat = object, vals = scale.max * features.sd + features.mean) } scaled.data <- (object - features.mean) / features.sd return(scaled.data) From b96f1a42645f15eaa16fae522925eef10eae25e4 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 30 Mar 2023 14:33:14 -0400 Subject: [PATCH 613/979] PercentageFeatureSet for multiple layers --- R/utilities.R | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 7acb49356..74db2e5bd 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1312,13 +1312,32 @@ PercentageFeatureSet <- function( if (!is.null(x = features) && !is.null(x = pattern)) { warn(message = "Both pattern and features provided. Pattern is being ignored.") } - features <- features %||% grep( - pattern = pattern, - x = rownames(x = object[[assay]]), - value = TRUE - ) - percent.featureset <- colSums(x = GetAssayData(object = object, assay = assay, slot = "counts")[features, , drop = FALSE]) / - object[[paste0("nCount_", assay)]] * 100 + if (length(x = Layers(object = object, pattern = "counts")) > + 1) { + percent.featureset <- c() + for (layer in Layers(object = object, pattern = "counts")) { + features.layer <- features %||% grep( + pattern = pattern, + x = rownames(x = object[[assay]][[layer]]), + value = TRUE) + layer.data <- LayerData(object = object, + assay = assay, + layer = layer) + layer.sums <- colSums(x = layer.data[features.layer, , drop = FALSE]) + layer.perc <- layer.sums / object@meta.data[colnames(layer.data), paste0("nCount_", assay)] * 100 + percent.featureset <- c(percent.featureset, layer.perc) + } + } + else { + features <- features %||% grep( + pattern = pattern, + x = rownames(x = object[[assay]]), + value = TRUE + ) + percent.featureset <- colSums(x = GetAssayData(object = object, assay = assay, slot = "counts")[features, , drop = FALSE]) / + object[[paste0("nCount_", assay)]] * 100 + } + if (!is.null(x = col.name)) { object <- AddMetaData(object = object, metadata = percent.featureset, col.name = col.name) return(object) From 5c3f18d7c61bfe0ee3b8275a65e80d3c3939874f Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 31 Mar 2023 12:01:51 -0400 Subject: [PATCH 614/979] removing if else and using list format --- R/utilities.R | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 74db2e5bd..2079c11a8 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1312,32 +1312,22 @@ PercentageFeatureSet <- function( if (!is.null(x = features) && !is.null(x = pattern)) { warn(message = "Both pattern and features provided. Pattern is being ignored.") } - if (length(x = Layers(object = object, pattern = "counts")) > - 1) { - percent.featureset <- c() - for (layer in Layers(object = object, pattern = "counts")) { - features.layer <- features %||% grep( - pattern = pattern, - x = rownames(x = object[[assay]][[layer]]), - value = TRUE) - layer.data <- LayerData(object = object, - assay = assay, - layer = layer) - layer.sums <- colSums(x = layer.data[features.layer, , drop = FALSE]) - layer.perc <- layer.sums / object@meta.data[colnames(layer.data), paste0("nCount_", assay)] * 100 - percent.featureset <- c(percent.featureset, layer.perc) - } - } - else { - features <- features %||% grep( - pattern = pattern, - x = rownames(x = object[[assay]]), - value = TRUE - ) - percent.featureset <- colSums(x = GetAssayData(object = object, assay = assay, slot = "counts")[features, , drop = FALSE]) / - object[[paste0("nCount_", assay)]] * 100 - } - + percent.featureset <- list() + layers <- Layers(object = object, pattern = "counts") + for (i in seq_along(along.with = layers)) { + layer <- layers[i] + features.layer <- features %||% grep( + pattern = pattern, + x = rownames(x = object[[assay]][[layer]]), + value = TRUE) + layer.data <- LayerData(object = object, + assay = assay, + layer = layer) + layer.sums <- colSums(x = layer.data[features.layer, , drop = FALSE]) + layer.perc <- layer.sums / object[[]][colnames(layer.data), paste0("nCount_", assay)] * 100 + percent.featureset[[i]] <- layer.perc + } + percent.featureset <- unlist(percent.featureset) if (!is.null(x = col.name)) { object <- AddMetaData(object = object, metadata = percent.featureset, col.name = col.name) return(object) From ffeddbebe0de84d94f50b9945c796df142f4ff27 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Sat, 1 Apr 2023 21:26:37 -0400 Subject: [PATCH 615/979] fix min_var_custom sct only sparse --- R/preprocessing5.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 631b958f1..0b3289230 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -2212,6 +2212,8 @@ FetchResiduals_reference <- function(object, } if (vst_out$arguments$min_variance == "umi_median"){ + nz_median <- median(umi@x) + min_var_custom <- (nz_median / 5)^2 min_var <- min_var_custom } else { min_var <- vst_out$arguments$min_variance From 8b265021f871ba1200799ceecc627a69cb55f2cd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Apr 2023 17:12:57 -0400 Subject: [PATCH 616/979] fix min var costom --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 0b3289230..df7b33e6a 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -2212,7 +2212,7 @@ FetchResiduals_reference <- function(object, } if (vst_out$arguments$min_variance == "umi_median"){ - nz_median <- median(umi@x) + nz_median <- 1 min_var_custom <- (nz_median / 5)^2 min_var <- min_var_custom } else { From 731178c4af58831136ed78c90a118527560047e3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Apr 2023 18:52:36 -0400 Subject: [PATCH 617/979] add bpcells gene index --- R/sketching.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index e2c807a48..1515a1fec 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -340,7 +340,14 @@ LeverageScore.default <- function( if (isTRUE(x = verbose)) { message("Performing QR decomposition") } - sa <- S %*% object + if (inherits(x = object, what = 'IterableMatrix')) { + temp <- tempdir() + object.gene_index <- transpose_storage_order(matrix = object, tmpdir = temp) + sa <- as(object = S %*% object, Class = 'dgCMatrix') + unlink(x = temp, recursive = TRUE) + } else { + sa <- S %*% object + } if (!inherits(x = sa, what = 'dgCMatrix')) { sa <- as(object = sa, Class = 'dgCMatrix') } From 37d48aad9ca136b5cf43561cba3581596d869715 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 18 Apr 2023 09:20:38 -0400 Subject: [PATCH 618/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 01239acdc..ae9190ef3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9041 -Date: 2023-03-30 +Version: 4.9.9.9042 +Date: 2023-04-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 689a0313ec1a2d485031e2b1f15e23ce67e5e7ab Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 19 Apr 2023 22:18:43 -0400 Subject: [PATCH 619/979] projectData umap.full --- R/sketching.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 1515a1fec..80593e070 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -272,8 +272,16 @@ TransferSketchLabels <- function( verbose = verbose, assay = slot(object = object[[reduction]], name = 'assay.used') ) - Key(proj.umap) <- paste0('ref', Key(proj.umap)) - object[[paste0('ref.',reduction.model )]] <- proj.umap + full.umap.reduction <- rev( + x = make.unique( + names = c( + Reductions(object = object), + paste0(reduction.model, '.full' ) + ) + ) + )[1] + Key(proj.umap) <- Key(full.umap.reduction) + object[[full.umap.reduction ]] <- proj.umap } return(object) } From 9d42f1162e9a948b54157595607fff8a91ea0f07 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 19 Apr 2023 22:23:01 -0400 Subject: [PATCH 620/979] minor style --- R/sketching.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 80593e070..d2db4421e 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -280,7 +280,7 @@ TransferSketchLabels <- function( ) ) )[1] - Key(proj.umap) <- Key(full.umap.reduction) + Key(object = proj.umap) <- Key(object = full.umap.reduction) object[[full.umap.reduction ]] <- proj.umap } return(object) From a678b666066c31a3200edc1ed4a57d13d7f7dd17 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 19 Apr 2023 22:28:53 -0400 Subject: [PATCH 621/979] full.umap projectData --- R/sketching.R | 2 +- vignettes/seurat5_sketch_analysis.Rmd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index d2db4421e..63bc27767 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -276,7 +276,7 @@ TransferSketchLabels <- function( x = make.unique( names = c( Reductions(object = object), - paste0(reduction.model, '.full' ) + paste0('full.',reduction.model) ) ) )[1] diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 03d156d1d..c036b4ca2 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -139,12 +139,12 @@ DefaultAssay(obj) <- 'RNA' ``` ```{r save.img, include = FALSE, eval = FALSE} -p <- DimPlot(obj, label = T, label.size = 3, reduction = "ref.umap", group.by = "cluster_full", alpha = 0.1) + NoLegend() +p <- DimPlot(obj, label = T, label.size = 3, reduction = "full.umap", group.by = "cluster_full", alpha = 0.1) + NoLegend() ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) ``` ```{r, fig.width=5, fig.height=5} -DimPlot(obj, label = T, label.size = 3, reduction = 'ref.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() +DimPlot(obj, label = T, label.size = 3, reduction = 'full.umap', group.by = 'cluster_full', alpha = 0.1) + NoLegend() ``` ```{r, fig.width=10, fig.height=5} From 9691e0c5e376907e26894e0e59d1e477b71eceda Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 19 Apr 2023 22:31:20 -0400 Subject: [PATCH 622/979] remove duplicate vig --- vignettes/MouseBrain_sketch_clustering.Rmd | 185 --------------------- 1 file changed, 185 deletions(-) delete mode 100755 vignettes/MouseBrain_sketch_clustering.Rmd diff --git a/vignettes/MouseBrain_sketch_clustering.Rmd b/vignettes/MouseBrain_sketch_clustering.Rmd deleted file mode 100755 index c736003b3..000000000 --- a/vignettes/MouseBrain_sketch_clustering.Rmd +++ /dev/null @@ -1,185 +0,0 @@ ---- -title: "Mouse brain sketch clustering" -output: - html_document: - theme: united - df_print: kable - pdf_document: default -date: 'Compiled: `r Sys.Date()`' ---- - -```{r setup, include=TRUE} -all_times <- list() # store the time for each chunk -knitr::knit_hooks$set(time_it = local({ - now <- NULL - function(before, options) { - if (before) { - now <<- Sys.time() - } else { - res <- difftime(Sys.time(), now, units = "secs") - all_times[[options$label]] <<- res - } - } -})) -knitr::opts_chunk$set( - tidy = TRUE, - tidy.opts = list(width.cutoff = 95), - message = FALSE, - warning = FALSE, - fig.width = 10, - time_it = TRUE, - error = TRUE -) -``` - -## load library -```{r, warning=FALSE, message=FALSE} -library(Seurat) -library(BPCells) -``` - -## load data from h5ad -```{r, warning=FALSE, message=FALSE} -t0_CreateObject <- system.time({ - -mat <- open_matrix_dir("../data/mouse_1M_neurons_counts")[,1:1e5] - -mat <- Azimuth::ConvertEnsembleToSymbol(mat = mat, species = 'mouse') - -options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = T) -obj <- CreateSeuratObject(counts = mat ) - -}) - -``` - -## create sketch assay -```{r, warning=FALSE, message=FALSE} -t1_CreateSketchAssay <- system.time({ -obj <- NormalizeData(obj) -obj <- FindVariableFeatures(obj, layer = 'counts') -obj <- LeverageScoreSampling(object = obj, ncells = 5000, cast = 'dgCMatrix') - -}) - -``` -## Sketch assay clustering -```{r, warning=FALSE, message=FALSE} -t2_SketchClustering <- system.time({ -obj <- FindVariableFeatures(obj) -obj <- ScaleData(obj) -obj <- RunPCA(obj) -obj <- FindNeighbors(obj, dims = 1:50) -obj <- FindClusters(obj) -}) - -obj <- RunUMAP(obj, dims = 1:50, return.model = T) -``` - -```{r} -DimPlot(obj, label = T, reduction = 'umap') + NoLegend() -``` - - -```{r} -DimPlot(obj, reduction = 'umap', label = T) + NoLegend() - -``` -```{r,fig.height = 20, fig.width = 15} -features.set <- c('Aqp4', 'Sox10', 'Slc17a7', 'Aif1', 'Foxj1', 'Pax6', 'Slc17a6', 'Lum', 'Nanog', 'Gad2', 'Foxj1', 'Cldn5','Alas2') -features.gaba.set <- c('Gad1','Mef2c','Sst','Lhx6','Nr2f2','Prox1') -DefaultAssay(obj) <- 'sketch' -FeaturePlot(obj, reduction = 'umap', features = features.set, max.cutoff = "q99", min.cutoff = 'q1') -FeaturePlot(obj, reduction = 'umap', features = features.gaba.set, max.cutoff = "q99", min.cutoff = 'q1') - -``` - -## Project full cells to PCA from sketch assay -```{r, warning=FALSE, message=FALSE} -t3_ProjectEmbedding <- system.time({ - ref.emb <- ProjectCellEmbeddings(query = obj, - reference = obj, - query.assay = 'RNA', - reference.assay = 'sketch', - reduction = 'pca') -obj[['pca.orig']] <- CreateDimReducObject(embeddings = ref.emb, assay = 'RNA') -DefaultAssay(obj) <- 'RNA' -}) - - - -``` - -## Transfer labels and umap from sketch to full data -```{r, warning=FALSE, message=FALSE} -t4_transferLabel <- system.time({ - options(future.globals.maxSize = 1e9) - obj <- TransferSketchLabels(object = obj, - atoms = 'sketch', - reduction = 'pca.orig', - dims = 1:50, - refdata = list(cluster_full = 'sketch_snn_res.0.8'), - reduction.model = 'umap' - ) -}) - -``` - - -```{r} -library(ggplot2) -DimPlot(obj, label = T, reduction = 'ref.umap', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() - -``` - -```{r} - -obj[['pca.nn']] <- Seurat:::NNHelper(data = obj[['pca.orig']]@cell.embeddings[,1:50], - k = 30, - method = "hnsw", - metric = "cosine", - n_threads = 10) -obj <- RunUMAP(obj, nn.name = "pca.nn", reduction.name = 'umap.orig', reduction.key = 'Uo_') - -``` - -```{r} -DimPlot(obj, label = T, reduction = 'umap.orig', group.by = 'predicted.cluster_full', alpha = 0.1) + NoLegend() - -``` - -## sub type clustering -```{r} -obj.sub <- subset(obj, subset = predicted.cluster_full %in% c(5, 12)) -obj.sub[['sketch']] <- NULL -obj.sub[['RNA']] <- CastAssay(object = obj.sub[['RNA']], to = "dgCMatrix" ) -obj.sub <- FindVariableFeatures(obj.sub, layer = 'counts') -obj.sub <- ScaleData(obj.sub) -obj.sub <- RunPCA(obj.sub) -obj.sub <- RunUMAP(obj.sub, dims = 1:30) -obj.sub <- FindNeighbors(obj.sub, dims = 1:30) -obj.sub <- FindClusters(obj.sub) -``` - -```{r} -p <- DimPlot(obj.sub, label = T) + NoLegend() -p -``` - -```{r save.img, include=TRUE} -library(ggplot2) -p <- DimPlot(obj, label = T, label.size=8, reduction = "ref.umap", group.by = "predicted.cluster_full", alpha = 0.1) + NoLegend() -ggsave(filename = "../output/images/MouseBrain_sketch_clustering.jpg", height = 7, width = 7, plot = p, quality = 50) -``` - -```{r save.times, include=TRUE} -print(as.data.frame(all_times)) -write.csv(x = t(as.data.frame(all_times)), file = "../output/timings/MouseBrain_sketch_clustering.csv") -``` - -
    - **Session Info** -```{r} -sessionInfo() -``` -
    From 4a52d8a6c6f5afd03cf3e17c456c411530b840f3 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 20 Apr 2023 09:33:56 -0400 Subject: [PATCH 623/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 01239acdc..7ae37e49f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9041 -Date: 2023-03-30 +Version: 4.9.9.9042 +Date: 2023-04-20 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 22981ffa03713f400e363262932dd69a2b17affa Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 20 Apr 2023 14:50:36 -0400 Subject: [PATCH 624/979] update vignettes --- vignettes/seurat5_integration.Rmd | 6 +++--- vignettes/seurat5_sketch_analysis.Rmd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/seurat5_integration.Rmd b/vignettes/seurat5_integration.Rmd index 74a090485..ec1acd246 100644 --- a/vignettes/seurat5_integration.Rmd +++ b/vignettes/seurat5_integration.Rmd @@ -131,7 +131,7 @@ obj <- IntegrateLayers( ``` ```{r addscvi, include=FALSE} -scvi.reduc <- readRDS("/brahms/haoy/test/pbmcsca_scvi.dr.rds")@cell.embeddings +scvi.reduc <- readRDS("/brahms/haoy/seurat5/object/pbmcsca_scvi.dr.rds")@cell.embeddings scvi.reduc <- scvi.reduc[Cells(obj),] obj[["integrated.scvi"]] <- CreateDimReducObject(embeddings = scvi.reduc) ``` @@ -147,9 +147,9 @@ p1 <- DimPlot( group.by = c("Method", "predicted.celltype.l2", "cca_clusters"), combine = FALSE) -obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:10) +obj <- FindNeighbors(obj, reduction = 'integrated.scvi', dims = 1:30) obj <- FindClusters(obj,resolution = 2, cluster.name = 'scvi_clusters') -obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:10, reduction.name = 'umap.scvi') +obj <- RunUMAP(obj, reduction = "integrated.scvi", dims = 1:30, reduction.name = 'umap.scvi') p2 <- DimPlot( obj, reduction = "umap.scvi", group.by = c("Method", "predicted.celltype.l2", "scvi_clusters"), diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index c036b4ca2..2c9cbfa4d 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -121,7 +121,7 @@ FeaturePlot( We can now extend the cluster labels and dimensional reductions learned on the sketched cells to the full dataset. The `ProjectData` function projects the on-disk data, onto the `sketch` assay. It returns a Seurat object that includes a * Dimensional reduction (PCA): The `pca.full` dimensional reduction extends the `pca` reduction on the sketched cells to all cells in the dataset -* Dimensional reduction (UMAP): The `umap.full` dimensional reduction extends the `full` reduction on the sketched cells to all cells in the dataset +* Dimensional reduction (UMAP): The `full.umap` dimensional reduction extends the `full` reduction on the sketched cells to all cells in the dataset * Cluster labels: The `cluster_full` column in the object metadata now labels all cells in the dataset with one of the cluster labels derived from the sketched cells ```{r, warning=FALSE, message=FALSE} From 7f18b5d3f204c12c72fbf3f26aa7ae282f2ef5dc Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 20 Apr 2023 14:55:00 -0400 Subject: [PATCH 625/979] fix typo --- vignettes/seurat5_sketch_analysis.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/seurat5_sketch_analysis.Rmd b/vignettes/seurat5_sketch_analysis.Rmd index 2c9cbfa4d..ca092445a 100644 --- a/vignettes/seurat5_sketch_analysis.Rmd +++ b/vignettes/seurat5_sketch_analysis.Rmd @@ -121,7 +121,7 @@ FeaturePlot( We can now extend the cluster labels and dimensional reductions learned on the sketched cells to the full dataset. The `ProjectData` function projects the on-disk data, onto the `sketch` assay. It returns a Seurat object that includes a * Dimensional reduction (PCA): The `pca.full` dimensional reduction extends the `pca` reduction on the sketched cells to all cells in the dataset -* Dimensional reduction (UMAP): The `full.umap` dimensional reduction extends the `full` reduction on the sketched cells to all cells in the dataset +* Dimensional reduction (UMAP): The `full.umap` dimensional reduction extends the `umap` reduction on the sketched cells to all cells in the dataset * Cluster labels: The `cluster_full` column in the object metadata now labels all cells in the dataset with one of the cluster labels derived from the sketched cells ```{r, warning=FALSE, message=FALSE} From e75aa5992bdfb788054250a6ecbf5b8f5ccfdb62 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 20 Apr 2023 15:41:55 -0400 Subject: [PATCH 626/979] fix gene index matrix --- R/sketching.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 63bc27767..22e3e8e0d 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -351,7 +351,8 @@ LeverageScore.default <- function( if (inherits(x = object, what = 'IterableMatrix')) { temp <- tempdir() object.gene_index <- transpose_storage_order(matrix = object, tmpdir = temp) - sa <- as(object = S %*% object, Class = 'dgCMatrix') + sa <- as(object = S %*% object.gene_index, Class = 'dgCMatrix') + rm(object.gene_index) unlink(x = temp, recursive = TRUE) } else { sa <- S %*% object From 9cc8f77902bebbc72a2db181c39f3a0e931d6bc8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 5 May 2023 16:34:42 -0400 Subject: [PATCH 627/979] fix v5 nFeature var --- R/preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index a72b0a55d..9b72c4283 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3729,6 +3729,7 @@ FindVariableFeatures.Seurat <- function( num.bin = num.bin, binning.method = binning.method, nfeatures = nfeatures, + nselect = nfeatures, mean.cutoff = mean.cutoff, dispersion.cutoff = dispersion.cutoff, verbose = verbose, From 42ca0ecca7b57596aea02a21493c1b98ea8b1dc8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 5 May 2023 16:37:56 -0400 Subject: [PATCH 628/979] fix getassay v5 --- R/objects.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index 427ff2366..bee79b00f 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1502,7 +1502,7 @@ Features.SCTModel <- function(x, ...) { GetAssay.Seurat <- function(object, assay = NULL, ...) { CheckDots(...) assay <- assay %||% DefaultAssay(object = object) - object.assays <- FilterObjects(object = object, classes.keep = 'Assay') + object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) if (!assay %in% object.assays) { stop(paste0( assay, From 37592397d9941b8ab1864f30c7ff1065949e3706 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 5 May 2023 16:44:37 -0400 Subject: [PATCH 629/979] update version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3a4820df6..c7645b183 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9043 -Date: 2023-04-20 +Version: 4.9.9.9044 +Date: 2023-05-05 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From d68d825da32aea1cc093a47e604ddb4c39c4682f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 9 May 2023 22:41:56 -0400 Subject: [PATCH 630/979] zero variance to 0.01 --- R/preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 9b72c4283..98bdfe6c5 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -4453,6 +4453,7 @@ ScaleData.IterableMatrix <- function( features.sd <- sqrt(BPCells::matrix_stats( matrix = object, row_stats = 'variance')$row_stats['variance',]) + features.sd[features.sd == 0] <- 0.01 } else { features.sd <- 1 } From b4d3336e133a8e3f3991ca9a99edb0e3cf4c7fab Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 9 May 2023 22:44:12 -0400 Subject: [PATCH 631/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3a4820df6..0abca3512 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9043 -Date: 2023-04-20 +Version: 4.9.9.9044 +Date: 2023-05-10 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 2aa0eeb3484cbf67e4bffe1244cbb0f5cedfa0b9 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 10 May 2023 10:57:21 -0400 Subject: [PATCH 632/979] fix no nCount_assay --- R/integration.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index da998a59b..cd7b54d4d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -929,8 +929,12 @@ FindTransferAnchors <- function( approx = approx.pca ) } - query_nCount_UMI <- query[[]][, paste0("nCount_", query.assay)] - names(x = query_nCount_UMI) <- colnames(x = query) + if (paste0("nCount_", query.assay) %in% colnames(query[[]])) { + query_nCount_UMI <- query[[]][, paste0("nCount_", query.assay)] + names(x = query_nCount_UMI) <- colnames(x = query) + } else { + query_nCount_UMI <- NULL + } projected.pca <- ProjectCellEmbeddings( reference = reference, reduction = reference.reduction, From 1d9bc5c8a4a8f4073698503d0ed69e691ecd87b2 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 15 May 2023 20:21:19 -0400 Subject: [PATCH 633/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0abca3512..c792c8c81 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9044 -Date: 2023-05-10 +Version: 4.9.9.9045 +Date: 2023-05-15 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 4dbfa708af125a73647de3d977631446ccfb7e08 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Mon, 15 May 2023 22:36:57 -0400 Subject: [PATCH 634/979] fix v3 cast --- R/sketching.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 22e3e8e0d..5342bc3d9 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -97,7 +97,7 @@ SketchData <- function( silent = TRUE ) } - if (!is.null(x = cast)) { + if (!is.null(x = cast) && inherits(x = sketched, what = 'Assay5')) { sketched <- CastAssay(object = sketched, to = cast, ...) } Key(object = sketched) <- Key(object = sketched.assay, quiet = TRUE) From 7c5354d225f1b53aa027d22a0422116da1c94044 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 6 Jun 2023 18:07:10 -0400 Subject: [PATCH 635/979] fix tmp bug --- R/sketching.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 5342bc3d9..53d29e073 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -353,7 +353,7 @@ LeverageScore.default <- function( object.gene_index <- transpose_storage_order(matrix = object, tmpdir = temp) sa <- as(object = S %*% object.gene_index, Class = 'dgCMatrix') rm(object.gene_index) - unlink(x = temp, recursive = TRUE) + unlink(list.files(path = temp, full.names = TRUE)) } else { sa <- S %*% object } From 3b33df999cdd0b6167ba8c743a81b96b80a7102d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 6 Jun 2023 20:50:38 -0400 Subject: [PATCH 636/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c792c8c81..2056185a2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9045 -Date: 2023-05-15 +Version: 4.9.9.9046 +Date: 2023-06-06 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From c216e6706c2009b5ae1259ca28ae9a41c3cd9e9b Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 8 Jun 2023 10:29:14 -0400 Subject: [PATCH 637/979] remove non-generic LogNormalize --- R/preprocessing.R | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 082a50027..97723df6b 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -757,44 +757,6 @@ LoadCurioSeeker <- function(data.dir, assay = "Spatial") { return(object) } -#' Normalize raw data -#' -#' Normalize count data per cell and transform to log scale -#' -#' @param data Matrix with the raw count data -#' @param scale.factor Scale the data. Default is 1e4 -#' @param verbose Print progress -#' -#' @return Returns a matrix with the normalize and log transformed data -#' -#' @importFrom methods as -#' -#' @export -#' @concept preprocessing -#' -#' @examples -#' mat <- matrix(data = rbinom(n = 25, size = 5, prob = 0.2), nrow = 5) -#' mat -#' mat_norm <- LogNormalize(data = mat) -#' mat_norm -#' -LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { - if (is.data.frame(x = data)) { - data <- as.matrix(x = data) - } - if (!inherits(x = data, what = 'dgCMatrix')) { - data <- as.sparse(x = data) - } - # call Rcpp function to normalize - if (verbose) { - cat("Performing log-normalization\n", file = stderr()) - } - norm.data <- LogNorm(data, scale_factor = scale.factor, display_progress = verbose) - colnames(x = norm.data) <- colnames(x = data) - rownames(x = norm.data) <- rownames(x = data) - return(norm.data) -} - #' Demultiplex samples based on classification method from MULTI-seq (McGinnis et al., bioRxiv 2018) #' #' Identify singlets, doublets and negative cells from multiplexing experiments. Annotate singlets by tags. From f695de95e8ecf9bc04dad70f235819c72c888510 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 8 Jun 2023 10:30:44 -0400 Subject: [PATCH 638/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e38549b9b..ce93274ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9048 -Date: 2023-06-06 +Version: 4.9.9.9049 +Date: 2023-06-08 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 797916d196a650134401b2569f51507cc183dd11 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 13 Jun 2023 08:58:54 -0400 Subject: [PATCH 639/979] bpcells data format bugs --- R/dimensional_reduction.R | 5 +++++ R/integration.R | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 847c86104..bfb5550a5 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2016,6 +2016,11 @@ CheckFeatures <- function( if (inherits(x = data.use, what = 'dgCMatrix')) { features.var <- SparseRowVar(mat = data.use[features, ], display_progress = F) } + else if (inherits(x = data.use, what = "IterableMatrix")) { + bp.stats <- BPCells::matrix_stats(matrix = data.use, + row_stats = "variance") + features.var <- bp.stats$row_stats["variance",][features] + } else { features.var <- RowVar(x = data.use[features, ]) } diff --git a/R/integration.R b/R/integration.R index cd7b54d4d..7bde2096f 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5373,7 +5373,7 @@ ProjectSVD <- function( if (verbose) { message("Projecting new data onto SVD") } - projected.u <- as.matrix(x = crossprod(x = vt, y = data)) + projected.u <- as.matrix(t(vt), data) if (mode == "lsi") { components <- slot(object = reduction, name = 'misc') sigma <- components$d From c8e2e6771d9ba6adb333fdce47dd5738594df7ad Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 30 Jun 2023 15:18:54 -0400 Subject: [PATCH 640/979] remove pre-R-4.1.0 anonymous function shorthand pt2 --- DESCRIPTION | 4 ++-- R/objects.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce93274ff..9b40479d6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9049 -Date: 2023-06-08 +Version: 4.9.9.9050 +Date: 2023-06-30 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/R/objects.R b/R/objects.R index 8de5b87da..f94a77b87 100644 --- a/R/objects.R +++ b/R/objects.R @@ -524,7 +524,7 @@ DietSeurat <- function( layers <- if (isTRUE(x = dep.args[[lyr]])) { c(layers, lyr) } else { - Filter(f = \(x) x != lyr, x = layers) + Filter(f = function(x) x != lyr, x = layers) } } } From 8aea18ef23182cfc54996a032ef0fad7b1c6788b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 5 Jul 2023 15:33:19 -0400 Subject: [PATCH 641/979] update sketch docu --- R/generics.R | 14 ++++++++++++++ R/sketching.R | 37 +++++++++++++++++++++++++++++++++++-- man/FindAllMarkers.Rd | 4 ++++ man/Seurat-package.Rd | 1 + 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/R/generics.R b/R/generics.R index b02c3de6c..95f25638f 100644 --- a/R/generics.R +++ b/R/generics.R @@ -302,8 +302,22 @@ IntegrateEmbeddings <- function(anchorset, ...) { UseMethod(generic = "IntegrateEmbeddings", object = anchorset) } +#' Leverage Score Calculation +#' +#' This function computes the leverage scores for a given object +#' It uses the concept of sketching and random projections. The function provides an approximation +#' to the leverage scores using a scalable method suitable for large matrices. +#' +#' @param object A matrix-like object +#' @param ... Arguments passed to other methods +#' +#' @references Clarkson, K. L. & Woodruff, D. P. +#' Low-rank approximation and regression in input sparsity time. +#' JACM 63, 1–45 (2017). \url{https://dl.acm.org/doi/10.1145/3019134}; +#' #' @export #' +#' LeverageScore <- function(object, ...) { UseMethod(generic = 'LeverageScore', object = object) } diff --git a/R/sketching.R b/R/sketching.R index 53d29e073..1cbd68ff9 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -13,6 +13,26 @@ NULL #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Sketch Data +#' +#' This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, +#' which can help with scalability for large datasets. +#' +#' @param object A Seurat object. +#' @param assay Assay name. Default is NULL, in which case the default assay of the object is used. +#' @param ncells A positive integer indicating the number of cells to sample for the sketching. Default is 5000. +#' @param sketched.assay Sketched assay name. A sketch assay is created or overwrite with the sketch data. Default is 'sketch'. +#' @param method Sketching method to use. Can be 'LeverageScore' or 'Uniform'. +#' Default is 'LeverageScore'. +#' @param var.name A metadata column name to store the leverage scores. Default is 'leverage.score'. +#' @param over.write whether to overwrite existing column in the metadata. Default is FALSE. +#' @param seed A positive integer for the seed of the random number generator. Default is 123. +#' @param cast The type to cast the resulting assay to. Default is 'dgCMatrix'. +#' @param verbose Print progress and diagnostic messages +#' @param ... Arguments passed to other methods +#' +#' @return A Seurat object with the sketched data added as a new assay. +#' #' @importFrom SeuratObject CastAssay Key Key<- Layers #' #' @export @@ -290,9 +310,19 @@ TransferSketchLabels <- function( # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @param nsketch A positive integer. The number of sketches to be used in the approximation. +#' Default is 5000. +#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number +#' of dimensions will default to the number of columns in the object. +#' @param method The sketching method to use, defaults to CountSketch. +#' @param eps A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +#' defaults to 0.5. +#' @param seed A positive integer. The seed for the random number generator, defaults to 123. +#' @param verbose Print progress and diagnostic messages #' @importFrom Matrix qrR t #' @importFrom irlba irlba -#' +#' +#' @rdname LeverageScore #' @method LeverageScore default #' @export #' @@ -386,6 +416,7 @@ LeverageScore.default <- function( return(Z.score) } +#' @rdname LeverageScore #' @importFrom Matrix qrR t #' @method LeverageScore DelayedMatrix #' @export @@ -450,7 +481,7 @@ LeverageScore.DelayedMatrix <- function( return(scores) } - +#' @rdname LeverageScore #' @method LeverageScore StdAssay #' #' @export @@ -503,11 +534,13 @@ LeverageScore.StdAssay <- function( return(scores) } +#' @rdname LeverageScore #' @method LeverageScore Assay #' @export #' LeverageScore.Assay <- LeverageScore.StdAssay +#' @rdname LeverageScore #' @method LeverageScore Seurat #' @export #' diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd index 622474624..28247dd34 100644 --- a/man/FindAllMarkers.Rd +++ b/man/FindAllMarkers.Rd @@ -22,6 +22,7 @@ FindAllMarkers( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, + pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -112,6 +113,9 @@ of the two groups, currently only used for poisson and negative binomial tests} \item{min.cells.group}{Minimum number of cells in one of the groups} +\item{pseudocount.use}{Pseudocount to add to averaged expression values when +calculating logFC. 0.1 by default.} + \item{mean.fxn}{Function to use for fold change or average difference calculation. If NULL, the appropriate function will be chose according to the slot used} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 69a18bf33..79a0fbc5b 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -56,6 +56,7 @@ Other contributors: \item Austin Hartman \email{ahartman@nygenome.org} (\href{https://orcid.org/0000-0001-7278-1852}{ORCID}) [contributor] \item Jaison Jain \email{jjain@nygenome.org} (\href{https://orcid.org/0000-0002-9478-5018}{ORCID}) [contributor] \item Madeline Kowalski \email{mkowalski@nygenome.org} (\href{https://orcid.org/0000-0002-5655-7620}{ORCID}) [contributor] + \item Gesmira Molla \email{gmolla@nygenome.org} (\href{https://orcid.org/0000-0002-8628-5056}{ORCID}) [contributor] \item Efthymia Papalexi \email{epapalexi@nygenome.org} (\href{https://orcid.org/0000-0001-5898-694X}{ORCID}) [contributor] \item Patrick Roelli \email{proelli@nygenome.org} [contributor] \item Rahul Satija \email{rsatija@nygenome.org} (\href{https://orcid.org/0000-0001-9448-8833}{ORCID}) [contributor] From a4d874b9c2ba31042dd7a43eb23a7dbf95e71ea8 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 5 Jul 2023 15:50:47 -0400 Subject: [PATCH 642/979] update sketch docu --- R/sketching.R | 60 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 1cbd68ff9..a44fbf614 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -129,6 +129,29 @@ SketchData <- function( #' Project full data to the sketch assay #' +#' +#' This function allows projection of high-dimensional single-cell RNA expression data from a full dataset +#' onto the lower-dimensional embedding of the sketch of the dataset. +#' +#' @param object A Seurat object. +#' @param assay Assay name for the full data. Default is 'RNA'. +#' @param sketched.assay Sketched assay name to project onto. Default is 'sketch'. +#' @param sketched.reduction Dimensional reduction results of the sketched assay to project onto. +#' @param full.reduction Dimensional reduction name for the projected full dataset. +#' @param dims Dimensions to include in the projection. +#' @param normalization.method Normalization method to use. Can be 'LogNormalize' or 'SCT'. +#' Default is 'LogNormalize'. +#' @param refdata An optional list for label transfer from sketch to full data. Default is NULL. +#' Similar to refdata in `MapQuery` +#' @param k.weight Number of neighbors to consider when weighting labels for transfer. Default is 50. +#' @param umap.model An optional pre-computed UMAP model. Default is NULL. +#' @param recompute.neighbors Whether to recompute the neighbors for label transfer. Default is FALSE. +#' @param recompute.weights Whether to recompute the weights for label transfer. Default is FALSE. +#' @param verbose Print progress and diagnostic messages. +#' +#' @return A Seurat object with the full data projected onto the sketched dimensional reduction results. +#' The projected data are stored in the specified full reduction. +#' #' @export #' ProjectData <- function( @@ -167,7 +190,7 @@ ProjectData <- function( } object <- TransferSketchLabels(object = object, - atoms = sketched.assay, + sketched.assay = sketched.assay, reduction = full.reduction, dims = dims, k = k.weight, @@ -182,11 +205,32 @@ ProjectData <- function( #' Transfer data from sketch data to full data +#' +#' This function transfers cell type labels from a sketched dataset to a full dataset +#' based on the similarities in the lower dimensional space. +#' +#' @param object A Seurat object. +#' @param sketched.assay Sketched assay name. Default is 'sketch'. +#' @param reduction Dimensional reduction name to use for label transfer. +#' @param dims An integer vector indicating which dimensions to use for label transfer. +#' @param refdata A list of character strings indicating the metadata columns containing labels to transfer. Default is NULL. +#' Similar to refdata in `MapQuery` +#' @param k Number of neighbors to use for label transfer. Default is 50. +#' @param reduction.model Dimensional reduction model to use for label transfer. Default is NULL. +#' @param neighbors An object storing the neighbors found during the sketching process. Default is NULL. +#' @param recompute.neighbors Whether to recompute the neighbors for label transfer. Default is FALSE. +#' @param recompute.weights Whether to recompute the weights for label transfer. Default is FALSE. +#' @param verbose Print progress and diagnostic messages +#' +#' @return A Seurat object with transferred labels stored in the metadata. If a UMAP model is provided, +#' the full data are also projected onto the UMAP space, with the results stored in a new reduction, full.`reduction.model` +#' +#' #' @export #' TransferSketchLabels <- function( object, - atoms = 'sketch', + sketched.assay = 'sketch', reduction, dims, refdata = NULL, @@ -208,14 +252,14 @@ TransferSketchLabels <- function( compute.neighbors <- is.null(x = full_sketch.nn) || !all(Cells(full_sketch.nn) == Cells(object[[reduction]])) || - max(Indices(full_sketch.nn)) > ncol(object[[atoms]]) || + max(Indices(full_sketch.nn)) > ncol(object[[sketched.assay]]) || !identical(x = full_sketch.nn@alg.info$dims, y = dims) || !identical(x = full_sketch.nn@alg.info$reduction, y = reduction) || recompute.neighbors compute.weights <- is.null(x = full_sketch.weight) || !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || - !all(rownames(full_sketch.weight) == colnames(object[[atoms]])) || + !all(rownames(full_sketch.weight) == colnames(object[[sketched.assay]])) || recompute.weights || recompute.neighbors @@ -225,7 +269,7 @@ TransferSketchLabels <- function( } full_sketch.nn <- NNHelper( query = Embeddings(object[[reduction]])[, dims], - data = Embeddings(object[[reduction]])[colnames(object[[atoms]]), dims], + data = Embeddings(object[[reduction]])[colnames(object[[sketched.assay]]), dims], k = k, method = "annoy" ) @@ -238,9 +282,9 @@ TransferSketchLabels <- function( } full_sketch.weight <- FindWeightsNN(nn.obj = full_sketch.nn, query.cells = Cells(object[[reduction]]), - reference = colnames(object[[atoms]]), + reference = colnames(object[[sketched.assay]]), verbose = verbose) - rownames(full_sketch.weight) <- colnames(object[[atoms]]) + rownames(full_sketch.weight) <- colnames(object[[sketched.assay]]) colnames(full_sketch.weight) <- Cells(object[[reduction]]) } slot(object = object, name = 'tools')$TransferSketchLabels$full_sketch.nn <- full_sketch.nn @@ -265,7 +309,7 @@ TransferSketchLabels <- function( if (!label.rd %in% colnames( object[[]])) { stop(label.rd, ' is not in the meta.data') } - reference.labels <- object[[]][colnames(object[[atoms]]), label.rd] + reference.labels <- object[[]][colnames(object[[sketched.assay]]), label.rd] predicted.labels.list <- TransferLablesNN( reference.labels = reference.labels, weight.matrix = full_sketch.weight) From 235001637633193847482ec74012786c276a65da Mon Sep 17 00:00:00 2001 From: yuhanH Date: Wed, 5 Jul 2023 15:52:08 -0400 Subject: [PATCH 643/979] update docu in projectdata --- man/ProjectData.Rd | 36 +++++++++++++++++++++++++++++++++++- man/TransferSketchLabels.Rd | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/man/ProjectData.Rd b/man/ProjectData.Rd index 6be416e3b..ee97437a8 100644 --- a/man/ProjectData.Rd +++ b/man/ProjectData.Rd @@ -20,6 +20,40 @@ ProjectData( verbose = TRUE ) } +\arguments{ +\item{object}{A Seurat object.} + +\item{assay}{Assay name for the full data. Default is 'RNA'.} + +\item{sketched.assay}{Sketched assay name to project onto. Default is 'sketch'.} + +\item{sketched.reduction}{Dimensional reduction results of the sketched assay to project onto.} + +\item{full.reduction}{Dimensional reduction name for the projected full dataset.} + +\item{dims}{Dimensions to include in the projection.} + +\item{normalization.method}{Normalization method to use. Can be 'LogNormalize' or 'SCT'. +Default is 'LogNormalize'.} + +\item{refdata}{An optional list for label transfer from sketch to full data. Default is NULL. +Similar to refdata in `MapQuery`} + +\item{k.weight}{Number of neighbors to consider when weighting labels for transfer. Default is 50.} + +\item{umap.model}{An optional pre-computed UMAP model. Default is NULL.} + +\item{recompute.neighbors}{Whether to recompute the neighbors for label transfer. Default is FALSE.} + +\item{recompute.weights}{Whether to recompute the weights for label transfer. Default is FALSE.} + +\item{verbose}{Print progress and diagnostic messages.} +} +\value{ +A Seurat object with the full data projected onto the sketched dimensional reduction results. +The projected data are stored in the specified full reduction. +} \description{ -Project full data to the sketch assay +This function allows projection of high-dimensional single-cell RNA expression data from a full dataset +onto the lower-dimensional embedding of the sketch of the dataset. } diff --git a/man/TransferSketchLabels.Rd b/man/TransferSketchLabels.Rd index 7c3fb1306..087758756 100644 --- a/man/TransferSketchLabels.Rd +++ b/man/TransferSketchLabels.Rd @@ -6,7 +6,7 @@ \usage{ TransferSketchLabels( object, - atoms = "sketch", + sketched.assay = "sketch", reduction, dims, refdata = NULL, @@ -18,6 +18,35 @@ TransferSketchLabels( verbose = TRUE ) } +\arguments{ +\item{object}{A Seurat object.} + +\item{sketched.assay}{Sketched assay name. Default is 'sketch'.} + +\item{reduction}{Dimensional reduction name to use for label transfer.} + +\item{dims}{An integer vector indicating which dimensions to use for label transfer.} + +\item{refdata}{A list of character strings indicating the metadata columns containing labels to transfer. Default is NULL. +Similar to refdata in `MapQuery`} + +\item{k}{Number of neighbors to use for label transfer. Default is 50.} + +\item{reduction.model}{Dimensional reduction model to use for label transfer. Default is NULL.} + +\item{neighbors}{An object storing the neighbors found during the sketching process. Default is NULL.} + +\item{recompute.neighbors}{Whether to recompute the neighbors for label transfer. Default is FALSE.} + +\item{recompute.weights}{Whether to recompute the weights for label transfer. Default is FALSE.} + +\item{verbose}{Print progress and diagnostic messages} +} +\value{ +A Seurat object with transferred labels stored in the metadata. If a UMAP model is provided, +the full data are also projected onto the UMAP space, with the results stored in a new reduction, full.`reduction.model` +} \description{ -Transfer data from sketch data to full data +This function transfers cell type labels from a sketched dataset to a full dataset +based on the similarities in the lower dimensional space. } From e2f65297f9880eb2758611a180657c33565335a8 Mon Sep 17 00:00:00 2001 From: mhkowalski Date: Thu, 6 Jul 2023 10:38:20 -0400 Subject: [PATCH 644/979] add IntegrateEmbeddings parameters --- R/integration5.R | 42 +++++++++++++++++++++++++++++++++++++++--- src/RcppExports.cpp | 2 +- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 0e913c7eb..5eff4f57b 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -186,6 +186,12 @@ CCAIntegration <- function( k.filter = NA, scale.layer = 'scale.data', verbose = TRUE, + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, ...) { op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) on.exit(expr = options(op), add = TRUE) @@ -226,8 +232,15 @@ CCAIntegration <- function( object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, - verbose = verbose - ) + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, + verbose = verbose, + ... + ) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) return(output.list) @@ -288,6 +301,12 @@ RPCAIntegration <- function( k.filter = NA, scale.layer = 'scale.data', groups = NULL, + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, verbose = TRUE, ...) { op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) @@ -325,7 +344,6 @@ RPCAIntegration <- function( k.filter = k.filter, reference = reference, verbose = verbose, - ... ) slot(object = anchor, name = "object.list") <- lapply( X = slot( @@ -338,6 +356,12 @@ RPCAIntegration <- function( object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, verbose = verbose ) @@ -365,6 +389,12 @@ JointPCAIntegration <- function( dims = 1:30, k.anchor = 20, scale.layer = 'scale.data', + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, verbose = TRUE, groups = NULL, ... @@ -421,6 +451,12 @@ JointPCAIntegration <- function( object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, verbose = verbose) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From c5be5c9b73f7984a743dba21db845e74a56c336d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 10:46:07 -0400 Subject: [PATCH 645/979] fix command bug --- R/integration.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index cd7b54d4d..74e38e626 100644 --- a/R/integration.R +++ b/R/integration.R @@ -7616,7 +7616,10 @@ FindBridgeTransferAnchors <- function( reduction <- match.arg(arg = reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - params <- Command(object = extended.reference, command = 'PrepareBridgeReference') + command.name <- grep(pattern = 'PrepareBridgeReference', + x = names(slot(object = extended.reference, name = 'commands')), + value = TRUE) + params <- Command(object = extended.reference, command = command.name) bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction @@ -7695,8 +7698,10 @@ FindBridgeIntegrationAnchors <- function( integration.reduction <- match.arg(arg = integration.reduction) query.assay <- query.assay %||% DefaultAssay(query) DefaultAssay(query) <- query.assay - - params <- Command(object = extended.reference, command = 'PrepareBridgeReference') + command.name <- grep(pattern = 'PrepareBridgeReference', + x = names(slot(object = extended.reference, name = 'commands')), + value = TRUE) + params <- Command(object = extended.reference, command = command.name) bridge.query.assay <- params$bridge.query.assay bridge.query.reduction <- params$bridge.query.reduction %||% params$supervised.reduction reference.reduction <- params$reference.reduction From 8fc19d4e4b564a0844eadf6ada2daae20da9519c Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 6 Jul 2023 10:46:21 -0400 Subject: [PATCH 646/979] allow suppressable startup message --- DESCRIPTION | 4 ++-- R/zzz.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9b40479d6..c1888a97b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9050 -Date: 2023-06-30 +Version: 4.9.9.9051 +Date: 2023-07-06 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/R/zzz.R b/R/zzz.R index 5c20e595d..0470b84ef 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -87,7 +87,7 @@ AttachDeps <- function(deps) { #' .onAttach <- function(libname, pkgname) { AttachDeps(deps = c('SeuratObject')) - message("Loading Seurat v5 beta version \n", + packageStartupMessage("Loading Seurat v5 beta version \n", "To maintain compatibility with previous workflows, new Seurat objects ", "will use the previous object structure by default\n", "To use new Seurat v5 assays: Please run: ", From bb52cb08a92a53e5b0d98830313c03220c9c242a Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 6 Jul 2023 10:50:48 -0400 Subject: [PATCH 647/979] fix cca integration bpcells --- R/integration5.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 0e913c7eb..7c56df3d4 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -202,8 +202,23 @@ CCAIntegration <- function( } else { object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,] ) - object.list[[i]][['RNA']]$scale.data <- object[[scale.layer]][features, Cells(object.list[[i]])] + if (inherits(x = object[[layers[i]]], what = "IterableMatrix")) { + counts <- as(object = object[[layers[i]]][features, ], + Class = "dgCMatrix") + } + else { + counts <- object[[layers[i]]][features, ] + } + object.list[[i]] <- CreateSeuratObject(counts = counts) + if (inherits(x = object[[scale.layer]], what = "IterableMatrix")) { + scale.data.layer <- as.matrix(object[[scale.layer]][features, + Cells(object.list[[i]])]) + object.list[[i]][["RNA"]]$scale.data <- scale.data.layer + } + else { + object.list[[i]][["RNA"]]$scale.data <- object[[scale.layer]][features, + Cells(object.list[[i]])] + } object.list[[i]][['RNA']]$counts <- NULL } } From e2beb2e6a328037e148377fc7517eb895f83ab29 Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 6 Jul 2023 11:45:55 -0400 Subject: [PATCH 648/979] Committing from correct machine --- R/visualization.R | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 7adcc6d36..156c0edf7 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1965,6 +1965,7 @@ FeatureScatter <- function( shuffle = FALSE, seed = 1, group.by = NULL, + split.by = NULL, cols = NULL, pt.size = 1, shape.by = NULL, @@ -1973,6 +1974,7 @@ FeatureScatter <- function( combine = TRUE, slot = 'data', plot.cor = TRUE, + ncol = NULL, raster = NULL, raster.dpi = c(512, 512), jitter = FALSE @@ -2003,11 +2005,14 @@ FeatureScatter <- function( data[, group] <- factor(x = data[, group]) } } + if (!is.null(x = split.by)) { + data[, split.by] <- object[[split.by, drop = TRUE]] + } plots <- lapply( X = group.by, FUN = function(x) { - SingleCorPlot( - data = data[,c(feature1, feature2)], + plot <- SingleCorPlot( + data = data[,c(feature1, feature2,split.by)], col.by = data[, x], cols = cols, pt.size = pt.size, @@ -2019,6 +2024,18 @@ FeatureScatter <- function( raster.dpi = raster.dpi, jitter = jitter ) + if (!is.null(x = split.by)) { + plot <- plot + FacetTheme() + + facet_wrap( + facets = vars(!!sym(x = split.by)), + ncol = if (length(x = group.by) > 1 || is.null(x = ncol)) { + length(x = unique(x = data[, split.by])) + } else { + ncol + } + ) + } + plot } ) if (isTRUE(x = length(x = plots) == 1)) { From a75995970ea5b9f0ac290df32f29c31462ca5c6e Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 6 Jul 2023 12:18:27 -0400 Subject: [PATCH 649/979] fix crossprod --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 7bde2096f..d488a41f3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5373,7 +5373,7 @@ ProjectSVD <- function( if (verbose) { message("Projecting new data onto SVD") } - projected.u <- as.matrix(t(vt), data) + projected.u <- as.matrix(t(vt) %*% data) if (mode == "lsi") { components <- slot(object = reduction, name = 'misc') sigma <- components$d From d0417934b5d9be6416db5c052e25f00a761f75ce Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 14:07:07 -0400 Subject: [PATCH 650/979] remove force recal --- R/clustering.R | 9 --------- man/FindNeighbors.Rd | 6 ------ 2 files changed, 15 deletions(-) diff --git a/R/clustering.R b/R/clustering.R index a9d240c23..c20719b92 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -507,7 +507,6 @@ FindClusters.Seurat <- function( #' @param nn.eps Error bound when performing nearest neighbor seach using RANN; #' default of 0.0 implies exact nearest neighbor search #' @param verbose Whether or not to print output to the console -#' @param force.recalc Force recalculation of (S)NN. #' @param l2.norm Take L2Norm of the data #' @param cache.index Include cached index in returned Neighbor object #' (only relevant if return.neighbor = TRUE) @@ -535,7 +534,6 @@ FindNeighbors.default <- function( annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, l2.norm = FALSE, cache.index = FALSE, index = NULL, @@ -645,7 +643,6 @@ FindNeighbors.Assay <- function( annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, l2.norm = FALSE, cache.index = FALSE, ... @@ -663,7 +660,6 @@ FindNeighbors.Assay <- function( annoy.metric = annoy.metric, nn.eps = nn.eps, verbose = verbose, - force.recalc = force.recalc, l2.norm = l2.norm, return.neighbor = return.neighbor, cache.index = cache.index, @@ -688,7 +684,6 @@ FindNeighbors.dist <- function( annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, l2.norm = FALSE, cache.index = FALSE, ... @@ -705,7 +700,6 @@ FindNeighbors.dist <- function( n.trees = n.trees, annoy.metric = annoy.metric, verbose = verbose, - force.recalc = force.recalc, l2.norm = l2.norm, return.neighbor = return.neighbor, cache.index = cache.index, @@ -750,7 +744,6 @@ FindNeighbors.Seurat <- function( annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, do.plot = FALSE, graph.name = NULL, l2.norm = FALSE, @@ -775,7 +768,6 @@ FindNeighbors.Seurat <- function( annoy.metric = annoy.metric, nn.eps = nn.eps, verbose = verbose, - force.recalc = force.recalc, l2.norm = l2.norm, return.neighbor = return.neighbor, cache.index = cache.index, @@ -794,7 +786,6 @@ FindNeighbors.Seurat <- function( annoy.metric = annoy.metric, nn.eps = nn.eps, verbose = verbose, - force.recalc = force.recalc, l2.norm = l2.norm, return.neighbor = return.neighbor, cache.index = cache.index, diff --git a/man/FindNeighbors.Rd b/man/FindNeighbors.Rd index 4771bf893..b6c520bca 100644 --- a/man/FindNeighbors.Rd +++ b/man/FindNeighbors.Rd @@ -23,7 +23,6 @@ FindNeighbors(object, ...) annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, l2.norm = FALSE, cache.index = FALSE, index = NULL, @@ -42,7 +41,6 @@ FindNeighbors(object, ...) annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, l2.norm = FALSE, cache.index = FALSE, ... @@ -59,7 +57,6 @@ FindNeighbors(object, ...) annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, l2.norm = FALSE, cache.index = FALSE, ... @@ -80,7 +77,6 @@ FindNeighbors(object, ...) annoy.metric = "euclidean", nn.eps = 0, verbose = TRUE, - force.recalc = FALSE, do.plot = FALSE, graph.name = NULL, l2.norm = FALSE, @@ -127,8 +123,6 @@ default of 0.0 implies exact nearest neighbor search} \item{verbose}{Whether or not to print output to the console} -\item{force.recalc}{Force recalculation of (S)NN.} - \item{l2.norm}{Take L2Norm of the data} \item{cache.index}{Include cached index in returned Neighbor object From b7186951fa93eac459e28c9cf6460b9f6890e053 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Thu, 6 Jul 2023 14:19:53 -0400 Subject: [PATCH 651/979] update documentation, syntax, etc. for CRAN --- DESCRIPTION | 10 ++- NAMESPACE | 14 ++++ R/differential_expression.R | 10 +-- R/dimensional_reduction.R | 1 + R/generics.R | 3 +- R/integration.R | 84 +++++++++++++----------- R/integration5.R | 68 ++++++++++--------- R/preprocessing.R | 24 ++++--- R/preprocessing5.R | 66 +++++++++++++------ R/sketching.R | 73 +++++++++++---------- R/visualization.R | 3 +- R/zzz.R | 2 +- man/CCAIntegration.Rd | 29 +++++++++ man/DISP.Rd | 8 +++ man/DimPlot.Rd | 2 +- man/FindBridgeTransferAnchors.Rd | 7 ++ man/HarmonyIntegration.Rd | 8 ++- man/LeverageScore.Rd | 108 +++++++++++++++++++++++++++++++ man/LogNormalize.Rd | 2 + man/MVP.Rd | 15 ++++- man/RPCAIntegration.Rd | 20 +++--- man/ReadMtx.Rd | 9 +++ man/RidgePlot.Rd | 2 + man/RunSPCA.Rd | 2 + man/SketchData.Rd | 51 +++++++++++++++ man/UnSketchEmbeddings.Rd | 11 ++++ man/VlnPlot.Rd | 2 + 27 files changed, 478 insertions(+), 156 deletions(-) create mode 100644 man/LeverageScore.Rd create mode 100644 man/SketchData.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c1888a97b..b9ec3a183 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,7 +86,14 @@ Imports: tibble, tools, utils, - uwot (>= 0.1.10) + uwot (>= 0.1.10), + DelayedArray, + TileDBArray, + harmony, + presto, + rhdf5, + HDF5Array, + BPCells (>= 0.0.0.9000) LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: MIT + file LICENSE LazyData: true @@ -137,6 +144,5 @@ Suggests: enrichR, mixtools, ggrastr, - BPCells (>= 0.0.0.9000), data.table, R.utils diff --git a/NAMESPACE b/NAMESPACE index 67bd7728b..b163c323f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -478,6 +478,13 @@ importClassesFrom(SeuratObject,Neighbor) importClassesFrom(SeuratObject,Seurat) importClassesFrom(SeuratObject,SeuratCommand) importClassesFrom(SeuratObject,SpatialImage) +importFrom(BPCells,matrix_stats) +importFrom(BPCells,transpose_storage_order) +importFrom(DelayedArray,is_sparse) +importFrom(DelayedArray,path) +importFrom(DelayedArray,setAutoBlockSize) +importFrom(HDF5Array,HDF5RealizationSink) +importFrom(HDF5Array,TENxRealizationSink) importFrom(KernSmooth,bkde) importFrom(MASS,ginv) importFrom(MASS,glm.nb) @@ -551,6 +558,7 @@ importFrom(SeuratObject,DefaultFOV) importFrom(SeuratObject,DefaultLayer) importFrom(SeuratObject,Distances) importFrom(SeuratObject,Embeddings) +importFrom(SeuratObject,EmptyDF) importFrom(SeuratObject,Features) importFrom(SeuratObject,FetchData) importFrom(SeuratObject,GetAssayData) @@ -564,6 +572,7 @@ importFrom(SeuratObject,Indices) importFrom(SeuratObject,IsGlobal) importFrom(SeuratObject,IsSparse) importFrom(SeuratObject,JS) +importFrom(SeuratObject,JoinLayers) importFrom(SeuratObject,Key) importFrom(SeuratObject,Keys) importFrom(SeuratObject,LayerData) @@ -578,6 +587,7 @@ importFrom(SeuratObject,PackageCheck) importFrom(SeuratObject,Project) importFrom(SeuratObject,Radius) importFrom(SeuratObject,Reductions) +importFrom(SeuratObject,RenameAssays) importFrom(SeuratObject,RenameCells) importFrom(SeuratObject,RenameIdents) importFrom(SeuratObject,ReorderIdent) @@ -598,6 +608,7 @@ importFrom(SeuratObject,as.Graph) importFrom(SeuratObject,as.Neighbor) importFrom(SeuratObject,as.Seurat) importFrom(SeuratObject,as.sparse) +importFrom(TileDBArray,TileDBRealizationSink) importFrom(cluster,clara) importFrom(cowplot,get_legend) importFrom(cowplot,plot_grid) @@ -726,6 +737,7 @@ importFrom(grid,pointsGrob) importFrom(grid,rasterGrob) importFrom(grid,unit) importFrom(grid,viewport) +importFrom(harmony,HarmonyMatrix) importFrom(httr,GET) importFrom(httr,accept_json) importFrom(httr,build_url) @@ -785,10 +797,12 @@ importFrom(plotly,layout) importFrom(plotly,plot_ly) importFrom(plotly,raster2uri) importFrom(png,readPNG) +importFrom(presto,wilcoxauc) importFrom(progressr,progressor) importFrom(reticulate,import) importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) +importFrom(rhdf5,h5delete) importFrom(rlang,"!!") importFrom(rlang,abort) importFrom(rlang,arg_match) diff --git a/R/differential_expression.R b/R/differential_expression.R index a4d2837c1..5db0df637 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2088,6 +2088,7 @@ PerformDE <- function( #' @param verbose Print messages and progress #' @importFrom Matrix Matrix #' @importFrom sctransform correct_counts +#' @importFrom SeuratObject JoinLayers #' #' @return Returns a Seurat object with recorrected counts and data in the SCT assay. #' @export @@ -2165,8 +2166,9 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { } umi.layers <- Layers(object = object, assay = umi.assay, search = 'counts') if (length(x = umi.layers) > 1) { - object[[umi.assay]] <- JoinLayers(object = object[[umi.assay]], - layers = "counts", new = "counts") + object[[umi.assay]] <- JoinLayers( + object = object[[umi.assay]], + layers = "counts", new = "counts") } raw_umi <- GetAssayData(object = object, assay = umi.assay, slot = "counts") corrected_counts <- Matrix( @@ -2221,7 +2223,6 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { } PrepSCTFindMarkers.V5 <- function(object, assay = "SCT", umi.assay = "RNA", layer = "counts", verbose = TRUE) { - layers <- Layers(object = object[[umi.assay]], search = layer) dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) for (i in seq_along(along.with = layers)) { @@ -2231,7 +2232,7 @@ PrepSCTFindMarkers.V5 <- function(object, assay = "SCT", umi.assay = "RNA", laye layer = l ) } - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) + cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(length(Cells(object)), ncol(counts))) } # given a UMI count matrix, estimate NB theta parameter for each gene @@ -2355,6 +2356,7 @@ ValidateCellGroups <- function( #' @importFrom stats wilcox.test #' @importFrom future.apply future_sapply #' @importFrom future nbrOfWorkers +#' @importFrom presto wilcoxauc # # @export # diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 847c86104..f1d049855 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2523,6 +2523,7 @@ RunSPCA.Assay <- function( #' @param features Features to compute SPCA on. If features=NULL, SPCA will be run #' using the variable features for the Assay. +#' @param layer Layer to run SPCA on #' #' @rdname RunSPCA #' @concept dimensional_reduction diff --git a/R/generics.R b/R/generics.R index 95f25638f..35692a2c8 100644 --- a/R/generics.R +++ b/R/generics.R @@ -326,6 +326,7 @@ LeverageScore <- function(object, ...) { #' #' @param data Matrix with the raw count data #' @param scale.factor Scale the data; default is \code{1e4} +#' @param margin Margin to normalize over #' @param verbose Print progress #' #' @return A matrix with the normalized and log-transformed data @@ -344,7 +345,7 @@ LeverageScore <- function(object, ...) { LogNormalize <- function( data, scale.factor = 1e4, - # margin = 2L, + margin = 2L, verbose = TRUE, ... ) { diff --git a/R/integration.R b/R/integration.R index cd7b54d4d..bfd192c30 100644 --- a/R/integration.R +++ b/R/integration.R @@ -712,6 +712,7 @@ ReciprocalProject <- function( #' #' @export #' @importFrom methods slot slot<- +#' @importFrom SeuratObject JoinLayers RenameAssays #' @concept integration #' @examples #' \dontrun{ @@ -806,8 +807,8 @@ FindTransferAnchors <- function( reference.reduction.init <- reference.reduction if (inherits(x = reference[[reference.assay]], what = 'Assay5')) { if (length(Layers(reference, search = "data")) > 1) { - reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], - layers = "data", new = "data") + reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], + layers = "data", new = "data") } } if (normalization.method == "SCT") { @@ -1994,26 +1995,25 @@ ProjectIntegration <- function( } emb <- UnSketchEmbeddings( atom.data = LayerData( - object = object[[sketched.assay]], - layer = layers[i], - features = features - ), - atom.cells = cells.sketch, - orig.data = LayerData( - object = object[[assay]], - layer = layers[i], - features = features - ), - embeddings = Embeddings(object = object[[reduction]]), - sketch.matrix = sketch.matrix - ) + object = object[[sketched.assay]], + layer = layers[i], + features = features + ), + atom.cells = cells.sketch, + orig.data = LayerData( + object = object[[assay]], + layer = layers[i], + features = features + ), + embeddings = Embeddings(object = object[[reduction]]), + sketch.matrix = sketch.matrix) emb.list[[i]] <- emb cells.list[[i]] <- colnames(x = emb) } - emb.all <- t(matrix(data = unlist(emb.list), - nrow = ncol(x = object[[reduction]]), - ncol = length(unlist(cells.list)) - )) + emb.all <- t(x = matrix( + data = unlist(emb.list), + nrow = ncol(x = object[[reduction]]), + ncol = length(unlist(cells.list)))) rownames(emb.all) <- unlist(cells.list) emb.all <- emb.all[colnames(object[[assay]]), ] object[[reduction.name]] <- CreateDimReducObject( @@ -4465,7 +4465,7 @@ FindWeightsNN <- function( distances <- Distances(object = nn.obj) distances <- 1 - (distances / distances[, ncol(x = distances)]) cell.index <- Indices(object = nn.obj) - weights <- Seurat:::FindWeightsC( + weights <- FindWeightsC( cells2 = 0:(length(query.cells) - 1), distances = as.matrix(x = distances), anchor_cells2 = reference.cells, @@ -5276,7 +5276,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( #' @export #' ProjectCellEmbeddings.DelayedMatrix <- function( - query.data, + query, block.size = 1e9, reference, assay = NULL, @@ -5288,11 +5288,11 @@ ProjectCellEmbeddings.DelayedMatrix <- function( ) { dims <- dims %||% 1:ncol(reference[[reduction]]) assay <- assay %||% DefaultAssay(reference) - features <- intersect(rownames(query.data), + features <- intersect(rownames(query), rownames(reference[[reduction]]@feature.loadings)) - query.data <- query.data[features,] + query <- query[features,] if (IsSCT(object[[assay]])) { - # TODO: SCT reiduals projection + # TODO: SCT residuals projection } else { feature.mean <- feature.mean[features] %||% RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) @@ -5300,11 +5300,11 @@ ProjectCellEmbeddings.DelayedMatrix <- function( sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = query.data) + cells.grid <- DelayedArray::colAutoGrid(x = query) emb.list <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - data.block <- DelayedArray::read_block(x = query.data, + data.block <- DelayedArray::read_block(x = query, viewport = vp, as.sparse = TRUE) data.block <- apply(data.block, MARGIN = 2, function(x) { @@ -5315,8 +5315,8 @@ ProjectCellEmbeddings.DelayedMatrix <- function( emb.list[[i]] <- emb.block } # list to matrix, column has to be cells - emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query.data))) - rownames(emb.mat) <- colnames(query.data) + emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query))) + rownames(emb.mat) <- colnames(query) colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] } return(emb.mat) @@ -7595,6 +7595,12 @@ PrepareBridgeReference <- function ( #' and query. #' } #' } +#' @param bridge.reduction Dimensional reduction to perform when finding anchors. Can +#' be one of: +#' \itemize{ +#' \item{cca: Canonical correlation analysis} +#' \item{direct: Use assay data as a dimensional reduction} +#' } #' @param verbose Print messages and progress #' #' @export @@ -7855,19 +7861,26 @@ FastRPCAIntegration <- function( #' Transfer embeddings from sketched cells to the full data #' +#' @param atom.data Atom data +#' @param atom.cells Atom cells +#' @param orig.data Original data +#' @param embeddings Embeddings of atom cells +#' @param sketch.matrix Sketch matrix +#' #' @importFrom MASS ginv #' @importFrom Matrix t #' #' @export #' -UnSketchEmbeddings <- function(atom.data, - atom.cells = NULL, - orig.data, - embeddings, - sketch.matrix = NULL +UnSketchEmbeddings <- function( + atom.data, + atom.cells = NULL, + orig.data, + embeddings, + sketch.matrix = NULL ) { if(!all(rownames(atom.data) == rownames(orig.data))) { - stop('fetures in atom.data and orig.data are not identical') + stop('features in atom.data and orig.data are not identical') } else { features = rownames(atom.data) } @@ -7892,7 +7905,6 @@ UnSketchEmbeddings <- function(atom.data, return(emb) } - FeatureSketch <- function(features, ratio = 0.8, seed = 123) { sketch.R <- t(x = CountSketch( nsketch = round(x = ratio * length(x = features)), @@ -7901,5 +7913,3 @@ FeatureSketch <- function(features, ratio = 0.8, seed = 123) { ) return(sketch.R) } - - diff --git a/R/integration5.R b/R/integration5.R index 0e913c7eb..ada4c65a4 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -22,6 +22,7 @@ NULL #' @param features Ignored #' @param scale.layer Ignored #' @param layers Ignored +#' @param key Key for Harmony dimensional reduction #' @param ... Ignored #' #' @return ... @@ -33,7 +34,9 @@ NULL # @templateVar pkg harmony # @template note-reqdpkg #' -#' @examples +#' @importFrom harmony HarmonyMatrix +#' +#' @examples #' \dontrun{ #' # Preprocessing #' obj <- SeuratData::LoadData("pbmcsca") @@ -42,24 +45,24 @@ NULL #' obj <- FindVariableFeatures(obj) #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) -#' +#' #' # After preprocessing, we integrate layers with added parameters specific to Harmony: #' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", #' new.reduction = 'harmony', verbose = FALSE) -#' +#' #' # Modifying Parameters #' # We can also add arguments specific to Harmony such as theta, to give more diverse clusters #' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", #' new.reduction = 'harmony', verbose = FALSE, theta = 3) #' } -#' +#' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) -#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, -#' orig.reduction = "pca", new.reduction = 'harmony', +#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, +#' orig.reduction = "pca", new.reduction = 'harmony', #' assay = "SCT", verbose = FALSE) -#' -#' +#' +#' #' @export #' #' @concept integration @@ -108,7 +111,7 @@ HarmonyIntegration <- function( # verbose = verbose # ) # Run Harmony - harmony.embed <- harmony::HarmonyMatrix( + harmony.embed <- HarmonyMatrix( data_mat = Embeddings(object = orig), meta_data = groups, vars_use = 'group', @@ -144,7 +147,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' #' @inheritParams FindIntegrationAnchors #' @export -#' +#' #' @examples #' \dontrun{ #' # Preprocessing @@ -154,24 +157,25 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' obj <- FindVariableFeatures(obj) #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) -#' -#' # After preprocessing, we integrate layers. -#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' +#' # After preprocessing, we integrate layers. +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = "integrated.cca", #' verbose = FALSE) -#' +#' #' # Modifying parameters -#' # We can also specify parameters such as `k.anchor` to increase the strength of integration -#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' # We can also specify parameters such as `k.anchor` to increase the strength of integration +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = "integrated.cca", #' k.anchor = 20, verbose = FALSE) #' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) -#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = "integrated.cca", #' assay = "SCT", verbose = FALSE) -#' +#' } +#' CCAIntegration <- function( object = NULL, assay = NULL, @@ -246,32 +250,32 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' obj <- FindVariableFeatures(obj) #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) -#' +#' #' # After preprocessing, we run integration -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' verbose = FALSE) -#' +#' #' # Reference-based Integration #' # Here, we use the first layer as a reference for integraion #' # Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' reference = 1, verbose = FALSE) #' #' # Modifying parameters #' # We can also specify parameters such as `k.anchor` to increase the strength of integration -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' k.anchor = 20, verbose = FALSE) #' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' assay = "SCT", verbose = FALSE) #' } -#' +#' #' @inheritParams FindIntegrationAnchors #' @export #' diff --git a/R/preprocessing.R b/R/preprocessing.R index 97723df6b..064565efa 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -530,14 +530,11 @@ Load10X_Spatial <- function( immediate. = TRUE) data.dir <- data.dir[1] } - data <- Read10X_h5(filename = file.path(data.dir, filename), - ...) - + data <- Read10X_h5(filename = file.path(data.dir, filename), ...) if (to.upper) { - data <- imap(data, ~{ - rownames(.x) <- toupper(rownames(.x)) - .x - }) + for(i in seq_along(data)) { + rownames(data[[i]]) <- toupper(rownames(data[[i]])) + } } if (is.list(data) & "Antibody Capture" %in% names(data)) { matrix_gex <- data$`Gene Expression` @@ -1503,8 +1500,11 @@ ReadAkoya <- function( #' @param features Name or remote URL of the features/genes file #' @param cell.column Specify which column of cells file to use for cell names; default is 1 #' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 +#' @param cell.sep Specify the delimiter in the cell name file +#' @param feature.sep Specify the delimiter in the feature name file #' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names #' @param skip.feature Number of lines to skip in the features file before beginning to gene names +#' @param mtx.transpose Transpose the matrix after reading in #' @param unique.features Make feature names unique (default TRUE) #' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. #' @@ -1543,8 +1543,11 @@ ReadMtx <- function( features, cell.column = 1, feature.column = 2, + cell.sep = "\t", + feature.sep = "\t", skip.cell = 0, skip.feature = 0, + mtx.transpose = FALSE, unique.features = TRUE, strip.suffix = FALSE ) { @@ -1585,14 +1588,14 @@ ReadMtx <- function( cell.barcodes <- read.table( file = all.files[['barcode list']], header = FALSE, - sep = '\t', + sep = cell.sep, row.names = NULL, skip = skip.cell ) feature.names <- read.table( file = all.files[['feature list']], header = FALSE, - sep = '\t', + sep = feature.sep, row.names = NULL, skip = skip.feature ) @@ -1663,6 +1666,9 @@ ReadMtx <- function( feature.names <- make.unique(names = feature.names) } data <- readMM(file = all.files[['expression matrix']]) + if (mtx.transpose) { + data <- t(x = data) + } if (length(x = cell.names) != ncol(x = data)) { stop( "Matrix has ", diff --git a/R/preprocessing5.R b/R/preprocessing5.R index df7b33e6a..42a7c456f 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -241,6 +241,9 @@ FindSpatiallyVariableFeatures.StdAssay <- function( #' @rdname LogNormalize #' @method LogNormalize default +#' +#' @param margin Margin to normalize over +#' #' @export #' LogNormalize.default <- function( @@ -369,6 +372,9 @@ LogNormalize.H5ADMatrix <- function( } #' @method LogNormalize HDF5Matrix +#' @importFrom rhdf5 h5delete +#' @importFrom DelayedArray path is_sparse +#' @importFrom HDF5Array HDF5RealizationSink #' @export #' LogNormalize.HDF5Matrix <- function( @@ -380,20 +386,20 @@ LogNormalize.HDF5Matrix <- function( ... ) { check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') - fpath <- DelayedArray::path(object = data) + fpath <- path(object = data) if (.DelayedH5DExists(object = data, path = layer)) { - rhdf5::h5delete(file = fpath, name = layer) + h5delete(file = fpath, name = layer) dpath <- file.path( dirname(path = layer), paste0('.', basename(layer), '_dimnames'), fsep = '/' ) - rhdf5::h5delete(file = fpath, name = dpath) + h5delete(file = fpath, name = dpath) } - sink <- HDF5Array::HDF5RealizationSink( + sink <- HDF5RealizationSink( dim = dim(x = data), dimnames = dimnames(x = data), - as.sparse = DelayedArray::is_sparse(x = data), + as.sparse = is_sparse(x = data), filepath = fpath, name = layer ) @@ -423,7 +429,9 @@ LogNormalize.IterableMatrix <- function( data <- log1p(data * scale.factor) return(data) } + #' @method LogNormalize TileDBMatrix +#' @importFrom TileDBArray TileDBRealizationSink #' @export #' LogNormalize.TileDBMatrix <- function( @@ -435,8 +443,8 @@ LogNormalize.TileDBMatrix <- function( ... ) { check_installed( - pkg = 'TileDBArray', - reason = 'for working with TileDB matrices' + pkg = "TileDBArray", + reason = "for working with TileDB matrices" ) odir <- c( dirname(path = DelayedArray::path(object = data)), @@ -446,14 +454,15 @@ LogNormalize.TileDBMatrix <- function( # file.access returns 0 (FALSE) for true and -1 (TRUE) for false idx <- which(x = !file.access(names = odir, mode = 2L))[1L] if (rlang::is_na(x = idx)) { - abort(message = "Unable to find a directory to write normalized TileDB matrix") + abort( + message = "Unable to find a directory to write normalized TileDB matrix") } out <- file.path(odir[idx], layer) if (!file.access(names = out, mode = 0L)) { warn(message = paste(sQuote(x = out), "exists, overwriting")) unlink(x = out, recursive = TRUE, force = TRUE) } - sink <- TileDBArray::TileDBRealizationSink( + sink <- TileDBRealizationSink( dim = dim(x = data), dimnames = dimnames(x = data), type = BiocGenerics::type(x = data), @@ -474,12 +483,15 @@ LogNormalize.TileDBMatrix <- function( #' @method LogNormalize TENxMatrix #' @export +#' @importFrom HDF5Array TENxRealizationSink +#' @importFrom rhdf5 h5delete +#' @importFrom DelayedArray path #' LogNormalize.TENxMatrix <- function( data, scale.factor = 1e4, margin = 2L, - verbose= TRUE, + verbose = TRUE, layer = 'data', ... ) { @@ -786,10 +798,12 @@ VST.default <- function( #' @rdname VST #' @method VST IterableMatrix +#' @importFrom SeuratObject EmptyDF #' @export #' -VST.IterableMatrix <-function( +VST.IterableMatrix <- function( data, + margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, @@ -797,14 +811,14 @@ VST.IterableMatrix <-function( ... ) { nfeatures <- nrow(x = data) - hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) + hvf.info <- SeuratObject::EmptyDF(n = nfeatures) hvf.stats <- BPCells::matrix_stats( matrix = data, row_stats = 'variance')$row_stats # Calculate feature means - hvf.info$mean <- hvf.stats['mean',] + hvf.info$mean <- hvf.stats['mean' ] # Calculate feature variance - hvf.info$variance <- hvf.stats['variance',] + hvf.info$variance <- hvf.stats['variance', ] hvf.info$variance.expected <- 0L not.const <- hvf.info$variance > 0 fit <- loess( @@ -818,11 +832,11 @@ VST.IterableMatrix <-function( standard.max <- clip %||% sqrt(x = ncol(x = data)) feature.mean[feature.mean == 0] <- 0.1 data <- BPCells::min_by_row(mat = data, vals = standard.max*feature.sd + feature.mean) - data.standard <- (data - feature.mean)/feature.sd + data.standard <- (data - feature.mean) / feature.sd hvf.info$variance.standardized <- BPCells::matrix_stats( matrix = data.standard, row_stats = 'variance' - )$row_stats['variance',] + )$row_stats['variance', ] # Set variable features hvf.info$variable <- FALSE hvf.info$rank <- NA @@ -930,7 +944,7 @@ VST.dgCMatrix <- function( ... ) { nfeatures <- nrow(x = data) - hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) + hvf.info <- SeuratObject::EmptyDF(n = nfeatures) # Calculate feature means hvf.info$mean <- Matrix::rowMeans(x = data) # Calculate feature variance @@ -1066,6 +1080,11 @@ CalcN <- function(object) { #' Find variable features based on dispersion #' +#' @param data Data matrix +#' @param nselect Number of top features to select based on dispersion values +#' @param verbose Display progress +#' @keywords internal +#' DISP <- function( data, nselect = 2000L, @@ -1322,7 +1341,7 @@ DISP <- function( mean.func <- .Mean var.func <- .FeatureVar } - hvf.info <- SeuratObject:::EmptyDF(n = nfeatures) + hvf.info <- SeuratObject::EmptyDF(n = nfeatures) # hvf.info$mean <- mean.func(data = data, margin = fmargin) hvf.info$mean <- rowMeans(x = data) hvf.info$variance <- var.func( @@ -2239,13 +2258,20 @@ FetchResiduals_reference <- function(object, #' Find variable features based on mean.var.plot #' +#' @param data Data matrix +#' @param nselect Number of features to select based on dispersion values +#' @param verbose Whether to print messages and progress bars +#' @param mean.cutoff Numeric of length two specifying the min and max values +#' @param dispersion.cutoff Numeric of length two specifying the min and max values +#' +#' @keywords internal +#' MVP <- function( data, verbose = TRUE, nselect = 2000L, mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), - ... + dispersion.cutoff = c(1, Inf) ) { hvf.info <- DISP(data = data, nselect = nselect, verbose = verbose) hvf.info$variable <- FALSE diff --git a/R/sketching.R b/R/sketching.R index a44fbf614..87746f902 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -174,32 +174,32 @@ ProjectData <- function( message(full.reduction, ' is not in the object.' ,' Data from all cells will be projected to ', sketched.reduction) } - proj.emb <- ProjectCellEmbeddings(query = object, - reference = object, - query.assay = assay, - dims = dims, - normalization.method = normalization.method, - reference.assay = sketched.assay, - reduction = sketched.reduction, - verbose = verbose) + proj.emb <- ProjectCellEmbeddings( + query = object, + reference = object, + query.assay = assay, + dims = dims, + normalization.method = normalization.method, + reference.assay = sketched.assay, + reduction = sketched.reduction, + verbose = verbose) object[[full.reduction]] <- CreateDimReducObject( embeddings = proj.emb, assay = assay, key = Key(object = full.reduction, quiet = TRUE) ) } - - object <- TransferSketchLabels(object = object, - sketched.assay = sketched.assay, - reduction = full.reduction, - dims = dims, - k = k.weight, - refdata = refdata, - reduction.model = umap.model, - recompute.neighbors = recompute.neighbors, - recompute.weights = recompute.weights, - verbose = verbose - ) + object <- TransferSketchLabels( + object = object, + sketched.assay = sketched.assay, + reduction = full.reduction, + dims = dims, + k = k.weight, + refdata = refdata, + reduction.model = umap.model, + recompute.neighbors = recompute.neighbors, + recompute.weights = recompute.weights, + verbose = verbose) return(object) } @@ -249,20 +249,20 @@ TransferSketchLabels <- function( object = object, slot = 'TransferSketchLabels' )$full_sketch.weight - + compute.neighbors <- is.null(x = full_sketch.nn) || !all(Cells(full_sketch.nn) == Cells(object[[reduction]])) || max(Indices(full_sketch.nn)) > ncol(object[[sketched.assay]]) || !identical(x = full_sketch.nn@alg.info$dims, y = dims) || !identical(x = full_sketch.nn@alg.info$reduction, y = reduction) || recompute.neighbors - + compute.weights <- is.null(x = full_sketch.weight) || !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || !all(rownames(full_sketch.weight) == colnames(object[[sketched.assay]])) || recompute.weights || recompute.neighbors - + if (compute.neighbors) { if (verbose) { message("Finding sketch neighbors") @@ -280,16 +280,21 @@ TransferSketchLabels <- function( if (verbose) { message("Finding sketch weight matrix") } - full_sketch.weight <- FindWeightsNN(nn.obj = full_sketch.nn, - query.cells = Cells(object[[reduction]]), - reference = colnames(object[[sketched.assay]]), - verbose = verbose) + full_sketch.weight <- FindWeightsNN( + nn.obj = full_sketch.nn, + query.cells = Cells(object[[reduction]]), + reference.cells = colnames(object[[sketched.assay]]), + verbose = verbose) rownames(full_sketch.weight) <- colnames(object[[sketched.assay]]) colnames(full_sketch.weight) <- Cells(object[[reduction]]) } - slot(object = object, name = 'tools')$TransferSketchLabels$full_sketch.nn <- full_sketch.nn - slot(object = object, name = 'tools')$TransferSketchLabels$full_sketch.weight <- full_sketch.weight - + slot( + object = object, name = 'tools' + )$TransferSketchLabels$full_sketch.nn <- full_sketch.nn + slot( + object = object, name = 'tools' + )$TransferSketchLabels$full_sketch.weight <- full_sketch.weight + if (!is.null(refdata)) { if (length(refdata) == 1 & is.character(refdata)) { refdata <- list(refdata) @@ -365,7 +370,8 @@ TransferSketchLabels <- function( #' @param verbose Print progress and diagnostic messages #' @importFrom Matrix qrR t #' @importFrom irlba irlba -#' +#' @importFrom BPCells transpose_storage_order matrix_stats +#' #' @rdname LeverageScore #' @method LeverageScore default #' @export @@ -382,7 +388,7 @@ LeverageScore.default <- function( ) { # Check the dimensions of the object, nsketch, and ndims ncells <- ncol(x = object) - if (ncells < nsketch*1.5) { + if (ncells < nsketch * 1.5) { Z <- irlba(A = object, nv = 50, nu = 0, verbose = FALSE)$v return(rowSums(x = Z ^ 2)) } @@ -462,6 +468,7 @@ LeverageScore.default <- function( #' @rdname LeverageScore #' @importFrom Matrix qrR t +#' @importFrom DelayedArray setAutoBlockSize #' @method LeverageScore DelayedMatrix #' @export #' @@ -547,7 +554,7 @@ LeverageScore.StdAssay <- function( if (!is_quosure(x = method)) { method <- enquo(arg = method) } - scores <- SeuratObject:::EmptyDF(n = ncol(x = object)) + scores <- SeuratObject::EmptyDF(n = ncol(x = object)) row.names(x = scores) <- colnames(x = object) scores[, 1] <- NA_real_ for (i in seq_along(along.with = layer)) { diff --git a/R/visualization.R b/R/visualization.R index 7adcc6d36..8db6e1b04 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -510,6 +510,7 @@ HTOHeatmap <- function( #' @param log plot the feature axis on log scale #' @param ncol Number of columns if multiple plots are displayed #' @param slot Slot to pull expression data from (e.g. "counts" or "data") +#' @param layer Layer to pull expression data from (e.g. "counts" or "data") #' @param stack Horizontally stack plots for each feature #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} #' ggplot object. If \code{FALSE}, return a list of ggplot @@ -826,7 +827,7 @@ ColorDimSplit <- function( #' @examples #' data("pbmc_small") #' DimPlot(object = pbmc_small) -#' DimPlot(object = pbmc_small, split.by = 'ident') +#' DimPlot(object = pbmc_small, split.by = 'letter.idents') #' DimPlot <- function( object, diff --git a/R/zzz.R b/R/zzz.R index 0470b84ef..5793b6760 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -90,7 +90,7 @@ AttachDeps <- function(deps) { packageStartupMessage("Loading Seurat v5 beta version \n", "To maintain compatibility with previous workflows, new Seurat objects ", "will use the previous object structure by default\n", - "To use new Seurat v5 assays: Please run: ", + "To use new Seurat v5 assays please run: ", "options(Seurat.object.assay.version = 'v5')") return(invisible(x = NULL)) } diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 797b1147a..5c528f647 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -47,3 +47,32 @@ search space} \description{ Seurat-CCA Integration } +\examples{ +\dontrun{ +# Preprocessing +obj <- SeuratData::LoadData("pbmcsca") +obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) + +# After preprocessing, we integrate layers. +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = "integrated.cca", + verbose = FALSE) + +# Modifying parameters +# We can also specify parameters such as `k.anchor` to increase the strength of integration +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = "integrated.cca", + k.anchor = 20, verbose = FALSE) + +# Integrating SCTransformed data +obj <- SCTransform(object = obj) +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = "integrated.cca", + assay = "SCT", verbose = FALSE) +} + +} diff --git a/man/DISP.Rd b/man/DISP.Rd index 25a772eef..e876f0823 100644 --- a/man/DISP.Rd +++ b/man/DISP.Rd @@ -6,6 +6,14 @@ \usage{ DISP(data, nselect = 2000L, verbose = TRUE, ...) } +\arguments{ +\item{data}{Data matrix} + +\item{nselect}{Number of top features to select based on dispersion values} + +\item{verbose}{Display progress} +} \description{ Find variable features based on dispersion } +\keyword{internal} diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd index 3d7fa3027..8f6cbd45e 100644 --- a/man/DimPlot.Rd +++ b/man/DimPlot.Rd @@ -134,7 +134,7 @@ For the old \code{do.hover} and \code{do.identify} functionality, please see \examples{ data("pbmc_small") DimPlot(object = pbmc_small) -DimPlot(object = pbmc_small, split.by = 'ident') +DimPlot(object = pbmc_small, split.by = 'letter.idents') } \seealso{ diff --git a/man/FindBridgeTransferAnchors.Rd b/man/FindBridgeTransferAnchors.Rd index a0c40eb22..c4dabe9aa 100644 --- a/man/FindBridgeTransferAnchors.Rd +++ b/man/FindBridgeTransferAnchors.Rd @@ -40,6 +40,13 @@ Options are: } }} +\item{bridge.reduction}{Dimensional reduction to perform when finding anchors. Can +be one of: +\itemize{ + \item{cca: Canonical correlation analysis} + \item{direct: Use assay data as a dimensional reduction} +}} + \item{verbose}{Print messages and progress} } \value{ diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 82e382f1e..8b8f68e15 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -43,6 +43,8 @@ should be called \code{group}} \item{npcs}{If doing PCA on input matrix, number of PCs to compute.} +\item{key}{Key for Harmony dimensional reduction} + \item{theta}{Diversity clustering penalty parameter. Specify for each variable in vars_use Default theta=2. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters.} @@ -116,10 +118,10 @@ obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction # Integrating SCTransformed data obj <- SCTransform(object = obj) -obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, - orig.reduction = "pca", new.reduction = 'harmony', +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, + orig.reduction = "pca", new.reduction = 'harmony', assay = "SCT", verbose = FALSE) - + } \seealso{ diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd new file mode 100644 index 000000000..98aefd5c0 --- /dev/null +++ b/man/LeverageScore.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/sketching.R +\name{LeverageScore} +\alias{LeverageScore} +\alias{LeverageScore.default} +\alias{LeverageScore.DelayedMatrix} +\alias{LeverageScore.StdAssay} +\alias{LeverageScore.Assay} +\alias{LeverageScore.Seurat} +\title{Leverage Score Calculation} +\usage{ +LeverageScore(object, ...) + +\method{LeverageScore}{default}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{DelayedMatrix}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + block.size = 1e+08, + verbose = TRUE, + ... +) + +\method{LeverageScore}{StdAssay}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Assay}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Seurat}( + object, + assay = NULL, + nsketch = 5000L, + ndims = NULL, + var.name = "leverage.score", + over.write = FALSE, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{A matrix-like object} + +\item{...}{Arguments passed to other methods} + +\item{nsketch}{A positive integer. The number of sketches to be used in the approximation. +Default is 5000.} + +\item{ndims}{A positive integer or NULL. The number of dimensions to use. If NULL, the number +of dimensions will default to the number of columns in the object.} + +\item{method}{The sketching method to use, defaults to CountSketch.} + +\item{eps}{A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +defaults to 0.5.} + +\item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} + +\item{verbose}{Print progress and diagnostic messages} +} +\description{ +This function computes the leverage scores for a given object +It uses the concept of sketching and random projections. The function provides an approximation +to the leverage scores using a scalable method suitable for large matrices. +} +\references{ +Clarkson, K. L. & Woodruff, D. P. +Low-rank approximation and regression in input sparsity time. +JACM 63, 1–45 (2017). \url{https://dl.acm.org/doi/10.1145/3019134}; +} diff --git a/man/LogNormalize.Rd b/man/LogNormalize.Rd index ab1917897..6d45d6321 100644 --- a/man/LogNormalize.Rd +++ b/man/LogNormalize.Rd @@ -24,6 +24,8 @@ LogNormalize(data, scale.factor = 10000, verbose = TRUE, ...) \item{verbose}{Print progress} \item{...}{Arguments passed to other methods} + +\item{margin}{Margin to normalize over} } \value{ A matrix with the normalized and log-transformed data diff --git a/man/MVP.Rd b/man/MVP.Rd index abc90a2d9..85c1281e0 100644 --- a/man/MVP.Rd +++ b/man/MVP.Rd @@ -9,10 +9,21 @@ MVP( verbose = TRUE, nselect = 2000L, mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), - ... + dispersion.cutoff = c(1, Inf) ) } +\arguments{ +\item{data}{Data matrix} + +\item{verbose}{Whether to print messages and progress bars} + +\item{nselect}{Number of features to select based on dispersion values} + +\item{mean.cutoff}{Numeric of length two specifying the min and max values} + +\item{dispersion.cutoff}{Numeric of length two specifying the min and max values} +} \description{ Find variable features based on mean.var.plot } +\keyword{internal} diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index f7d6d60ea..087ecbadc 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -58,28 +58,28 @@ obj <- ScaleData(obj) obj <- RunPCA(obj) # After preprocessing, we run integration -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', verbose = FALSE) - + # Reference-based Integration # Here, we use the first layer as a reference for integraion # Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', reference = 1, verbose = FALSE) # Modifying parameters # We can also specify parameters such as `k.anchor` to increase the strength of integration -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', k.anchor = 20, verbose = FALSE) # Integrating SCTransformed data obj <- SCTransform(object = obj) -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', assay = "SCT", verbose = FALSE) } - + } diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd index e24620851..2c78d722f 100644 --- a/man/ReadMtx.Rd +++ b/man/ReadMtx.Rd @@ -10,8 +10,11 @@ ReadMtx( features, cell.column = 1, feature.column = 2, + cell.sep = "\\t", + feature.sep = "\\t", skip.cell = 0, skip.feature = 0, + mtx.transpose = FALSE, unique.features = TRUE, strip.suffix = FALSE ) @@ -27,10 +30,16 @@ ReadMtx( \item{feature.column}{Specify which column of features files to use for feature/gene names; default is 2} +\item{cell.sep}{Specify the delimiter in the cell name file} + +\item{feature.sep}{Specify the delimiter in the feature name file} + \item{skip.cell}{Number of lines to skip in the cells file before beginning to read cell names} \item{skip.feature}{Number of lines to skip in the features file before beginning to gene names} +\item{mtx.transpose}{Transpose the matrix after reading in} + \item{unique.features}{Make feature names unique (default TRUE)} \item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.} diff --git a/man/RidgePlot.Rd b/man/RidgePlot.Rd index fda292262..744e120e4 100644 --- a/man/RidgePlot.Rd +++ b/man/RidgePlot.Rd @@ -50,6 +50,8 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{slot}{Slot to pull expression data from (e.g. "counts" or "data")} +\item{layer}{Layer to pull expression data from (e.g. "counts" or "data")} + \item{stack}{Horizontally stack plots for each feature} \item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} diff --git a/man/RunSPCA.Rd b/man/RunSPCA.Rd index f3deaa81c..11c23986e 100644 --- a/man/RunSPCA.Rd +++ b/man/RunSPCA.Rd @@ -82,6 +82,8 @@ NULL will not set a seed.} \item{features}{Features to compute SPCA on. If features=NULL, SPCA will be run using the variable features for the Assay.} +\item{layer}{Layer to run SPCA on} + \item{reduction.name}{dimensional reduction name, spca by default} } \value{ diff --git a/man/SketchData.Rd b/man/SketchData.Rd new file mode 100644 index 000000000..f4b636738 --- /dev/null +++ b/man/SketchData.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{SketchData} +\alias{SketchData} +\title{Sketch Data} +\usage{ +SketchData( + object, + assay = NULL, + ncells = 5000L, + sketched.assay = "sketch", + method = c("LeverageScore", "Uniform"), + var.name = "leverage.score", + over.write = FALSE, + seed = 123L, + cast = "dgCMatrix", + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{A Seurat object.} + +\item{assay}{Assay name. Default is NULL, in which case the default assay of the object is used.} + +\item{ncells}{A positive integer indicating the number of cells to sample for the sketching. Default is 5000.} + +\item{sketched.assay}{Sketched assay name. A sketch assay is created or overwrite with the sketch data. Default is 'sketch'.} + +\item{method}{Sketching method to use. Can be 'LeverageScore' or 'Uniform'. +Default is 'LeverageScore'.} + +\item{var.name}{A metadata column name to store the leverage scores. Default is 'leverage.score'.} + +\item{over.write}{whether to overwrite existing column in the metadata. Default is FALSE.} + +\item{seed}{A positive integer for the seed of the random number generator. Default is 123.} + +\item{cast}{The type to cast the resulting assay to. Default is 'dgCMatrix'.} + +\item{verbose}{Print progress and diagnostic messages} + +\item{...}{Arguments passed to other methods} +} +\value{ +A Seurat object with the sketched data added as a new assay. +} +\description{ +This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, +which can help with scalability for large datasets. +} diff --git a/man/UnSketchEmbeddings.Rd b/man/UnSketchEmbeddings.Rd index cc128c4c3..e61c1df73 100644 --- a/man/UnSketchEmbeddings.Rd +++ b/man/UnSketchEmbeddings.Rd @@ -12,6 +12,17 @@ UnSketchEmbeddings( sketch.matrix = NULL ) } +\arguments{ +\item{atom.data}{Atom data} + +\item{atom.cells}{Atom cells} + +\item{orig.data}{Original data} + +\item{embeddings}{Embeddings of atom cells} + +\item{sketch.matrix}{Sketch matrix} +} \description{ Transfer embeddings from sketched cells to the full data } diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 749010d2c..d138d3286 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -66,6 +66,8 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{slot}{Slot to pull expression data from (e.g. "counts" or "data")} +\item{layer}{Layer to pull expression data from (e.g. "counts" or "data")} + \item{split.plot}{plot each group of the split violin plots by multiple or single violin shapes.} From 0e3c96a020a6d2d4341e42c48ae3ea352b2c867c Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 14:24:12 -0400 Subject: [PATCH 652/979] update sketch message --- R/sketching.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index a44fbf614..1bf7fe4b8 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -78,7 +78,7 @@ SketchData <- function( var.name = var.name, over.write = over.write, seed = seed, - verbose = verbose, + verbose = FALSE, ... ) } else if (method == 'Uniform') { @@ -415,7 +415,7 @@ LeverageScore.default <- function( stopifnot(is.function(x = method)) # Run the sketching if (isTRUE(x = verbose)) { - message("sampling ", nsketch, " cells") + message("sampling ", nsketch, " cells for random sketching") } S <- method(nsketch = nsketch, ncells = ncells, seed = seed, ...) object <- t(x = object) From 1ae11936f5d130e83b0b83480c3731490f75ea8b Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 6 Jul 2023 15:05:22 -0400 Subject: [PATCH 653/979] DotPlot, FeaturePlot, DimPlot, FeatureScatter, VlnPlot now all support split.by='ident') --- R/visualization.R | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 156c0edf7..924663c6b 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -893,7 +893,7 @@ DimPlot <- function( data[, shape.by] <- object[[shape.by, drop = TRUE]] } if (!is.null(x = split.by)) { - data[, split.by] <- object[[split.by, drop = TRUE]] + data[, split.by] <- FetchData(object,split.by)[split.by] } if (isTRUE(x = shuffle)) { set.seed(seed = seed) @@ -2006,7 +2006,7 @@ FeatureScatter <- function( } } if (!is.null(x = split.by)) { - data[, split.by] <- object[[split.by, drop = TRUE]] + data[, split.by] <- FetchData(object,split.by)[split.by] } plots <- lapply( X = group.by, @@ -4315,8 +4315,8 @@ BarcodeInflectionsPlot <- function(object) { #' DotPlot <- function( object, - assay = NULL, features, + assay = NULL, cols = c("lightgrey", "blue"), col.min = -2.5, col.max = 2.5, @@ -4372,10 +4372,10 @@ DotPlot <- function( id.levels <- levels(x = data.features$id) data.features$id <- as.vector(x = data.features$id) if (!is.null(x = split.by)) { - splits <- object[[split.by, drop = TRUE]][cells, drop = TRUE] + splits <- FetchData(object,split.by)[cells,split.by] if (split.colors) { if (length(x = unique(x = splits)) > length(x = cols)) { - stop("Not enough colors for the number of groups") + stop(paste0("Need to specify at least ", length(x = unique(x = splits)), " colors using the cols parameter")) } cols <- cols[1:length(x = unique(x = splits))] names(x = cols) <- unique(x = splits) @@ -4441,7 +4441,7 @@ DotPlot <- function( FUN = function(x) { data.use <- data.plot[data.plot$features.plot == x, 'avg.exp'] if (scale) { - data.use <- scale(x = data.use) + data.use <- scale(x = log1p(data.use)) data.use <- MinMax(data = data.use, min = col.min, max = col.max) } else { data.use <- log1p(x = data.use) @@ -4461,18 +4461,8 @@ DotPlot <- function( data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA data.plot$pct.exp <- data.plot$pct.exp * 100 if (split.colors) { - splits.use <- vapply( - X = as.character(x = data.plot$id), - FUN = gsub, - FUN.VALUE = character(length = 1L), - pattern = paste0( - '^((', - paste(sort(x = levels(x = object), decreasing = TRUE), collapse = '|'), - ')_)' - ), - replacement = '', - USE.NAMES = FALSE - ) + splits.use <- unlist(lapply(data.plot$id, function(x) + sub(paste0(".*_(",paste(sort(unique(splits),decreasing = TRUE), collapse = '|'),")$"), "\\1", x) )) data.plot$colors <- mapply( FUN = function(color, value) { return(colorRampPalette(colors = c('grey', color))(20)[value]) @@ -6677,7 +6667,7 @@ ExIPlot <- function( if (is.null(x = split.by)) { split <- NULL } else { - split <- object[[split.by, drop = TRUE]][cells] + split <- FetchData(object,split.by)[cells,split.by] if (!is.factor(x = split)) { split <- factor(x = split) } From 047b929b36a81c9b3dbe508c9f24500eee01f562 Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 6 Jul 2023 15:27:51 -0400 Subject: [PATCH 654/979] Updated documentation and also fixed DotPlot scale coloring issue --- R/visualization.R | 17 +++--- man/ColorDimSplit.Rd | 4 +- man/DimPlot.Rd | 4 +- man/DotPlot.Rd | 10 ++-- man/FeaturePlot.Rd | 12 ++--- man/FeatureScatter.Rd | 5 ++ man/ImageDimPlot.Rd | 4 +- man/ImageFeaturePlot.Rd | 17 +++--- man/IntegrateData.Rd | 6 +-- man/IntegrateEmbeddings.Rd | 6 +-- man/LeverageScore.Rd | 108 +++++++++++++++++++++++++++++++++++++ man/PolyFeaturePlot.Rd | 5 +- man/Seurat-package.Rd | 2 +- man/SketchData.Rd | 51 ++++++++++++++++++ man/VlnPlot.Rd | 3 +- man/reexports.Rd | 2 +- 16 files changed, 209 insertions(+), 47 deletions(-) create mode 100644 man/LeverageScore.Rd create mode 100644 man/SketchData.Rd diff --git a/R/visualization.R b/R/visualization.R index 924663c6b..b93a77114 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -579,7 +579,8 @@ RidgePlot <- function( #' @inheritParams RidgePlot #' @param pt.size Point size for points #' @param alpha Alpha value for points -#' @param split.by A variable to split the violin plots by, +#' @param split.by A factor in object metadata to split the plot by, pass 'ident' +#' to split by cell identity' #' @param split.plot plot each group of the split violin plots by multiple or #' single violin shapes. #' @param adjust Adjust parameter for geom_violin @@ -770,8 +771,8 @@ ColorDimSplit <- function( #' @param reduction Which dimensionality reduction to use. If not specified, first searches for umap, then tsne, then pca #' @param group.by Name of one or more metadata columns to group (color) cells by #' (for example, orig.ident); pass 'ident' to group by identity class -#' @param split.by Name of a metadata column to split plot by; -#' see \code{\link{FetchData}} for more details +#' @param split.by A factor in object metadata to split the plot by, pass 'ident' +#' to split by cell identity' #' @param shape.by If NULL, all points are circles (default). You can specify any #' cell attribute (that can be pulled with FetchData) allowing for both #' different colors and different shapes on cells. Only applicable if \code{raster = FALSE}. @@ -983,8 +984,8 @@ DimPlot <- function( #' } #' @param min.cutoff,max.cutoff Vector of minimum and maximum cutoff values for each feature, #' may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10') -#' @param split.by A factor in object metadata to split the feature plot by, pass 'ident' -#' to split by cell identity'; similar to the old \code{FeatureHeatmap} +#' @param split.by A factor in object metadata to split the plot by, pass 'ident' +#' to split by cell identity' #' @param keep.scale How to handle the color scale across multiple plots. Options are: #' \itemize{ #' \item{"feature" (default; by row/feature scaling):}{ The plots for each individual feature are scaled to the maximum expression of the feature across the conditions provided to 'split.by'.} @@ -1931,6 +1932,8 @@ CellScatter <- function( #' @param cols Colors to use for identity class plotting. #' @param pt.size Size of the points on the plot #' @param shape.by Ignored for now +#' @param split.by A factor in object metadata to split the feature plot by, pass 'ident' +#' to split by cell identity' #' @param span Spline span in loess function call, if \code{NULL}, no spline added #' @param smooth Smooth the graph (similar to smoothScatter) #' @param slot Slot to pull data from, should be one of 'counts', 'data', or 'scale.data' @@ -4278,8 +4281,8 @@ BarcodeInflectionsPlot <- function(object) { #' @param dot.scale Scale the size of the points, similar to cex #' @param idents Identity classes to include in plot (default is all) #' @param group.by Factor to group the cells by -#' @param split.by Factor to split the groups by (replicates the functionality -#' of the old SplitDotPlotGG); +#' @param split.by A factor in object metadata to split the plot by, pass 'ident' +#' to split by cell identity' #' see \code{\link{FetchData}} for more details #' @param cluster.idents Whether to order identities by hierarchical clusters #' based on given features, default is FALSE diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd index 1f85d1974..214e773c9 100644 --- a/man/ColorDimSplit.Rd +++ b/man/ColorDimSplit.Rd @@ -37,8 +37,8 @@ See \code{\link{DiscretePalette}} for details.} \item{\code{reduction}}{Which dimensionality reduction to use. If not specified, first searches for umap, then tsne, then pca} \item{\code{group.by}}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} - \item{\code{split.by}}{Name of a metadata column to split plot by; -see \code{\link{FetchData}} for more details} + \item{\code{split.by}}{A factor in object metadata to split the plot by, pass 'ident' +to split by cell identity'} \item{\code{shape.by}}{If NULL, all points are circles (default). You can specify any cell attribute (that can be pulled with FetchData) allowing for both different colors and different shapes on cells. Only applicable if \code{raster = FALSE}.} diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd index 3d7fa3027..dc8bbdb67 100644 --- a/man/DimPlot.Rd +++ b/man/DimPlot.Rd @@ -62,8 +62,8 @@ See \code{\link{DiscretePalette}} for details.} \item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} -\item{split.by}{Name of a metadata column to split plot by; -see \code{\link{FetchData}} for more details} +\item{split.by}{A factor in object metadata to split the plot by, pass 'ident' +to split by cell identity'} \item{shape.by}{If NULL, all points are circles (default). You can specify any cell attribute (that can be pulled with FetchData) allowing for both diff --git a/man/DotPlot.Rd b/man/DotPlot.Rd index 31441433d..f59e9fa2e 100644 --- a/man/DotPlot.Rd +++ b/man/DotPlot.Rd @@ -7,8 +7,8 @@ \usage{ DotPlot( object, - assay = NULL, features, + assay = NULL, cols = c("lightgrey", "blue"), col.min = -2.5, col.max = 2.5, @@ -27,12 +27,12 @@ DotPlot( \arguments{ \item{object}{Seurat object} -\item{assay}{Name of assay to use, defaults to the active assay} - \item{features}{Input vector of features, or named list of feature vectors if feature-grouped panels are desired (replicates the functionality of the old SplitDotPlotGG)} +\item{assay}{Name of assay to use, defaults to the active assay} + \item{cols}{Colors to plot: the name of a palette from \code{RColorBrewer::brewer.pal.info}, a pair of colors defining a gradient, or 3+ colors defining multiple gradients (if split.by is set)} @@ -53,8 +53,8 @@ gene will have no dot drawn.} \item{group.by}{Factor to group the cells by} -\item{split.by}{Factor to split the groups by (replicates the functionality -of the old SplitDotPlotGG); +\item{split.by}{A factor in object metadata to split the plot by, pass 'ident' + to split by cell identity' see \code{\link{FetchData}} for more details} \item{cluster.idents}{Whether to order identities by hierarchical clusters diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 6778ee488..f379647a3 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,12 +10,8 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { - c("lightgrey", "#ff0000", "#00ff00") - } else { - - c("lightgrey", "blue") - }, + cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { + c("lightgrey", "blue") }, pt.size = NULL, alpha = 1, order = FALSE, @@ -79,8 +75,8 @@ may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', \item{reduction}{Which dimensionality reduction to use. If not specified, first searches for umap, then tsne, then pca} -\item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident' -to split by cell identity'; similar to the old \code{FeatureHeatmap}} +\item{split.by}{A factor in object metadata to split the plot by, pass 'ident' +to split by cell identity'} \item{keep.scale}{How to handle the color scale across multiple plots. Options are: \itemize{ diff --git a/man/FeatureScatter.Rd b/man/FeatureScatter.Rd index a6634d840..ffded2917 100644 --- a/man/FeatureScatter.Rd +++ b/man/FeatureScatter.Rd @@ -13,6 +13,7 @@ FeatureScatter( shuffle = FALSE, seed = 1, group.by = NULL, + split.by = NULL, cols = NULL, pt.size = 1, shape.by = NULL, @@ -21,6 +22,7 @@ FeatureScatter( combine = TRUE, slot = "data", plot.cor = TRUE, + ncol = NULL, raster = NULL, raster.dpi = c(512, 512), jitter = FALSE @@ -44,6 +46,9 @@ useful for crowded plots if points of interest are being buried. (default is FAL \item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} +\item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident' +to split by cell identity'} + \item{cols}{Colors to use for identity class plotting.} \item{pt.size}{Size of the points on the plot} diff --git a/man/ImageDimPlot.Rd b/man/ImageDimPlot.Rd index a24182594..412ab0efb 100644 --- a/man/ImageDimPlot.Rd +++ b/man/ImageDimPlot.Rd @@ -44,8 +44,8 @@ segmentation boundaries} \item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} -\item{split.by}{Name of a metadata column to split plot by; -see \code{\link{FetchData}} for more details} +\item{split.by}{A factor in object metadata to split the plot by, pass 'ident' +to split by cell identity'} \item{cols}{Vector of colors, each color corresponds to an identity class. This may also be a single character or numeric value corresponding to a palette as specified by \code{\link[RColorBrewer]{brewer.pal.info}}. diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd index 0e375c441..00547b73a 100644 --- a/man/ImageFeaturePlot.Rd +++ b/man/ImageFeaturePlot.Rd @@ -9,12 +9,8 @@ ImageFeaturePlot( features, fov = NULL, boundaries = NULL, - cols = if (isTRUE(x = blend)) { - c("lightgrey", "#ff0000", "#00ff00") - } else { - - c("lightgrey", "firebrick1") - }, + cols = if (isTRUE(x = blend)) { c("lightgrey", "#ff0000", "#00ff00") } else { + c("lightgrey", "firebrick1") }, size = 0.5, min.cutoff = NA, max.cutoff = NA, @@ -68,11 +64,14 @@ When blend is \code{TRUE}, takes anywhere from 1-3 colors: \item{size}{Point size for cells when plotting centroids} -\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} -\item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident' -to split by cell identity'; similar to the old \code{FeatureHeatmap}} +\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} + +\item{split.by}{A factor in object metadata to split the plot by, pass 'ident' +to split by cell identity'} \item{molecules}{A vector of molecules to plot} diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index e08bd682e..c02543005 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,12 +64,10 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: - -\if{html}{\out{
    }}\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -}\if{html}{\out{
    }} +} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index dc0469132..c3f96ffa5 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,12 +75,10 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: - -\if{html}{\out{
    }}\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -}\if{html}{\out{
    }} +} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd new file mode 100644 index 000000000..98aefd5c0 --- /dev/null +++ b/man/LeverageScore.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/sketching.R +\name{LeverageScore} +\alias{LeverageScore} +\alias{LeverageScore.default} +\alias{LeverageScore.DelayedMatrix} +\alias{LeverageScore.StdAssay} +\alias{LeverageScore.Assay} +\alias{LeverageScore.Seurat} +\title{Leverage Score Calculation} +\usage{ +LeverageScore(object, ...) + +\method{LeverageScore}{default}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{DelayedMatrix}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + block.size = 1e+08, + verbose = TRUE, + ... +) + +\method{LeverageScore}{StdAssay}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Assay}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Seurat}( + object, + assay = NULL, + nsketch = 5000L, + ndims = NULL, + var.name = "leverage.score", + over.write = FALSE, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{A matrix-like object} + +\item{...}{Arguments passed to other methods} + +\item{nsketch}{A positive integer. The number of sketches to be used in the approximation. +Default is 5000.} + +\item{ndims}{A positive integer or NULL. The number of dimensions to use. If NULL, the number +of dimensions will default to the number of columns in the object.} + +\item{method}{The sketching method to use, defaults to CountSketch.} + +\item{eps}{A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +defaults to 0.5.} + +\item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} + +\item{verbose}{Print progress and diagnostic messages} +} +\description{ +This function computes the leverage scores for a given object +It uses the concept of sketching and random projections. The function provides an approximation +to the leverage scores using a scalable method suitable for large matrices. +} +\references{ +Clarkson, K. L. & Woodruff, D. P. +Low-rank approximation and regression in input sparsity time. +JACM 63, 1–45 (2017). \url{https://dl.acm.org/doi/10.1145/3019134}; +} diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 59a75466d..30d0bdcad 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,7 +33,10 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, +may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} + +\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 79a0fbc5b..91fe25746 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. } \section{Package options}{ diff --git a/man/SketchData.Rd b/man/SketchData.Rd new file mode 100644 index 000000000..f4b636738 --- /dev/null +++ b/man/SketchData.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{SketchData} +\alias{SketchData} +\title{Sketch Data} +\usage{ +SketchData( + object, + assay = NULL, + ncells = 5000L, + sketched.assay = "sketch", + method = c("LeverageScore", "Uniform"), + var.name = "leverage.score", + over.write = FALSE, + seed = 123L, + cast = "dgCMatrix", + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{A Seurat object.} + +\item{assay}{Assay name. Default is NULL, in which case the default assay of the object is used.} + +\item{ncells}{A positive integer indicating the number of cells to sample for the sketching. Default is 5000.} + +\item{sketched.assay}{Sketched assay name. A sketch assay is created or overwrite with the sketch data. Default is 'sketch'.} + +\item{method}{Sketching method to use. Can be 'LeverageScore' or 'Uniform'. +Default is 'LeverageScore'.} + +\item{var.name}{A metadata column name to store the leverage scores. Default is 'leverage.score'.} + +\item{over.write}{whether to overwrite existing column in the metadata. Default is FALSE.} + +\item{seed}{A positive integer for the seed of the random number generator. Default is 123.} + +\item{cast}{The type to cast the resulting assay to. Default is 'dgCMatrix'.} + +\item{verbose}{Print progress and diagnostic messages} + +\item{...}{Arguments passed to other methods} +} +\value{ +A Seurat object with the sketched data added as a new assay. +} +\description{ +This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, +which can help with scalability for large datasets. +} diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 749010d2c..167b832c9 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -52,7 +52,8 @@ expression of the attribute being potted, can also pass 'increasing' or 'decreas \item{group.by}{Group (color) cells in different ways (for example, orig.ident)} -\item{split.by}{A variable to split the violin plots by,} +\item{split.by}{A factor in object metadata to split the plot by, pass 'ident' +to split by cell identity'} \item{adjust}{Adjust parameter for geom_violin} diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..6320f6d47 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -73,6 +73,6 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{components}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} }} From 33f52949e43e30629c0074560d7c6c55840adc4b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 16:02:52 -0400 Subject: [PATCH 655/979] fix umap multiple input --- R/dimensional_reduction.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 847c86104..a50c19bc3 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1792,6 +1792,12 @@ RunUMAP.Seurat <- function( if (sum(c(is.null(x = dims), is.null(x = features), is.null(x = graph))) < 2) { stop("Please specify only one of the following arguments: dims, features, or graph") } + if (sum(!is.null(x = dims), !is.null(x = nn.name), + !is.null(x = graph), !is.null(x = features)) != 1) { + stop("Only one parameter among 'dims', 'nn.name', 'graph', or 'features' ", + "should be used at a time to run UMAP") + } + if (!is.null(x = features)) { data.use <- as.matrix(x = t(x = GetAssayData(object = object, slot = slot, assay = assay)[features, , drop = FALSE])) if (ncol(x = data.use) < n.components) { From 5849c44e9fba18ba38f63246148a0469cd856541 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 16:21:19 -0400 Subject: [PATCH 656/979] fix scatter plot --- R/visualization.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/visualization.R b/R/visualization.R index 7adcc6d36..5b1fe40d3 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -7907,6 +7907,12 @@ SingleCorPlot <- function( x = colnames(x = data), fixed = TRUE ) + names.plot <- colnames(x = data) <- gsub( + pattern = ' ', + replacement = '.', + x = colnames(x = data), + fixed = TRUE + ) if (ncol(x = data) < 2) { msg <- "Too few variables passed" if (ncol(x = data) == 1) { From 0b55e606e73a80cef06f2fea0752485dcb76a3c5 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 16:31:18 -0400 Subject: [PATCH 657/979] fix parse bio data --- vignettes/ParseBio_sketch_integration.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 04ea2dfad..14df811c3 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -55,7 +55,7 @@ options(future.globals.maxSize = 3e9) options(Seurat.object.assay.version = "v5") ``` ## Create a Seurat object containing data from 24 patients -We downloaded the original dataset and donor metadata from [Parse Biosciences](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes), as an h5ad file. While the BPCells package can work directly with h5ad files, for optimal performance, we converted the dataset to the compressed sparse format used by BPCells, as described [here](seurat5_bpcells_interaction_vignette.html). +We downloaded the original dataset and donor metadata from [Parse Biosciences](https://cdn.parsebiosciences.com/1M_PBMC_T1D_Parse.zip). While the BPCells package can work directly with h5ad files, for optimal performance, we converted the dataset to the compressed sparse format used by BPCells, as described [here](seurat5_bpcells_interaction_vignette.html). We create a Seurat object for this dataset. Since the input to `CreateSeuratObject` is a BPCells matrix, the data remains on-disk and is not loaded into memory. After creating the object, we split the dataset into 24 [layers](seurat5_essential_commands.html), one for each sample (i.e. patient), to facilitate integration. ```{r, warning=F, message=F} parse.mat <- open_matrix_dir(dir = "/brahms/hartmana/vignette_data/bpcells/parse_1m_pbmc") From b98b4be6df5607710ddf45b3f392fa973258a2bd Mon Sep 17 00:00:00 2001 From: yuhanH Date: Thu, 6 Jul 2023 16:37:28 -0400 Subject: [PATCH 658/979] update manuscript --- vignettes/ParseBio_sketch_integration.Rmd | 4 ++-- vignettes/seurat5_integration_bridge.Rmd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/ParseBio_sketch_integration.Rmd b/vignettes/ParseBio_sketch_integration.Rmd index 14df811c3..9c576ae45 100755 --- a/vignettes/ParseBio_sketch_integration.Rmd +++ b/vignettes/ParseBio_sketch_integration.Rmd @@ -32,9 +32,9 @@ knitr::opts_chunk$set( ) ``` -The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory. In [our manuscript](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. +The recent increase in publicly available single-cell datasets poses a significant challenge for integrative analysis. For example, multiple tissues have now been profiled across dozens of studies, representing hundreds of individuals and millions of cells. In [Hao et al, 2023](https://www.nature.com/articles/s41587-023-01767-y) proposed a dictionary learning based method, atomic sketch integration, could also enable efficient and large-scale integrative analysis. Our procedure enables the integration of large compendiums of datasets without ever needing to load the full scale of data into memory. In [our manuscript](https://www.nature.com/articles/s41587-023-01767-y) we use atomic sketch integration to integrate millions of scRNA-seq from human lung and human PBMC. -In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments 1M cells, though we have used this procedure to integrate datasets of 10M+ cells as well. We analyze a dataset from Parse Biosciences, in which PBMC from 24 human samples (12 healthy donors, 12 Type-1 diabetes donors), which is available [here](https://resources.parsebiosciences.com/dataset-wt-mega-one-million-pbmc-type-1-diabetes). +In this vignette, we demonstrate how to use atomic sketch integration to harmonize scRNA-seq experiments 1M cells, though we have used this procedure to integrate datasets of 10M+ cells as well. We analyze a dataset from Parse Biosciences, in which PBMC from 24 human samples (12 healthy donors, 12 Type-1 diabetes donors), which is available [here](https://cdn.parsebiosciences.com/1M_PBMC_T1D_Parse.zip). * Sample a representative subset of cells ('atoms') from each dataset * Integrate the atoms from each dataset, and define a set of cell states diff --git a/vignettes/seurat5_integration_bridge.Rmd b/vignettes/seurat5_integration_bridge.Rmd index 941b39b21..e90f7547a 100644 --- a/vignettes/seurat5_integration_bridge.Rmd +++ b/vignettes/seurat5_integration_bridge.Rmd @@ -34,7 +34,7 @@ knitr::opts_chunk$set( In the same way that read mapping tools have transformed genome sequence analysis, the ability to map new datasets to established references represents an exciting opportunity for the field of single-cell genomics. Along with others in the community, we have developed [tools to map and interpret query datasets](https://satijalab.org/seurat/articles/multimodal_reference_mapping.html), and have also constructed a [set of scRNA-seq datasets for diverse mammalian tissues](http://azimuth.hubmapconsortium.org). -A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, bioRxiv 2022](https://www.biorxiv.org/content/10.1101/2022.02.24.481684v1), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. +A key challenge is to extend this reference mapping framework to technologies that do not measure gene expression, even if the underlying reference is based on scRNA-seq. In [Hao et al, Nat Biotechnol 2023](https://www.nature.com/articles/s41587-023-01767-y), we introduce 'bridge integration', which enables the mapping of complementary technologies (like scATAC-seq, scDNAme, CyTOF), onto scRNA-seq references, using a 'multi-omic' dataset as a molecular bridge. In this vignette, we demonstrate how to map an scATAC-seq dataset of human PBMC, onto our previously constructed [PBMC reference](https://azimuth.hubmapconsortium.org/references/human_pbmc/). We use a publicly available 10x multiome dataset, which simultaneously measures gene expression and chromatin accessibility in the same cell, as a bridge dataset. In this vignette we demonstrate: From 7d145561ce711fdaa378677145e1056a107d8b55 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 08:41:03 -0400 Subject: [PATCH 659/979] fix var features --- R/preprocessing5.R | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index df7b33e6a..671a76dcb 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -160,14 +160,8 @@ FindVariableFeatures.StdAssay <- function( rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) object[colnames(x = hvf.info)] <- hvf.info } - var.name <- paste( - 'vf', - key, - layer[i], - 'variable', - sep = '_' - ) - VariableFeatures(object = object) <- rownames(hvf.info)[hvf.info[,var.name]] + object@meta.data$var.features <- NULL + VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nselect) return(object) } From 21f05ec9789c657f76d02f4a12803d551bda19b5 Mon Sep 17 00:00:00 2001 From: rsatija Date: Fri, 7 Jul 2023 10:57:45 -0400 Subject: [PATCH 660/979] AddModuleScore now defaults to data slot --- R/utilities.R | 4 +- man/AddModuleScore.Rd | 3 + man/Cells.Rd | 5 -- man/CreateSCTAssayObject.Rd | 11 ---- man/FeaturePlot.Rd | 8 +-- man/GetImage.Rd | 8 --- man/GetTissueCoordinates.Rd | 4 -- man/HVFInfo.SCTAssay.Rd | 22 -------- man/ImageFeaturePlot.Rd | 13 ++--- man/IntegrateData.Rd | 6 +- man/IntegrateEmbeddings.Rd | 6 +- man/LeverageScore.Rd | 108 ++++++++++++++++++++++++++++++++++++ man/Load10X_Spatial.Rd | 2 - man/PolyFeaturePlot.Rd | 5 +- man/Radius.Rd | 3 - man/RenameCells.Rd | 7 --- man/STARmap-class.Rd | 15 ----- man/Seurat-package.Rd | 2 +- man/SketchData.Rd | 51 +++++++++++++++++ man/SlideSeq-class.Rd | 15 ----- man/VariableFeaturePlot.Rd | 15 ----- man/as.Seurat.Rd | 4 -- man/as.sparse.Rd | 8 +-- man/merge.SCTAssay.Rd | 11 ---- man/reexports.Rd | 2 +- 25 files changed, 187 insertions(+), 151 deletions(-) create mode 100644 man/LeverageScore.Rd create mode 100644 man/SketchData.Rd diff --git a/R/utilities.R b/R/utilities.R index 2079c11a8..164a734fc 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -125,6 +125,7 @@ AddAzimuthScores <- function(object, filename) { #' @param search Search for symbol synonyms for features in \code{features} that #' don't match features in \code{object}? Searches the HGNC's gene names #' database; see \code{\link{UpdateSymbolList}} for more details +#' @param slot Slot to calculate score values off of. Defaults to data slot (i.e log-normalized counts) #' @param ... Extra parameters passed to \code{\link{UpdateSymbolList}} #' #' @return Returns a Seurat object with module scores added to object meta data; @@ -179,6 +180,7 @@ AddModuleScore <- function( name = 'Cluster', seed = 1, search = FALSE, + slot = 'data', ... ) { if (!is.null(x = seed)) { @@ -187,7 +189,7 @@ AddModuleScore <- function( assay.old <- DefaultAssay(object = object) assay <- assay %||% assay.old DefaultAssay(object = object) <- assay - assay.data <- GetAssayData(object = object) + assay.data <- GetAssayData(object = object,assay = assay, slot = slot) features.old <- features if (k) { .NotYetUsed(arg = 'k') diff --git a/man/AddModuleScore.Rd b/man/AddModuleScore.Rd index 141dd6a5a..efafd56d2 100644 --- a/man/AddModuleScore.Rd +++ b/man/AddModuleScore.Rd @@ -15,6 +15,7 @@ AddModuleScore( name = "Cluster", seed = 1, search = FALSE, + slot = "data", ... ) } @@ -48,6 +49,8 @@ programs, the results will be stored as \code{name1}, \code{name2}, don't match features in \code{object}? Searches the HGNC's gene names database; see \code{\link{UpdateSymbolList}} for more details} +\item{slot}{Slot to calculate score values off of. Defaults to data slot (i.e log-normalized counts)} + \item{...}{Extra parameters passed to \code{\link{UpdateSymbolList}}} } \value{ diff --git a/man/Cells.Rd b/man/Cells.Rd index 4191764b5..8455fe837 100644 --- a/man/Cells.Rd +++ b/man/Cells.Rd @@ -15,11 +15,6 @@ \method{Cells}{VisiumV1}(x, ...) } -\arguments{ -\item{x}{An object} - -\item{...}{Arguments passed to other methods} -} \description{ Get Cell Names } diff --git a/man/CreateSCTAssayObject.Rd b/man/CreateSCTAssayObject.Rd index 70f30f633..9f62a9c0e 100644 --- a/man/CreateSCTAssayObject.Rd +++ b/man/CreateSCTAssayObject.Rd @@ -15,21 +15,10 @@ CreateSCTAssayObject( ) } \arguments{ -\item{counts}{Unnormalized data such as raw counts or TPMs} - -\item{data}{Prenormalized data; if provided, do not pass \code{counts}} - \item{scale.data}{a residual matrix} \item{umi.assay}{The UMI assay name. Default is RNA} -\item{min.cells}{Include features detected in at least this many cells. Will -subset the counts matrix as well. To reintroduce excluded features, create a -new object with a lower cutoff} - -\item{min.features}{Include cells where at least this many features are -detected} - \item{SCTModel.list}{list of SCTModels} } \description{ diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 6778ee488..8df3185d9 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,12 +10,8 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { - c("lightgrey", "#ff0000", "#00ff00") - } else { - - c("lightgrey", "blue") - }, + cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { + c("lightgrey", "blue") }, pt.size = NULL, alpha = 1, order = FALSE, diff --git a/man/GetImage.Rd b/man/GetImage.Rd index a0d134863..43f64ed09 100644 --- a/man/GetImage.Rd +++ b/man/GetImage.Rd @@ -12,14 +12,6 @@ \method{GetImage}{VisiumV1}(object, mode = c("grob", "raster", "plotly", "raw"), ...) } -\arguments{ -\item{object}{An object} - -\item{mode}{How to return the image; should accept one of \dQuote{grob}, -\dQuote{raster}, \dQuote{plotly}, or \dQuote{raw}} - -\item{...}{Arguments passed to other methods} -} \description{ Get Image Data } diff --git a/man/GetTissueCoordinates.Rd b/man/GetTissueCoordinates.Rd index 5ccb69d68..759743d43 100644 --- a/man/GetTissueCoordinates.Rd +++ b/man/GetTissueCoordinates.Rd @@ -18,10 +18,6 @@ ) } \arguments{ -\item{object}{An object} - -\item{...}{Arguments passed to other methods} - \item{qhulls}{return qhulls instead of centroids} \item{scale}{A factor to scale the coordinates by; choose from: 'tissue', diff --git a/man/HVFInfo.SCTAssay.Rd b/man/HVFInfo.SCTAssay.Rd index 6e26995d8..1f4be45cb 100644 --- a/man/HVFInfo.SCTAssay.Rd +++ b/man/HVFInfo.SCTAssay.Rd @@ -6,28 +6,6 @@ \usage{ \method{HVFInfo}{SCTAssay}(object, selection.method, status = FALSE, ...) } -\arguments{ -\item{object}{An object} - -\item{selection.method}{Which method to pull. For \code{HVFInfo} and -\code{VariableFeatures}, choose one from one of the -following: -\itemize{ - \item \dQuote{vst} - \item \dQuote{sctransform} or \dQuote{sct} - \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or - \dQuote{disp} -} -For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: -\itemize{ - \item \dQuote{markvariogram} - \item \dQuote{moransi} -}} - -\item{status}{Add variable status to the resulting data frame} - -\item{...}{Arguments passed to other methods} -} \description{ Get variable feature information from \code{\link{SCTAssay}} objects } diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd index 0e375c441..1caf4bedc 100644 --- a/man/ImageFeaturePlot.Rd +++ b/man/ImageFeaturePlot.Rd @@ -9,12 +9,8 @@ ImageFeaturePlot( features, fov = NULL, boundaries = NULL, - cols = if (isTRUE(x = blend)) { - c("lightgrey", "#ff0000", "#00ff00") - } else { - - c("lightgrey", "firebrick1") - }, + cols = if (isTRUE(x = blend)) { c("lightgrey", "#ff0000", "#00ff00") } else { + c("lightgrey", "firebrick1") }, size = 0.5, min.cutoff = NA, max.cutoff = NA, @@ -68,7 +64,10 @@ When blend is \code{TRUE}, takes anywhere from 1-3 colors: \item{size}{Point size for cells when plotting centroids} -\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, +may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} + +\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident' diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index e08bd682e..c02543005 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,12 +64,10 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: - -\if{html}{\out{
    }}\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -}\if{html}{\out{
    }} +} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index dc0469132..c3f96ffa5 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,12 +75,10 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: - -\if{html}{\out{
    }}\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -}\if{html}{\out{
    }} +} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd new file mode 100644 index 000000000..98aefd5c0 --- /dev/null +++ b/man/LeverageScore.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generics.R, R/sketching.R +\name{LeverageScore} +\alias{LeverageScore} +\alias{LeverageScore.default} +\alias{LeverageScore.DelayedMatrix} +\alias{LeverageScore.StdAssay} +\alias{LeverageScore.Assay} +\alias{LeverageScore.Seurat} +\title{Leverage Score Calculation} +\usage{ +LeverageScore(object, ...) + +\method{LeverageScore}{default}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{DelayedMatrix}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + eps = 0.5, + seed = 123L, + block.size = 1e+08, + verbose = TRUE, + ... +) + +\method{LeverageScore}{StdAssay}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Assay}( + object, + nsketch = 5000L, + ndims = NULL, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) + +\method{LeverageScore}{Seurat}( + object, + assay = NULL, + nsketch = 5000L, + ndims = NULL, + var.name = "leverage.score", + over.write = FALSE, + method = CountSketch, + vf.method = NULL, + layer = "data", + eps = 0.5, + seed = 123L, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{A matrix-like object} + +\item{...}{Arguments passed to other methods} + +\item{nsketch}{A positive integer. The number of sketches to be used in the approximation. +Default is 5000.} + +\item{ndims}{A positive integer or NULL. The number of dimensions to use. If NULL, the number +of dimensions will default to the number of columns in the object.} + +\item{method}{The sketching method to use, defaults to CountSketch.} + +\item{eps}{A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +defaults to 0.5.} + +\item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} + +\item{verbose}{Print progress and diagnostic messages} +} +\description{ +This function computes the leverage scores for a given object +It uses the concept of sketching and random projections. The function provides an approximation +to the leverage scores using a scalable method suitable for large matrices. +} +\references{ +Clarkson, K. L. & Woodruff, D. P. +Low-rank approximation and regression in input sparsity time. +JACM 63, 1–45 (2017). \url{https://dl.acm.org/doi/10.1145/3019134}; +} diff --git a/man/Load10X_Spatial.Rd b/man/Load10X_Spatial.Rd index 84c8c0ec8..00fe68a3b 100644 --- a/man/Load10X_Spatial.Rd +++ b/man/Load10X_Spatial.Rd @@ -20,8 +20,6 @@ and the image data in a subdirectory called \code{spatial}} \item{filename}{Name of H5 file containing the feature barcode matrix} -\item{assay}{Name of the initial assay} - \item{slice}{Name for the stored image of the tissue slice} \item{filter.matrix}{Only keep spots that have been determined to be over diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 59a75466d..30d0bdcad 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,7 +33,10 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, +may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} + +\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/Radius.Rd b/man/Radius.Rd index a0b1a74db..a1c5d6a8a 100644 --- a/man/Radius.Rd +++ b/man/Radius.Rd @@ -12,9 +12,6 @@ \method{Radius}{VisiumV1}(object) } -\arguments{ -\item{object}{An image object} -} \description{ Get Spot Radius } diff --git a/man/RenameCells.Rd b/man/RenameCells.Rd index 105b7d1e9..bf4dd21f9 100644 --- a/man/RenameCells.Rd +++ b/man/RenameCells.Rd @@ -15,13 +15,6 @@ \method{RenameCells}{VisiumV1}(object, new.names = NULL, ...) } -\arguments{ -\item{object}{An object} - -\item{new.names}{vector of new cell names} - -\item{...}{Arguments passed to other methods} -} \description{ Rename Cells in an Object } diff --git a/man/STARmap-class.Rd b/man/STARmap-class.Rd index 30ab87d92..25d6a3fc5 100644 --- a/man/STARmap-class.Rd +++ b/man/STARmap-class.Rd @@ -8,20 +8,5 @@ \description{ The STARmap class } -\section{Slots}{ - - -\describe{ -\item{\code{assay}}{Name of assay to associate image data with; will give this image -priority for visualization when the assay is set as the active/default assay -in a \code{Seurat} object} - -\item{\code{key}}{A one-length character vector with the object's key; keys must -be one or more alphanumeric characters followed by an underscore -\dQuote{\code{_}} (regex pattern -\dQuote{\code{^[a-zA-Z][a-zA-Z0-9]*_$}})} -} -} - \concept{objects} \concept{spatial} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 79a0fbc5b..91fe25746 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. } \section{Package options}{ diff --git a/man/SketchData.Rd b/man/SketchData.Rd new file mode 100644 index 000000000..f4b636738 --- /dev/null +++ b/man/SketchData.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sketching.R +\name{SketchData} +\alias{SketchData} +\title{Sketch Data} +\usage{ +SketchData( + object, + assay = NULL, + ncells = 5000L, + sketched.assay = "sketch", + method = c("LeverageScore", "Uniform"), + var.name = "leverage.score", + over.write = FALSE, + seed = 123L, + cast = "dgCMatrix", + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{A Seurat object.} + +\item{assay}{Assay name. Default is NULL, in which case the default assay of the object is used.} + +\item{ncells}{A positive integer indicating the number of cells to sample for the sketching. Default is 5000.} + +\item{sketched.assay}{Sketched assay name. A sketch assay is created or overwrite with the sketch data. Default is 'sketch'.} + +\item{method}{Sketching method to use. Can be 'LeverageScore' or 'Uniform'. +Default is 'LeverageScore'.} + +\item{var.name}{A metadata column name to store the leverage scores. Default is 'leverage.score'.} + +\item{over.write}{whether to overwrite existing column in the metadata. Default is FALSE.} + +\item{seed}{A positive integer for the seed of the random number generator. Default is 123.} + +\item{cast}{The type to cast the resulting assay to. Default is 'dgCMatrix'.} + +\item{verbose}{Print progress and diagnostic messages} + +\item{...}{Arguments passed to other methods} +} +\value{ +A Seurat object with the sketched data added as a new assay. +} +\description{ +This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, +which can help with scalability for large datasets. +} diff --git a/man/SlideSeq-class.Rd b/man/SlideSeq-class.Rd index 60cdb125f..d9859af4f 100644 --- a/man/SlideSeq-class.Rd +++ b/man/SlideSeq-class.Rd @@ -14,19 +14,4 @@ The SlideSeq class represents spatial information from the Slide-seq platform \item{\code{coordinates}}{...} }} -\section{Slots}{ - - -\describe{ -\item{\code{assay}}{Name of assay to associate image data with; will give this image -priority for visualization when the assay is set as the active/default assay -in a \code{Seurat} object} - -\item{\code{key}}{A one-length character vector with the object's key; keys must -be one or more alphanumeric characters followed by an underscore -\dQuote{\code{_}} (regex pattern -\dQuote{\code{^[a-zA-Z][a-zA-Z0-9]*_$}})} -} -} - \concept{spatial} diff --git a/man/VariableFeaturePlot.Rd b/man/VariableFeaturePlot.Rd index 9320b3f3d..97ccbf073 100644 --- a/man/VariableFeaturePlot.Rd +++ b/man/VariableFeaturePlot.Rd @@ -26,21 +26,6 @@ VariableFeaturePlot( \item{log}{Plot the x-axis in log scale} -\item{selection.method}{Which method to pull. For \code{HVFInfo} and -\code{VariableFeatures}, choose one from one of the -following: -\itemize{ - \item \dQuote{vst} - \item \dQuote{sctransform} or \dQuote{sct} - \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or - \dQuote{disp} -} -For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: -\itemize{ - \item \dQuote{markvariogram} - \item \dQuote{moransi} -}} - \item{assay}{Assay to pull variable features from} \item{raster}{Convert points to raster format, default is \code{NULL} diff --git a/man/as.Seurat.Rd b/man/as.Seurat.Rd index 58ffd4e64..b02d98696 100644 --- a/man/as.Seurat.Rd +++ b/man/as.Seurat.Rd @@ -17,16 +17,12 @@ ) } \arguments{ -\item{x}{An object to convert to class \code{Seurat}} - \item{slot}{Slot to store expression data as} \item{assay}{Name of assays to convert; set to \code{NULL} for all assays to be converted} \item{verbose}{Show progress updates} -\item{...}{Arguments passed to other methods} - \item{counts}{name of the SingleCellExperiment assay to store as \code{counts}; set to \code{NULL} if only normalized data are present} diff --git a/man/as.sparse.Rd b/man/as.sparse.Rd index 481122512..cbeca89a6 100644 --- a/man/as.sparse.Rd +++ b/man/as.sparse.Rd @@ -16,12 +16,12 @@ ) } \arguments{ -\item{x}{An object} +\item{x}{any \R object.} -\item{...}{Arguments passed to other methods} +\item{...}{additional arguments to be passed to or from methods.} -\item{row.names}{\code{NULL} or a character vector giving the row names for -the data; missing values are not allowed} +\item{row.names}{\code{NULL} or a character vector giving the row + names for the data frame. Missing values are not allowed.} \item{optional}{logical. If \code{TRUE}, setting row names and converting column names (to syntactic names: see diff --git a/man/merge.SCTAssay.Rd b/man/merge.SCTAssay.Rd index f976cbc1d..5012c54bf 100644 --- a/man/merge.SCTAssay.Rd +++ b/man/merge.SCTAssay.Rd @@ -16,20 +16,9 @@ \arguments{ \item{x}{A \code{\link[SeuratObject]{Seurat}} object} -\item{y}{A single \code{Seurat} object or a list of \code{Seurat} objects} - -\item{add.cell.ids}{A character vector of \code{length(x = c(x, y))}; -appends the corresponding values to the start of each objects' cell names} - -\item{merge.data}{Merge the data slots instead of just merging the counts -(which requires renormalization); this is recommended if the same -normalization approach was applied to all objects} - \item{na.rm}{If na.rm = TRUE, this will only preserve residuals that are present in all SCTAssays being merged. Otherwise, missing residuals will be populated with NAs.} - -\item{...}{Arguments passed to other methods} } \description{ Merge SCTAssay objects diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..e14c1212e 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -73,6 +73,6 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{components}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{SeuratObject}{\code{\link[SeuratObject]{\%iff\%}}, \code{\link[SeuratObject]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject]{Misc<-}}, \code{\link[SeuratObject]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject]{RenameIdents}}, \code{\link[SeuratObject]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject]{SVFInfo}}, \code{\link[SeuratObject]{SetAssayData}}, \code{\link[SeuratObject]{SetIdent}}, \code{\link[SeuratObject]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} }} From 9813dac73e2f5e24c2835225942d392a43ebadd7 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 7 Jul 2023 12:02:40 -0400 Subject: [PATCH 661/979] adding warning --- R/integration5.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/integration5.R b/R/integration5.R index 7c56df3d4..fc24339d6 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -203,6 +203,8 @@ CCAIntegration <- function( object.list <- list() for (i in seq_along(along.with = layers)) { if (inherits(x = object[[layers[i]]], what = "IterableMatrix")) { + warning("Converting BPCells matrix to dgCMatrix for integration", + "as on-disk CCA Integration is not currently supported", call. = FALSE, immediate. = TRUE) counts <- as(object = object[[layers[i]]][features, ], Class = "dgCMatrix") } From a2c4610586c77b7e2e9c03011748c9348f49111b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 12:25:05 -0400 Subject: [PATCH 662/979] convert dgt to dgc CLR norm --- R/preprocessing5.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 671a76dcb..dbe50d283 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -536,6 +536,10 @@ NormalizeData.default <- function( } }, 'CLR' = { + if (inherits(x = object, what = 'dgTMatrix')) { + warning('Convert input dgTMatrix into dgCMatrix') + object <- as(object = object, Class = 'dgCMatrix') + } if (!inherits(x = object, what = 'dgCMatrix') && !inherits(x = object, what = 'matrix')) { stop('CLR normalization only supports for dense and dgCMatrix') From 68eca04f552046b4d9815baea751677280983ff1 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 12:29:57 -0400 Subject: [PATCH 663/979] remove DelayedArray code --- DESCRIPTION | 11 +- NAMESPACE | 17 -- R/generics.R | 12 +- R/integration.R | 100 ++------ R/integration5.R | 34 ++- R/objects.R | 49 ++-- R/preprocessing.R | 4 + R/preprocessing5.R | 622 ++++++++++++--------------------------------- R/sketching.R | 107 -------- R/tree.R | 4 +- R/utilities.R | 189 ++------------ R/visualization.R | 4 + 12 files changed, 286 insertions(+), 867 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b9ec3a183..3ff2dc58a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -87,12 +87,6 @@ Imports: tools, utils, uwot (>= 0.1.10), - DelayedArray, - TileDBArray, - harmony, - presto, - rhdf5, - HDF5Array, BPCells (>= 0.0.0.9000) LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: MIT + file LICENSE @@ -145,4 +139,7 @@ Suggests: mixtools, ggrastr, data.table, - R.utils + R.utils, + presto, + DelayedArray, + harmony diff --git a/NAMESPACE b/NAMESPACE index b163c323f..19c23fb77 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,16 +55,10 @@ S3method(HVFInfo,SCTAssay) S3method(IntegrateEmbeddings,IntegrationAnchorSet) S3method(IntegrateEmbeddings,TransferAnchorSet) S3method(LeverageScore,Assay) -S3method(LeverageScore,DelayedMatrix) S3method(LeverageScore,Seurat) S3method(LeverageScore,StdAssay) S3method(LeverageScore,default) -S3method(LogNormalize,DelayedMatrix) -S3method(LogNormalize,H5ADMatrix) -S3method(LogNormalize,HDF5Matrix) S3method(LogNormalize,IterableMatrix) -S3method(LogNormalize,TENxMatrix) -S3method(LogNormalize,TileDBMatrix) S3method(LogNormalize,V3Matrix) S3method(LogNormalize,data.frame) S3method(LogNormalize,default) @@ -76,7 +70,6 @@ S3method(NormalizeData,StdAssay) S3method(NormalizeData,V3Matrix) S3method(NormalizeData,default) S3method(ProjectCellEmbeddings,Assay) -S3method(ProjectCellEmbeddings,DelayedMatrix) S3method(ProjectCellEmbeddings,IterableMatrix) S3method(ProjectCellEmbeddings,SCTAssay) S3method(ProjectCellEmbeddings,Seurat) @@ -140,7 +133,6 @@ S3method(ScaleFactors,VisiumV1) S3method(ScoreJackStraw,DimReduc) S3method(ScoreJackStraw,JackStrawData) S3method(ScoreJackStraw,Seurat) -S3method(VST,DelayedMatrix) S3method(VST,IterableMatrix) S3method(VST,default) S3method(VST,dgCMatrix) @@ -480,11 +472,6 @@ importClassesFrom(SeuratObject,SeuratCommand) importClassesFrom(SeuratObject,SpatialImage) importFrom(BPCells,matrix_stats) importFrom(BPCells,transpose_storage_order) -importFrom(DelayedArray,is_sparse) -importFrom(DelayedArray,path) -importFrom(DelayedArray,setAutoBlockSize) -importFrom(HDF5Array,HDF5RealizationSink) -importFrom(HDF5Array,TENxRealizationSink) importFrom(KernSmooth,bkde) importFrom(MASS,ginv) importFrom(MASS,glm.nb) @@ -608,7 +595,6 @@ importFrom(SeuratObject,as.Graph) importFrom(SeuratObject,as.Neighbor) importFrom(SeuratObject,as.Seurat) importFrom(SeuratObject,as.sparse) -importFrom(TileDBArray,TileDBRealizationSink) importFrom(cluster,clara) importFrom(cowplot,get_legend) importFrom(cowplot,plot_grid) @@ -737,7 +723,6 @@ importFrom(grid,pointsGrob) importFrom(grid,rasterGrob) importFrom(grid,unit) importFrom(grid,viewport) -importFrom(harmony,HarmonyMatrix) importFrom(httr,GET) importFrom(httr,accept_json) importFrom(httr,build_url) @@ -797,12 +782,10 @@ importFrom(plotly,layout) importFrom(plotly,plot_ly) importFrom(plotly,raster2uri) importFrom(png,readPNG) -importFrom(presto,wilcoxauc) importFrom(progressr,progressor) importFrom(reticulate,import) importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) -importFrom(rhdf5,h5delete) importFrom(rlang,"!!") importFrom(rlang,abort) importFrom(rlang,arg_match) diff --git a/R/generics.R b/R/generics.R index 35692a2c8..8bf817997 100644 --- a/R/generics.R +++ b/R/generics.R @@ -116,6 +116,7 @@ FindClusters <- function(object, ...) { #' @export #' #' @examples +#' \dontrun{ #' data("pbmc_small") #' # Find markers for cluster 2 #' markers <- FindMarkers(object = pbmc_small, ident.1 = 2) @@ -133,7 +134,8 @@ FindClusters <- function(object, ...) { #' markers <- FindMarkers(object = pbmc_small, ident.1 = 'clustertree', ident.2 = 5) #' head(x = markers) #' } -#' +#' } +#' #' @rdname FindMarkers #' @export FindMarkers #' @@ -230,9 +232,11 @@ FindSpatiallyVariableFeatures <- function(object, ...) { #' Otherwise, log2 fold change is returned with column named "avg_log2_FC". #' #' @examples +#' \dontrun{ #' data("pbmc_small") #' FoldChange(pbmc_small, ident.1 = 1) -#' +#' } +#' #' @param object A Seurat object #' @param ... Arguments passed to other methods #' @rdname FoldChange @@ -469,6 +473,7 @@ PseudobulkExpression <- function(object, ...) { #' @seealso \code{\link{merge.Seurat}} #' #' @examples +#' \dontrun{ #' data("pbmc_small") #' pbmc_small #' # As CCA requires two datasets, we will split our test object into two just for this example @@ -479,7 +484,8 @@ PseudobulkExpression <- function(object, ...) { #' pbmc_cca <- RunCCA(object1 = pbmc1, object2 = pbmc2) #' # Print results #' print(x = pbmc_cca[["cca"]]) -#' +#' } +#' #' @rdname RunCCA #' @export RunCCA #' diff --git a/R/integration.R b/R/integration.R index bfd192c30..4a7f1358d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -807,8 +807,9 @@ FindTransferAnchors <- function( reference.reduction.init <- reference.reduction if (inherits(x = reference[[reference.assay]], what = 'Assay5')) { if (length(Layers(reference, search = "data")) > 1) { - reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], - layers = "data", new = "data") + reference[[reference.assay]] <- JoinLayers( + reference[[reference.assay]], + layers = "data", new = "data") } } if (normalization.method == "SCT") { @@ -884,7 +885,10 @@ FindTransferAnchors <- function( if (is.null(x = reference.reduction)) { reference.reduction <- "pca" if (verbose) { - message("Performing PCA on the provided query using ", length(x = features), " features as input.") + message( + "Performing PCA on the provided query using ", + length(x = features), + " features as input.") } if (normalization.method == "LogNormalize") { query <- ScaleData( @@ -4934,7 +4938,8 @@ ProjectCellEmbeddings.Seurat <- function( verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) { if (verbose) { message("Projecting cell embeddings") @@ -4987,7 +4992,8 @@ ProjectCellEmbeddings.Assay <- function( verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) { features <- Reduce( f = intersect, @@ -5036,7 +5042,8 @@ ProjectCellEmbeddings.SCTAssay <- function( verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) { if (normalization.method != 'SCT') { warning('Query data is SCT normalized, but normalization.method is set to LogNormalize') @@ -5072,7 +5079,8 @@ ProjectCellEmbeddings.StdAssay <- function( verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) { reference.assay <- reference.assay %||% DefaultAssay(object = reference) features <- Reduce( @@ -5134,7 +5142,8 @@ ProjectCellEmbeddings.default <- function( features = NULL, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ){ features <- features %||% rownames(x = Loadings(object = reference[[reduction]])) if (normalization.method == 'SCT') { @@ -5206,7 +5215,8 @@ ProjectCellEmbeddings.IterableMatrix <- function( nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL, - block.size = 10000 + block.size = 10000, + ... ) { features <- features %||% rownames(x = Loadings(object = reference[[reduction]])) features <- intersect(x = features, y = rownames(query)) @@ -5214,8 +5224,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( reference.SCT.model <- slot(object = reference[[reference.assay]], name = "SCTModel.list")[[1]] cells.grid <- split( x = 1:ncol(query), - f = ceiling(seq_along(along.with = 1:ncol(query))/block.size) - ) + f = ceiling(seq_along(along.with = 1:ncol(query)) / block.size)) proj.list <- list() for (i in seq_along(along.with = cells.grid)) { query.i <- FetchResiduals_reference( @@ -5223,7 +5232,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( reference.SCT.model = reference.SCT.model, features = features, nCount_UMI = nCount_UMI[colnames(query)[cells.grid[[i]]]]) - proj.list[[i]] <- t(Loadings(object = reference[[reduction]])[features,dims]) %*% query.i + proj.list[[i]] <- t(Loadings(object = reference[[reduction]])[features, dims]) %*% query.i } proj.pca <- t(matrix( data = unlist(x = proj.list), @@ -5235,13 +5244,14 @@ ProjectCellEmbeddings.IterableMatrix <- function( )) } else { query <- query[features,] - reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features,] + reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features, ] if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) } else if (inherits(x = reference.data, what = "IterableMatrix")) { - bp.stats <- BPCells::matrix_stats(matrix = reference.data, - row_stats = "variance") + bp.stats <- BPCells::matrix_stats( + matrix = reference.data, + row_stats = "variance") feature.mean <- bp.stats$row_stats["mean",] } else { feature.mean <- rowMeans(mat = reference.data) @@ -5262,8 +5272,8 @@ ProjectCellEmbeddings.IterableMatrix <- function( } feature.mean[is.na(x = feature.mean)] <- 1 } - query.scale <- BPCells::min_by_row(mat = query, vals = 10*feature.sd + feature.mean) - query.scale <- (query.scale - feature.mean)/feature.sd + query.scale <- BPCells::min_by_row(mat = query, vals = 10 * feature.sd + feature.mean) + query.scale <- (query.scale - feature.mean) / feature.sd proj.pca <- t(query.scale) %*% Loadings(object = reference[[reduction]])[features,dims] rownames(x = proj.pca) <- colnames(x = query) colnames(x = proj.pca) <- colnames(x = Embeddings(object = reference[[reduction]]))[dims] @@ -5271,60 +5281,6 @@ ProjectCellEmbeddings.IterableMatrix <- function( return(proj.pca) } -#' @rdname ProjectCellEmbeddings -#' @method ProjectCellEmbeddings DelayedMatrix -#' @export -#' -ProjectCellEmbeddings.DelayedMatrix <- function( - query, - block.size = 1e9, - reference, - assay = NULL, - reduction, - normalization.method = NULL, - dims = NULL, - feature.mean = NULL, - feature.sd = NULL -) { - dims <- dims %||% 1:ncol(reference[[reduction]]) - assay <- assay %||% DefaultAssay(reference) - features <- intersect(rownames(query), - rownames(reference[[reduction]]@feature.loadings)) - query <- query[features,] - if (IsSCT(object[[assay]])) { - # TODO: SCT residuals projection - } else { - feature.mean <- feature.mean[features] %||% - RowMeanSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,]) - feature.sd <- feature.sd[features] %||% - sqrt(RowVarSparse(mat = LayerData(object = reference[[assay]], layer = 'data')[features,])) - feature.sd <- MinMax(feature.sd, max = max(feature.sd), min = 0.1) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = query) - emb.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - data.block <- DelayedArray::read_block(x = query, - viewport = vp, - as.sparse = TRUE) - data.block <- apply(data.block, MARGIN = 2, function(x) { - x <- (x - feature.mean)/feature.sd - return(x) - }) - emb.block <- t(reference[[reduction]]@feature.loadings[features,dims]) %*% data.block - emb.list[[i]] <- emb.block - } - # list to matrix, column has to be cells - emb.mat <- t(matrix(data = unlist(emb.list), nrow = length(dims) , ncol = ncol(query))) - rownames(emb.mat) <- colnames(query) - colnames(emb.mat) <- colnames(reference[[reduction]]@cell.embeddings)[dims] - } - return(emb.mat) -} - - - - # Project new data onto SVD (LSI or PCA) # # A = U∑V SVD @@ -7886,7 +7842,7 @@ UnSketchEmbeddings <- function( } atom.cells <- atom.cells %||% colnames(x = atom.data) if (inherits(x = orig.data, what = 'DelayedMatrix') ) { - matrix.prod.function <- crossprod_DelayedAssay + stop("PseudobulkExpression does not support DelayedMatrix objects") } else if(inherits(x = orig.data, what = 'IterableMatrix')) { matrix.prod.function <- crossprod_BPCells } else { diff --git a/R/integration5.R b/R/integration5.R index ada4c65a4..918f97113 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -13,7 +13,6 @@ NULL #' Harmony Integration #' -#' @inheritParams harmony::HarmonyMatrix #' @param object An \code{\link[SeuratObject]{Assay5}} object # @param assay Name of \code{object} in the containing \code{Seurat} object #' @param orig A \link[SeuratObject:DimReduc]{dimensional reduction} to correct @@ -34,8 +33,6 @@ NULL # @templateVar pkg harmony # @template note-reqdpkg #' -#' @importFrom harmony HarmonyMatrix -#' #' @examples #' \dontrun{ #' # Preprocessing @@ -51,17 +48,16 @@ NULL #' new.reduction = 'harmony', verbose = FALSE) #' #' # Modifying Parameters -#' # We can also add arguments specific to Harmony such as theta, to give more diverse clusters +#' # We can also add arguments specific to Harmony such as theta, to give more diverse clusters #' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", #' new.reduction = 'harmony', verbose = FALSE, theta = 3) -#' } #' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) #' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, #' orig.reduction = "pca", new.reduction = 'harmony', #' assay = "SCT", verbose = FALSE) -#' +#' } #' #' @export #' @@ -111,7 +107,7 @@ HarmonyIntegration <- function( # verbose = verbose # ) # Run Harmony - harmony.embed <- HarmonyMatrix( + harmony.embed <- harmony::HarmonyMatrix( data_mat = Embeddings(object = orig), meta_data = groups, vars_use = 'group', @@ -145,7 +141,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-CCA Integration #' -#' @inheritParams FindIntegrationAnchors +#' @inheritParams RPCAIntegration #' @export #' #' @examples @@ -175,7 +171,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' orig.reduction = "pca", new.reduction = "integrated.cca", #' assay = "SCT", verbose = FALSE) #' } -#' +#' CCAIntegration <- function( object = NULL, assay = NULL, @@ -241,7 +237,23 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-RPCA Integration #' -#' @examples +#' @param object A \code{Seurat} object +#' @param assay Name of \code{Assay} in the \code{Seurat} object +#' @param layers Names of layers in \code{assay} +#' @param orig A \link[SeuratObject:DimReduc]{dimensional reduction} to correct +#' @param new.reduction Name of new integrated dimensional reduction +#' @param reference A reference \code{Seurat} object +#' @param features A vector of features to use for integration +#' @param normalization.method Name of normalization method used: LogNormalize +#' or SCT +#' @param dims Dimensions of dimensional reduction to use for integration +#' @param k.filter Number of anchors to filter +#' @param scale.layer Name of scaled layer in \code{Assay} +#' @param groups A one-column data frame with grouping information +#' @param verbose Print progress +#' @param ... Additional arguments passed to \code{FindIntegrationAnchors} +#' +#' @examples #' \dontrun{ #' # Preprocessing #' obj <- SeuratData::LoadData("pbmcsca") @@ -276,7 +288,6 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' assay = "SCT", verbose = FALSE) #' } #' -#' @inheritParams FindIntegrationAnchors #' @export #' RPCAIntegration <- function( @@ -354,6 +365,7 @@ attr(x = RPCAIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-Joint PCA Integration #' +#' @inheritParams RPCAIntegration #' @inheritParams FindIntegrationAnchors #' @export #' diff --git a/R/objects.R b/R/objects.R index f94a77b87..c0fce535f 100644 --- a/R/objects.R +++ b/R/objects.R @@ -227,9 +227,11 @@ SCTModel <- setClass( #' @concept objects #' #' @examples +#' \dontrun{ #' # SCTAssay objects are generated from SCTransform #' pbmc_small <- SCTransform(pbmc_small) #' pbmc_small[["SCT"]] +#' } #' SCTAssay <- setClass( Class = 'SCTAssay', @@ -480,6 +482,9 @@ CreateSCTAssayObject <- function( #' @param graphs Only keep a subset of Graphs specified here (if \code{NULL}, #' remove all Graphs) #' @param misc Preserve the \code{misc} slot; default is \code{TRUE} +#' @param counts Preserve the count matrices for the assays specified +#' @param data Preserve the data matrices for the assays specified +#' @param scale.data Preserve the scale data matrices for the assays specified #' @param ... Ignored #' #' @return \code{object} with only the sub-object specified retained @@ -1427,7 +1432,7 @@ Cells.SCTModel <- function(x, ...) { #' @method Cells SCTAssay #' @export #' -Cells.SCTAssay <- function(x, layer = NA) { +Cells.SCTAssay <- function(x, layer = NA, ...) { layer <- layer %||% levels(x = x)[1L] if (rlang::is_na(x = layer)) { return(colnames(x = x)) @@ -1471,12 +1476,13 @@ Cells.VisiumV1 <- function(x, ...) { #' @method Features SCTAssay #' @export #' -Features.SCTAssay <- function(x, layer = NA) { +Features.SCTAssay <- function(x, layer = NA, ...) { layer <- layer %||% DefaultLayer(object = x) if (rlang::is_na(x = layer)) { return(rownames(x = x)) } - layer <- rlang::arg_match(arg = layer, values = c(Layers(object = x), levels(x = x))) + layer <- rlang::arg_match( + arg = layer, values = c(Layers(object = x), levels(x = x))) if (layer %in% levels(x = x)) { return(Features(x = components(object = x, model = layer))) } @@ -1504,7 +1510,8 @@ Features.SCTModel <- function(x, ...) { GetAssay.Seurat <- function(object, assay = NULL, ...) { CheckDots(...) assay <- assay %||% DefaultAssay(object = object) - object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) + object.assays <- FilterObjects( + object = object, classes.keep = c('Assay', 'Assay5')) if (!assay %in% object.assays) { stop(paste0( assay, @@ -1648,8 +1655,10 @@ GetTissueCoordinates.VisiumV1 <- function( ) { cols <- cols %||% colnames(x = slot(object = object, name = 'coordinates')) if (!is.null(x = scale)) { - coordinates <- slot(object = object, name = 'coordinates')[, c('imagerow', 'imagecol')] - scale <- match.arg(arg = scale, choices = c('spot', 'fiducial', 'hires', 'lowres')) + coordinates <- slot( + object = object, name = 'coordinates')[, c('imagerow', 'imagecol')] + scale <- match.arg( + arg = scale, choices = c('spot', 'fiducial', 'hires', 'lowres')) scale.use <- ScaleFactors(object = object)[[scale]] coordinates <- coordinates * scale.use } else { @@ -1670,20 +1679,22 @@ GetTissueCoordinates.VisiumV1 <- function( #' @seealso \code{\link[SeuratObject]{HVFInfo}} #' #' @examples +#' \dontrun{ #' # Get the HVF info directly from an SCTAssay object #' pbmc_small <- SCTransform(pbmc_small) -#' HVFInfo(pbmc_small[["SCT"]], selection.method = 'sct')[1:5, ] +#' HVFInfo(pbmc_small[["SCT"]], method = 'sct')[1:5, ] +#' } #' -HVFInfo.SCTAssay <- function(object, selection.method, status = FALSE, ...) { +HVFInfo.SCTAssay <- function(object, method, status = FALSE, ...) { CheckDots(...) disp.methods <- c('mean.var.plot', 'dispersion', 'disp') - if (tolower(x = selection.method) %in% disp.methods) { - selection.method <- 'mvp' + if (tolower(x = method) %in% disp.methods) { + method <- 'mvp' } - selection.method <- switch( - EXPR = tolower(x = selection.method), + method <- switch( + EXPR = tolower(x = method), 'sctransform' = 'sct', - selection.method + method ) vars <- c('gmean', 'variance', 'residual_variance') hvf.info <- SCTResults(object = object, slot = "feature.attributes")[,vars] @@ -1924,7 +1935,7 @@ SCTResults.Seurat <- function(object, assay = "SCT", slot, model = NULL, ...) { #' @method VariableFeatures SCTModel #' @export #' -VariableFeatures.SCTModel <- function(object, nfeatures = 3000, ...) { +VariableFeatures.SCTModel <- function(object, selection.method = NULL, nfeatures = 3000, ...) { if (!is_scalar_integerish(x = nfeatures) || (!is_na(x = nfeatures < 1L) && nfeatures < 1L)) { abort(message = "'nfeatures' must be a single positive integer") } @@ -1944,6 +1955,7 @@ VariableFeatures.SCTModel <- function(object, nfeatures = 3000, ...) { #' VariableFeatures.SCTAssay <- function( object, + selection.method = NULL, layer = NULL, nfeatures = NULL, simplify = TRUE, @@ -1956,8 +1968,8 @@ VariableFeatures.SCTAssay <- function( if (is.null(x = layer)) { layer <- levels(x = object) } - if (simplify == TRUE & use.var.features == TRUE & length(var.features.existing)>=nfeatures){ - return (head(x = var.features.existing, n = nfeatures)) + if (simplify == TRUE & use.var.features == TRUE & length(var.features.existing) >= nfeatures){ + return(head(x = var.features.existing, n = nfeatures)) } layer <- match.arg(arg = layer, choices = levels(x = object), several.ok = TRUE) @@ -1976,7 +1988,7 @@ VariableFeatures.SCTAssay <- function( USE.NAMES = TRUE ) if (isFALSE(x = simplify)){ - return (vf.list) + return(vf.list) } var.features <- sort( x = table(unlist(x = vf.list, use.names = FALSE)), @@ -2038,7 +2050,6 @@ ScaleFactors.VisiumV1 <- function(object, ...) { return(slot(object = object, name = 'scale.factors')) } -#' @rdname FetchData #' @method FetchData VisiumV1 #' @export #' @concept spatial @@ -2082,7 +2093,7 @@ FetchData.VisiumV1 <- function( #' @method components SCTAssay #' @export #' -components.SCTAssay <- function(object, model) { +components.SCTAssay <- function(object, model, ...) { model <- rlang::arg_match(arg = model, values = levels(x = object)) return(slot(object = object, name = 'SCTModel.list')[[model]]) } diff --git a/R/preprocessing.R b/R/preprocessing.R index 064565efa..0709c0349 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -363,9 +363,11 @@ HTODemux <- function( #' @seealso \code{\link[sctransform]{get_residuals}} #' #' @examples +#' \dontrun{ #' data("pbmc_small") #' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) +#' } #' GetResidual <- function( object, @@ -4079,6 +4081,7 @@ FindSpatiallyVariableFeatures.Seurat <- function( LogNormalize.data.frame <- function( data, scale.factor = 1e4, + margin = 2L, verbose = TRUE, ... ) { @@ -4097,6 +4100,7 @@ LogNormalize.data.frame <- function( LogNormalize.V3Matrix <- function( data, scale.factor = 1e4, + margin = 2L, verbose = TRUE, ... ) { diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 42a7c456f..9e933bd51 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -241,9 +241,9 @@ FindSpatiallyVariableFeatures.StdAssay <- function( #' @rdname LogNormalize #' @method LogNormalize default -#' +#' #' @param margin Margin to normalize over -#' +#' #' @export #' LogNormalize.default <- function( @@ -280,140 +280,6 @@ LogNormalize.default <- function( return(data) } -#' @method LogNormalize DelayedMatrix -#' @export -#' -LogNormalize.DelayedMatrix <- function( - data, - scale.factor = 1e4, - margin = 2L, - verbose = TRUE, - sink = NULL, - ... -) { - check_installed( - pkg = 'DelayedArray', - reason = 'for working with delayed matrices' - ) - if (is.null(x = sink)) { - sink <- DelayedArray::AutoRealizationSink( - dim = dim(x = data), - dimnames = dimnames(x = data), - as.sparse = DelayedArray::is_sparse(x = data) - ) - } - if (!inherits(x = sink, what = 'RealizationSink')) { - abort(message = "'sink' must be a RealizationSink") - } else if (inherits(x = sink, what = 'arrayRealizationSink')) { - # arrayRealizationSinks do not support write_block with rowAutoGrid or colAutoGrid - # Because of course they don't - abort(message = "Array RealizationSinks are not supported due to issues with {DelayedArray}") - } else if (!all(dim(x = sink) == dim(x = data))) { - abort(message = "'sink' must be the same size as 'data'") - } - if (!margin %in% c(1L, 2L)) { - abort(message = "'margin' must be 1 or 2") - } - grid <- if (margin == 1L) { - DelayedArray::rowAutoGrid(x = data) - } else { - DelayedArray::colAutoGrid(x = data) - } - sparse <- DelayedArray::is_sparse(x = data) - if (isTRUE(x = verbose)) { - pb <- txtProgressBar(file = stderr(), style = 3) - } - for (i in seq_len(length.out = length(x = grid))) { - vp <- grid[[i]] - x <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) - if (isTRUE(x = sparse)) { - x <- as(object = x, Class = 'dgCMatrix') - } - x <- LogNormalize( - data = x, - scale.factor = scale.factor, - margin = margin, - verbose = FALSE, - ... - ) - DelayedArray::write_block(sink = sink, viewport = vp, block = x) - if (isTRUE(x = verbose)) { - setTxtProgressBar(pb = pb, value = i / length(x = grid)) - } - } - if (isTRUE(x = verbose)) { - close(con = pb) - } - DelayedArray::close(con = sink) - return(as(object = sink, Class = "DelayedArray")) -} - -#' @method LogNormalize H5ADMatrix -#' @export -#' -LogNormalize.H5ADMatrix <- function( - data, - scale.factor = 1e4, - margin = 2L, - verbose = TRUE, - layer = 'data', - ... -) { - results <- LogNormalize.HDF5Matrix( - data = data, - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - layer = file.path('layers', layer, fsep = '/'), - ... - ) - rpath <- slot(object = slot(object = results, name = 'seed'), name = 'filepath') - return(HDF5Array::H5ADMatrix(filepath = rpath, layer = layer)) -} - -#' @method LogNormalize HDF5Matrix -#' @importFrom rhdf5 h5delete -#' @importFrom DelayedArray path is_sparse -#' @importFrom HDF5Array HDF5RealizationSink -#' @export -#' -LogNormalize.HDF5Matrix <- function( - data, - scale.factor = 1e4, - margin = 2L, - verbose = TRUE, - layer = 'data', - ... -) { - check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') - fpath <- path(object = data) - if (.DelayedH5DExists(object = data, path = layer)) { - h5delete(file = fpath, name = layer) - dpath <- file.path( - dirname(path = layer), - paste0('.', basename(layer), '_dimnames'), - fsep = '/' - ) - h5delete(file = fpath, name = dpath) - } - sink <- HDF5RealizationSink( - dim = dim(x = data), - dimnames = dimnames(x = data), - as.sparse = is_sparse(x = data), - filepath = fpath, - name = layer - ) - return(LogNormalize.DelayedMatrix( - data = data, - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - sink = sink, - ... - )) -} - - #' @method LogNormalize IterableMatrix #' @export #' @@ -430,93 +296,6 @@ LogNormalize.IterableMatrix <- function( return(data) } -#' @method LogNormalize TileDBMatrix -#' @importFrom TileDBArray TileDBRealizationSink -#' @export -#' -LogNormalize.TileDBMatrix <- function( - data, - scale.factor = 1e4, - margin = 2L, - verbose= TRUE, - layer = 'data', - ... -) { - check_installed( - pkg = "TileDBArray", - reason = "for working with TileDB matrices" - ) - odir <- c( - dirname(path = DelayedArray::path(object = data)), - getwd(), - tempdir(check = TRUE) - ) - # file.access returns 0 (FALSE) for true and -1 (TRUE) for false - idx <- which(x = !file.access(names = odir, mode = 2L))[1L] - if (rlang::is_na(x = idx)) { - abort( - message = "Unable to find a directory to write normalized TileDB matrix") - } - out <- file.path(odir[idx], layer) - if (!file.access(names = out, mode = 0L)) { - warn(message = paste(sQuote(x = out), "exists, overwriting")) - unlink(x = out, recursive = TRUE, force = TRUE) - } - sink <- TileDBRealizationSink( - dim = dim(x = data), - dimnames = dimnames(x = data), - type = BiocGenerics::type(x = data), - path = out, - attr = layer, - sparse = DelayedArray::is_sparse(x = data) - ) - return(NextMethod( - generic = 'LogNormalize', - object = data, - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - sink = sink, - ... - )) -} - -#' @method LogNormalize TENxMatrix -#' @export -#' @importFrom HDF5Array TENxRealizationSink -#' @importFrom rhdf5 h5delete -#' @importFrom DelayedArray path -#' -LogNormalize.TENxMatrix <- function( - data, - scale.factor = 1e4, - margin = 2L, - verbose = TRUE, - layer = 'data', - ... -) { - check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 matrices') - fpath <- DelayedArray::path(object = data) - if (.DelayedH5DExists(object = data, path = layer)) { - rhdf5::h5delete(file = fpath, name = layer) - } - sink <- HDF5Array::TENxRealizationSink( - dim = dim(x = data), - dimnames = dimnames(x = data), - filepath = fpath, - group = layer - ) - return(NextMethod( - generic = 'LogNormalize', - object = data, - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - sink = sink, - ... - )) -} - #' @importFrom SeuratObject IsSparse #' #' @method NormalizeData default @@ -580,50 +359,6 @@ NormalizeData.default <- function( return(normalized) } -.DelayedH5DExists <- function(object, path) { - check_installed(pkg = 'HDF5Array', reason = 'for working with HDF5 files') - if (!inherits(x = object, what = c('HDF5Array', 'H5ADMatrix', 'TENxMatrix'))) { - abort(message = "'object' must be an HDF5Array or H5ADMatrix") - } - on.exit(expr = rhdf5::h5closeAll(), add = TRUE) - fpath <- DelayedArray::path(object = object) - h5loc <- rhdf5::H5Fopen( - name = fpath, - flags = 'H5F_ACC_RDWR', - fapl = NULL, - native = FALSE - ) - return(rhdf5::H5Lexists(h5loc = h5loc, name = path)) -} - -# #' @method NormalizeData DelayedArray -# #' @export -# #' -# NormalizeData.DelayedArray <- function( -# object, -# method = c('LogNormalize'), -# scale.factor = 1e4, -# cmargin = 2L, -# margin = 1L, -# layer = 'data', -# verbose = TRUE, -# ... -# ) { -# method <- arg_match(arg = method) -# normalized <- switch( -# EXPR = method, -# LogNormalize = LogNormalize( -# data = object, -# scale.factor = scale.factor, -# margin = 2L, -# verbose = TRUE, -# layer = layer, -# ... -# ) -# ) -# return(normalized) -# } - #' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features #' LayerData LayerData<- #' @@ -850,84 +585,6 @@ VST.IterableMatrix <- function( return(hvf.info) } -#' @method VST DelayedMatrix -#' @export -#' -VST.DelayedMatrix <- function( - data, - margin = 2L, - nselect = 2000L, - span = 0.3, - clip = NULL, - verbose = TRUE, - block.size = 1e8, - ... -) { - check_installed( - pkg = 'DelayedArray', - reason = 'for working with delayed matrices' - ) - if (!margin %in% c(1L, 2L)) { - abort(message = "'margin' must be 1 or 2") - } - nfeatures <- dim(x = data)[-margin] - ncells <- dim(x = data)[margin] - hvf.info <- SeuratObject::EmptyDF(n = nfeatures) - hvf.info$mean <- RowMeanDelayedAssay(x = data, block.size = block.size) - # Calculate feature variance - hvf.info$variance <- RowVarDelayedAssay(x = data, block.size = block.size) - hvf.info$variance.expected <- 0L - not.const <- hvf.info$variance > 0 - fit <- loess( - formula = log10(x = variance) ~ log10(x = mean), - data = hvf.info[not.const, , drop = TRUE], - span = span - ) - hvf.info$variance.expected[not.const] <- 10 ^ fit$fitted - - suppressMessages(setAutoBlockSize(size = block.size)) - grid <- if (margin == 1L) { - DelayedArray::rowAutoGrid(x = data) - } else { - DelayedArray::colAutoGrid(x = data) - } - sparse <- DelayedArray::is_sparse(x = data) - if (sparse) { - sweep.func <- SweepSparse - rowsum.func <- RowSumSparse - } else { - sweep.func <- sweep - rowsum.func <- rowSums2 - } - var_stand.list <- list() - for (i in seq_len(length.out = length(x = grid))) { - vp <- grid[[i]] - block <- DelayedArray::read_block(x = data, viewport = vp, as.sparse = sparse) - block <- as(object = block, Class = 'dgCMatrix') - block.stat <- SparseRowVarStd(mat = block, - mu = hvf.info$mean, - sd = sqrt(hvf.info$variance.expected), - vmax = clip %||% sqrt(x = ncol(x = data)), - display_progress = FALSE) - - var_stand.list[[i]] <- block.stat * (ncol(block) - 1) - } - hvf.info$variance.standardized <- Reduce(f = '+', x = var_stand.list)/ - (ncol(data) - 1) - # Set variable features - hvf.info$variable <- FALSE - hvf.info$rank <- NA - vf <- head( - x = order(hvf.info$variance.standardized, decreasing = TRUE), - n = nselect - ) - hvf.info$variable[vf] <- TRUE - hvf.info$rank[vf] <- seq_along(along.with = vf) - rownames(hvf.info) <- rownames(data) - - return(hvf.info) -} - #' @importFrom Matrix rowMeans #' #' @rdname VST @@ -1013,6 +670,13 @@ VST.matrix <- function( #' Calculate dispersion of features #' +#' @param object Data matrix +#' @param mean.function Function to calculate mean +#' @param dispersion.function Function to calculate dispersion +#' @param num.bin Number of bins to use +#' @param binning.method Method to use for binning. Options are 'equal_width' or 'equal_frequency' +#' @param verbose Display progress +#' @keywords internal #' CalcDispersion <- function( object, @@ -1024,7 +688,8 @@ CalcDispersion <- function( ... ) { if (!inherits(x = object, what = c('dgCMatrix', 'matrix'))) { - stop('mean.var.plot and dispersion methods only support dense and sparse matrix input') + stop('mean.var.plot and dispersion methods only \ + support dense and sparse matrix input') } if (inherits(x = object, what = 'matrix')) { object <- as.sparse(x = object) @@ -1032,7 +697,8 @@ CalcDispersion <- function( feature.mean <- mean.function(object, verbose) feature.dispersion <- dispersion.function(object, verbose) - names(x = feature.mean) <- names(x = feature.dispersion) <- rownames(x = object) + names(x = feature.mean) <- names( + x = feature.dispersion) <- rownames(x = object) feature.dispersion[is.na(x = feature.dispersion)] <- 0 feature.mean[is.na(x = feature.mean)] <- 0 data.x.breaks <- switch( @@ -1054,27 +720,29 @@ CalcDispersion <- function( feature.dispersion.scaled <- (feature.dispersion - mean.y[as.numeric(x = data.x.bin)]) / sd.y[as.numeric(x = data.x.bin)] names(x = feature.dispersion.scaled) <- names(x = feature.mean) - hvf.info <- data.frame(feature.mean, feature.dispersion, feature.dispersion.scaled) + hvf.info <- data.frame( + feature.mean, feature.dispersion, feature.dispersion.scaled) rownames(x = hvf.info) <- rownames(x = object) - colnames(x = hvf.info) <- paste0('mvp.', c('mean', 'dispersion', 'dispersion.scaled')) + colnames(x = hvf.info) <- paste0( + 'mvp.', c('mean', 'dispersion', 'dispersion.scaled')) return(hvf.info) } #' @importFrom SeuratObject .CalcN #' -CalcN <- function(object) { - return(.CalcN(object)) +CalcN <- function(object, ...) { + return(.CalcN(object, ...)) } #' @method .CalcN IterableMatrix #' @export #' -.CalcN.IterableMatrix <- function(object) { +.CalcN.IterableMatrix <- function(object, ...) { col_stat <- BPCells::matrix_stats(matrix = object, col_stats = 'mean')$col_stats return(list( - nCount = round(col_stat['mean',] *nrow(object)), - nFeature = col_stat['nonzero',] + nCount = round(col_stat['mean', ] * nrow(object)), + nFeature = col_stat['nonzero', ] )) } @@ -1393,7 +1061,6 @@ DISP <- function( ################################# SCTransform ################################## ################################################################################ - #' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features #' LayerData LayerData<- #' @@ -1420,13 +1087,20 @@ SCTransform.StdAssay <- function( verbose = TRUE, ... ) { - if (!is.null(reference.SCT.model)){ + check_installed( + pkg = "DelayedArray", + reason = "for running SCTransform on v5 assays" + ) + if (!is.null(reference.SCT.model)) { do.correct.umi <- FALSE do.center <- FALSE } olayer <- layer <- unique(x = layer) layers <- Layers(object = object, search = layer) - dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) + dataset.names <- gsub( + pattern = paste0(layer, "."), + replacement = "", + x = layers) sct.assay.list <- list() for (dataset.index in seq_along(along.with = layers)) { l <- layers[dataset.index] @@ -1443,38 +1117,45 @@ SCTransform.StdAssay <- function( ) sparse <- DelayedArray::is_sparse(x = counts) ## Sample cells - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) + cells.grid <- DelayedArray::colAutoGrid( + x = counts, ncol = min(ncells, ncol(counts))) # if there is no reference model we randomly select a subset of cells # TODO: randomize this set of cells variable.feature.list <- list() - GetSCT.Chunked <- function(vp, reference.SCT.model = NULL, do.correct.umi = TRUE){ + GetSCT.Chunked <- function( + vp, + reference.SCT.model = NULL, + do.correct.umi = TRUE + ) { # counts here is global - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = sparse) + block <- DelayedArray::read_block( + x = counts, + viewport = vp, + as.sparse = sparse) counts.chunk <- as(object = block, Class = 'dgCMatrix') cell.attr.object <- cell.attr[colnames(x = counts.chunk),, drop=FALSE] if (!identical(rownames(cell.attr.object), colnames(counts.chunk))) { stop("cell attribute row names must match column names of count matrix") } - vst.out <- SCTransform(object = counts.chunk, - cell.attr = cell.attr.object, - reference.SCT.model = reference.SCT.model, - do.correct.umi = do.correct.umi, - ncells = ncells, - residual.features = residual.features, - variable.features.n = variable.features.n, - variable.features.rv.th = variable.features.rv.th, - vars.to.regress = vars.to.regress, - do.scale = FALSE, - do.center = FALSE, - clip.range = clip.range, - conserve.memory = conserve.memory, - return.only.var.genes = return.only.var.genes, - seed.use = seed.use, - verbose = FALSE, - ...) + vst.out <- SCTransform( + object = counts.chunk, + cell.attr = cell.attr.object, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = FALSE, + do.center = FALSE, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = FALSE, + ...) residual.type <- vst.out[['residual_type']] %||% 'pearson' sct.method <- vst.out[['sct.method']] # create output assay and put (corrected) umi counts in count slot @@ -1510,15 +1191,22 @@ SCTransform.StdAssay <- function( Misc(object = assay.out, slot = 'vst.out') <- vst.out assay.out <- as(object = assay.out, Class = "SCTAssay") # does not like character(0) keys being merged - return (assay.out) + return(assay.out) } local.reference.SCT.model <- NULL if (is.null(reference.SCT.model)){ # No reference model so just select the some block of cells set.seed(seed = seed.use) - selected.block <- sample(x = seq.int(from = 1, to = length(cells.grid)), size = 1) + selected.block <- sample( + x = seq.int(from = 1, to = length(cells.grid)), + size = 1) if (verbose){ - message("Using block ", selected.block, " from ", dataset.names[[dataset.index]], " to learn model.") + message( + "Using block ", + selected.block, + " from ", + dataset.names[[dataset.index]], + " to learn model.") } vp <- cells.grid[[selected.block]] @@ -1557,14 +1245,18 @@ SCTransform.StdAssay <- function( for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] if (verbose){ - message("Getting residuals for block ", i, "(of ", length(cells.grid), ") for ", dataset.names[[dataset.index]], " dataset") + message( + "Getting residuals for block ", + i, "(of ", length(cells.grid), ") for ", + dataset.names[[dataset.index]], " dataset") } - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = TRUE) + block <- DelayedArray::read_block( + x = counts, + viewport = vp, + as.sparse = TRUE) counts.vp <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] + cell.attr.object <- cell.attr[colnames(x = counts.vp), , drop = FALSE] vst_out <- vst_out.reference cell_attr <- data.frame( umi = colSums(counts.vp), @@ -1576,27 +1268,27 @@ SCTransform.StdAssay <- function( if (return.only.var.genes){ new_residual <- get_residuals( vst_out = vst_out, - umi = counts.vp[variable.features,], + umi = counts.vp[variable.features, ], residual_type = "pearson", min_variance = min_var, res_clip_range = res_clip_range, - verbosity = FALSE#as.numeric(x = verbose) * 2 + verbosity = FALSE # as.numeric(x = verbose) * 2 ) } else { new_residual <- get_residuals( vst_out = vst_out, - umi = counts.vp[all.features,], + umi = counts.vp[all.features, ], residual_type = "pearson", min_variance = min_var, res_clip_range = res_clip_range, - verbosity = FALSE#as.numeric(x = verbose) * 2 + verbosity = FALSE # as.numeric(x = verbose) * 2 ) } vst_out$y <- new_residual corrected_counts[[i]] <- correct_counts( x = vst_out, umi = counts.vp[all_features,], - verbosity = FALSE# as.numeric(x = verbose) * 2 + verbosity = FALSE # as.numeric(x = verbose) * 2 ) residuals[[i]] <- new_residual cell_attrs[[i]] <- cell_attr @@ -1761,7 +1453,6 @@ SCTransform.StdAssay <- function( return(merged.assay) } - #' Calculate pearson residuals of features not in the scale.data #' #' This function calls sctransform::get_residuals. @@ -1797,16 +1488,17 @@ SCTransform.StdAssay <- function( #' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) #' -FetchResiduals <- function(object, - features, - assay = NULL, - umi.assay = "RNA", - layer = "counts", - clip.range = NULL, - reference.SCT.model = NULL, - replace.value = FALSE, - na.rm = TRUE, - verbose = TRUE) { +FetchResiduals <- function( + object, + features, + assay = NULL, + umi.assay = "RNA", + layer = "counts", + clip.range = NULL, + reference.SCT.model = NULL, + replace.value = FALSE, + na.rm = TRUE, + verbose = TRUE) { assay <- assay %||% DefaultAssay(object = object) if (IsSCT(assay = object[[assay]])) { object[[assay]] <- as(object[[assay]], "SCTAssay") @@ -1853,7 +1545,7 @@ FetchResiduals <- function(object, paste(features.orig, collapse = ", "), call. = FALSE ) - return (NULL) + return(NULL) } #if (length(x = sct.models) > 1 & verbose) { # message("This SCTAssay contains multiple SCT models. Computing residuals for cells using") #} @@ -1925,47 +1617,57 @@ FetchResiduals <- function(object, new.scale <- new.scale[!rowAnyNAs(x = new.scale), ] } - return(new.scale[features,]) + return(new.scale[features, ]) } - -# Calculate pearson residuals of features not in the scale.data -# This function is the secondary function under FetchResiduals -# -# @param object A seurat object -# @param assay Name of the assay of the seurat object generated by SCTransform. Default -# is "SCT" -# @param umi.assay Name of the assay of the seurat object to fetch UMIs from. Default -# is "RNA" -# @param layer Name of the layer under `umi.assay` to fetch UMIs from. Default is -# "counts" -# @param layer.cells Vector of cells to calculate the residual for. Default is NULL -# which uses all cells in the layer -# @param SCTModel Which SCTmodel to use from the object for calculating the residual. -# Will be ignored if reference.SCT.model is set -# @param reference.SCT.model If a reference SCT model should be used for calculating -# the residuals. When set to not NULL, ignores the `SCTModel` paramater. -# @param new_features A vector of features to calculate the residuals for -# @param clip.range Numeric of length two specifying the min and max values the Pearson residual will be clipped to. Useful if you want to change the clip.range. -# @param replace.value Whether to replace the value of residuals if it already exists -# @param verbose Whether to print messages and progress bars -# -# @return Returns a matrix containing centered pearson residuals of added features -# +#' Calculate pearson residuals of features not in the scale.data +#' This function is the secondary function under FetchResiduals +#' +#' @param object A seurat object +#' @param assay Name of the assay of the seurat object generated by +#' SCTransform. Default is "SCT" +#' @param umi.assay Name of the assay of the seurat object to fetch +#' UMIs from. Default is "RNA" +#' @param layer Name of the layer under `umi.assay` to fetch UMIs from. +#' Default is "counts" +#' @param layer.cells Vector of cells to calculate the residual for. +#' Default is NULL which uses all cells in the layer +#' @param SCTModel Which SCTmodel to use from the object for calculating +#' the residual. Will be ignored if reference.SCT.model is set +#' @param reference.SCT.model If a reference SCT model should be used +#' for calculating the residuals. When set to not NULL, ignores the `SCTModel` +#' paramater. +#' @param new_features A vector of features to calculate the residuals for +#' @param clip.range Numeric of length two specifying the min and max values +#' the Pearson residual will be clipped to. Useful if you want to change the +#' clip.range. +#' @param replace.value Whether to replace the value of residuals if it +#' already exists +#' @param verbose Whether to print messages and progress bars +#' +#' @return Returns a matrix containing centered pearson residuals of +#' added features +#' #' @importFrom sctransform get_residuals #' @importFrom Matrix colSums # -FetchResidualSCTModel <- function(object, - assay = "SCT", - umi.assay = "RNA", - layer = "counts", - layer.cells = NULL, - SCTModel = NULL, - reference.SCT.model = NULL, - new_features = NULL, - clip.range = NULL, - replace.value = FALSE, - verbose = FALSE) { +FetchResidualSCTModel <- function( + object, + assay = "SCT", + umi.assay = "RNA", + layer = "counts", + layer.cells = NULL, + SCTModel = NULL, + reference.SCT.model = NULL, + new_features = NULL, + clip.range = NULL, + replace.value = FALSE, + verbose = FALSE +) { + check_installed( + pkg = "DelayedArray", + reason = "for running SCTransform on v5 assays" + ) model.cells <- character() model.features <- Features(x = object, assay = assay) if (is.null(x = reference.SCT.model)){ @@ -1988,15 +1690,6 @@ FetchResidualSCTModel <- function(object, scale.data.cells.common <- intersect(scale.data.cells, layer.cells) scale.data.cells <- intersect(x = scale.data.cells, y = scale.data.cells.common) if (length(x = setdiff(x = layer.cells, y = scale.data.cells)) == 0) { - # existing.scale.data <- suppressWarnings(GetAssayData(object = object, assay = assay, slot = "scale.data")) - #full.scale.data <- matrix(data = NA, nrow = nrow(x = existing.scale.data), - # ncol = length(x = layer.cells), dimnames = list(rownames(x = existing.scale.data), layer.cells)) - #full.scale.data[rownames(x = existing.scale.data), colnames(x = existing.scale.data)] <- existing.scale.data - #existing_features <- names(x = which(x = !apply( - # X = full.scale.data, - # MARGIN = 1, - # FUN = anyNA - #))) existing_features <- rownames(x = existing.scale.data) } else { existing_features <- character() @@ -2006,8 +1699,8 @@ FetchResidualSCTModel <- function(object, } else { features_to_compute <- setdiff(x = new_features, y = existing_features) } - if (length(features_to_compute)<1){ - return (existing.scale.data[intersect(x = rownames(x = scale.data.cells), y = new_features),,drop=FALSE]) + if (length(features_to_compute) < 1) { + return (existing.scale.data[intersect(x = rownames(x = scale.data.cells), y = new_features), , drop=FALSE]) } if (is.null(x = reference.SCT.model) & length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { @@ -2065,7 +1758,6 @@ FetchResidualSCTModel <- function(object, ) # iterate over 2k cells at once - #cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = length(x = layer.cells)) new_residuals <- list() @@ -2185,16 +1877,30 @@ FetchResidualSCTModel <- function(object, return(new_residual) } +#' Calculate pearson residuals of features not in the scale.data +#' +#' This function calls sctransform::get_residuals. +#' +#' @param object A seurat object +#' @param reference.SCT.model SCTModel object to use for calculating residuals +#' @param features Name of features to add into the scale.data +#' @param nCount_UMI UMI Counts per cell provided to vst +#' @param verbose Whether to print messages and progress bars +#' +#' @return Returns a Seurat object containing Pearson residuals of added +#' features in its scale.data +#' #' temporal function to get residuals from reference #' @importFrom sctransform get_residuals #' @importFrom Matrix colSums #' - -FetchResiduals_reference <- function(object, - reference.SCT.model = NULL, - features = NULL, - nCount_UMI = NULL, - verbose = FALSE) { +FetchResiduals_reference <- function( + object, + reference.SCT.model = NULL, + features = NULL, + nCount_UMI = NULL, + verbose = FALSE +) { ## Add cell_attr for missing cells nCount_UMI <- nCount_UMI %||% colSums(object) cell_attr <- data.frame( diff --git a/R/sketching.R b/R/sketching.R index 87746f902..057efb2de 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -466,72 +466,6 @@ LeverageScore.default <- function( return(Z.score) } -#' @rdname LeverageScore -#' @importFrom Matrix qrR t -#' @importFrom DelayedArray setAutoBlockSize -#' @method LeverageScore DelayedMatrix -#' @export -#' -LeverageScore.DelayedMatrix <- function( - object, - nsketch = 5000L, - ndims = NULL, - method = CountSketch, - eps = 0.5, - seed = 123L, - block.size = 1e8, - verbose = TRUE, - ... -) { - check_installed( - pkg = 'DelayedArray', - reason = 'for working with delayed matrices' - ) - if (!is_quosure(x = method)) { - method <- enquo(arg = method) - } - sa <- SketchMatrixProd(object = object, - block.size = block.size, - nsketch = nsketch, - method = method, - ...) - qr.sa <- base::qr(x = sa) - R <- if (inherits(x = qr.sa, what = 'sparseQR')) { - qrR(qr = qr.sa) - } else { - base::qr.R(qr = qr.sa) - } - if (length(x = which(x = diag(x = R) == 0))> 0) { - warning("not all features are variable features") - var.index <- which(x = diag(x = R) != 0) - R <- R[var.index, var.index] - } - R.inv <- as.sparse(x = backsolve(r = R, x = diag(x = ncol(x = R)))) - JL <- as.sparse(x = JLEmbed( - nrow = ncol(x = R.inv), - ncol = ndims, - eps = eps, - seed = seed - )) - RP.mat <- R.inv %*% JL - sparse <- DelayedArray::is_sparse(x = object) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = object) - norm.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - norm.list[[i]] <- colSums(x = as.matrix(t(RP.mat) %*% block[1:ncol(R),]) ^ 2) - } - scores <- unlist(norm.list) - return(scores) -} - #' @rdname LeverageScore #' @method LeverageScore StdAssay #' @@ -746,47 +680,6 @@ JLEmbed <- function(nrow, ncol, eps = 0.1, seed = NA_integer_, method = "li") { return(m) } - - -SketchMatrixProd <- function( - object, - method = CountSketch, - block.size = 1e9, - nsketch = 5000L, - seed = 123L, - ...) { - - if (is_quosure(x = method)) { - method <- eval( - expr = quo_get_expr(quo = method), - envir = quo_get_env(quo = method) - ) - } - if (is.character(x = method)) { - method <- match.fun(FUN = method) - } - stopifnot(is.function(x = method)) - sparse <- DelayedArray::is_sparse(x = object) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = object) - SA.mat <- matrix(data = 0, nrow = nsketch, ncol = nrow(object)) - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = object, viewport = vp, as.sparse = sparse) - - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - ncells.block <- ncol(block) - S.block <- method(nsketch = nsketch, ncells = ncells.block, seed = seed, ...) - SA.mat <- SA.mat + as.matrix(S.block %*% t(block)) - } - return(SA.mat) -} - - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # S4 Methods #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/R/tree.R b/R/tree.R index e27595183..d2d733ea8 100644 --- a/R/tree.R +++ b/R/tree.R @@ -49,13 +49,15 @@ cluster.ape <- paste( #' @concept tree #' #' @examples +#' \dontrun{ #' if (requireNamespace("ape", quietly = TRUE)) { #' data("pbmc_small") #' pbmc_small #' pbmc_small <- BuildClusterTree(object = pbmc_small) #' Tool(object = pbmc_small, slot = 'BuildClusterTree') #' } -#' +#' } +#' BuildClusterTree <- function( object, assay = NULL, diff --git a/R/utilities.R b/R/utilities.R index 2079c11a8..256d95587 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -352,9 +352,11 @@ AddModuleScore <- function( #' @concept utilities #' #' @examples +#' \dontrun{ #' data("pbmc_small") #' head(AggregateExpression(object = pbmc_small)) -#' +#' } +#' AggregateExpression <- function( object, assays = NULL, @@ -407,6 +409,7 @@ AggregateExpression <- function( #' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) #' @param slot Slot(s) to use; if multiple slots are given, assumed to follow #' the order of 'assays' (if specified) or object's assays +#' @param method Method of collapsing expression values. Either 'average' or 'aggregate' #' @param verbose Print messages and show progress bar #' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}} #' @@ -1368,7 +1371,7 @@ PseudobulkExpression.Assay <- function( ... ) { data.use <- GetAssayData( - object = object, + object = object, slot = slot ) features.to.avg <- features %||% rownames(x = data.use) @@ -1400,9 +1403,7 @@ PseudobulkExpression.Assay <- function( } } data.return <- data.use %*% category.matrix - return(data.return) - - + return(data.return) } #' @method PseudobulkExpression StdAssay @@ -1449,13 +1450,13 @@ PseudobulkExpression.StdAssay <- function( ) ) for (i in seq_along(layers.set)) { - data.i <- LayerData(object = object, - layer = layers.set[i], - features = features.assay - ) + data.i <- LayerData( + object = object, + layer = layers.set[i], + features = features.assay) category.matrix.i <- category.matrix[colnames(x = data.i),] if (inherits(x = data.i, what = 'DelayedArray')) { - data.return.i<- tcrossprod_DelayedAssay(x = data.i, y = t(category.matrix.i)) + stop("PseudobulkExpression does not support DelayedArray objects") } else { data.return.i <- as.sparse(x = data.i %*% category.matrix.i) } @@ -2550,39 +2551,6 @@ ToNumeric <- function(x){ return(x) } - - -# cross product from delayed array -# -crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { - # perform t(x) %*% y in blocks for y - if (!inherits(x = y, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (nrow(x) != nrow(y)) { - stop('row of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = y) - product.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - product.list[[i]] <- as.matrix(t(x) %*% block) - } - product.mat <- matrix(data = unlist(product.list), nrow = ncol(x) , ncol = ncol(y)) - colnames(product.mat) <- colnames(y) - rownames(product.mat) <- colnames(x) - return(product.mat) -} - - # cross product from BPCells # crossprod_BPCells <- function(x, y) { @@ -2593,134 +2561,6 @@ crossprod_BPCells <- function(x, y) { return(product.mat) } -# transpose cross product from delayed array -# -tcrossprod_DelayedAssay <- function(x, y, block.size = 1e8) { - # perform x %*% t(y) in blocks for x - if (!inherits(x = x, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (ncol(x) != ncol(y)) { - stop('column of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = x) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = x) - product.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - vp.range <- vp@ranges[2]@start : (vp@ranges[2]@start + vp@ranges[2]@width - 1) - block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - product.list[[i]] <- as.matrix( block %*% t(y[,vp.range])) - } - product.mat <- Reduce(f = '+', product.list) - colnames(product.mat) <- rownames(y) - rownames(product.mat) <- rownames(x) - return(product.mat) -} - -# cross product row norm from delayed array -# -crossprodNorm_DelayedAssay <- function(x, y, block.size = 1e8) { - # perform t(x) %*% y in blocks for y - if (!inherits(x = y, 'DelayedMatrix')) { - stop('y should a DelayedMatrix') - } - if (nrow(x) != nrow(y)) { - stop('row of x and y should be the same') - } - sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = y) - norm.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = y, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - norm.list[[i]] <- colSums(x = as.matrix(t(x) %*% block) ^ 2) - } - norm.vector <- unlist(norm.list) - return(norm.vector) - -} - -# row mean from delayed array -# -RowMeanDelayedAssay <- function(x, block.size = 1e8) { - if (!inherits(x = x, 'DelayedMatrix')) { - stop('input x should a DelayedMatrix') - } - sparse <- DelayedArray::is_sparse(x = x) - if (sparse ) { - row.sum.function <- RowSumSparse - } else { - row.sum.function <- rowSums2 - } - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = x) - sum.list <- list() - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - sum.list[[i]] <- row.sum.function(mat = block) - } - mean.mat <- Reduce('+', sum.list) - mean.mat <- mean.mat/ncol(x) - return(mean.mat) -} - -# row variance from delayed array -# -RowVarDelayedAssay <- function(x, block.size = 1e8) { - if (!inherits(x = x, 'DelayedMatrix')) { - stop('input x should a DelayedMatrix') - } - sparse <- DelayedArray::is_sparse(x = x) - if (sparse ) { - row.sum.function <- RowSumSparse - } else { - row.sum.function <- rowSums2 - } - - suppressMessages(setAutoBlockSize(size = block.size)) - cells.grid <- DelayedArray::colAutoGrid(x = x) - sum2.list <- list() - sum.list <- list() - - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = x, viewport = vp, as.sparse = sparse) - if (sparse) { - block <- as(object = block, Class = 'dgCMatrix') - } else { - block <- as(object = block, Class = 'Matrix') - } - sum2.list[[i]] <- row.sum.function(mat = block**2) - sum.list[[i]] <- row.sum.function(mat = block) - } - sum.mat <- Reduce('+', sum.list) - sum2.mat <- Reduce('+', sum2.list) - var.mat <- sum2.mat/ncol(x) - (sum.mat/ncol(x))**2 - var.mat <- var.mat * ncol(counts) / (ncol(counts) - 1) - return(var.mat) -} - - - # nonzero element version of sweep # SweepNonzero <- function( @@ -2750,10 +2590,15 @@ SweepNonzero <- function( #' Create one hot matrix for a given label +#' +#' @param labels A vector of labels +#' @param method Method to aggregate cells with the same label. Either 'aggregate' or 'average' +#' @param cells.name A vector of cell names +#' #' @importFrom Matrix colSums sparse.model.matrix #' @importFrom stats as.formula #' @export - +#' CreateCategoryMatrix <- function( labels, method = c('aggregate', 'average'), diff --git a/R/visualization.R b/R/visualization.R index 8db6e1b04..c325a47b0 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -701,12 +701,14 @@ VlnPlot <- function( #' @seealso \code{\link{DimPlot}} #' #' @examples +#' \dontrun{ #' if (requireNamespace("ape", quietly = TRUE)) { #' data("pbmc_small") #' pbmc_small <- BuildClusterTree(object = pbmc_small, verbose = FALSE) #' PlotClusterTree(pbmc_small) #' ColorDimSplit(pbmc_small, node = 5) #' } +#' } #' ColorDimSplit <- function( object, @@ -4717,11 +4719,13 @@ JackStrawPlot <- function( #' @concept visualization #' #' @examples +#' \dontrun{ #' if (requireNamespace("ape", quietly = TRUE)) { #' data("pbmc_small") #' pbmc_small <- BuildClusterTree(object = pbmc_small) #' PlotClusterTree(object = pbmc_small) #' } +#' } PlotClusterTree <- function(object, direction = "downwards", ...) { if (!PackageCheck('ape', error = FALSE)) { stop(cluster.ape, call. = FALSE) From 008773dc811e5531128b4c4fc556be2c2b395ce3 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 12:46:56 -0400 Subject: [PATCH 664/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce93274ff..6b14df466 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9049 -Date: 2023-06-08 +Version: 4.9.9.9050 +Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 1496d1b896997cda88d115fa507caeff4c4ff851 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 12:49:15 -0400 Subject: [PATCH 665/979] update documentation --- R/differential_expression.R | 1 - R/integration.R | 24 +++++++++++ R/preprocessing5.R | 10 ++--- man/AggregateExpression.Rd | 2 + man/AverageExpression.Rd | 2 + man/BuildClusterTree.Rd | 2 + man/CCAIntegration.Rd | 37 ++++++++++------- man/CalcDispersion.Rd | 14 +++++++ man/ColorDimSplit.Rd | 2 + man/CreateCategoryMatrix.Rd | 7 ++++ man/DietSeurat.Rd | 6 +++ man/FetchResidualSCTModel.Rd | 62 +++++++++++++++++++++++++++++ man/FetchResiduals_reference.Rd | 21 +++++++++- man/FindMarkers.Rd | 2 + man/FoldChange.Rd | 2 + man/GetResidual.Rd | 2 + man/HVFInfo.SCTAssay.Rd | 21 ++-------- man/HarmonyIntegration.Rd | 45 +-------------------- man/JointPCAIntegration.Rd | 35 +++++++++------- man/LeverageScore.Rd | 13 ------ man/LogNormalize.Rd | 10 ++--- man/PlotClusterTree.Rd | 2 + man/ProjectCellEmbeddings.Rd | 31 ++++++--------- man/RPCAIntegration.Rd | 37 ++++++++++------- man/RunCCA.Rd | 2 + man/SCTAssay-class.Rd | 2 + man/SelectIntegrationFeatures5.Rd | 40 +++++++++++++++++++ man/SelectSCTIntegrationFeatures.Rd | 26 ++++++++++++ man/VST.Rd | 10 ++++- 29 files changed, 320 insertions(+), 150 deletions(-) create mode 100644 man/FetchResidualSCTModel.Rd create mode 100644 man/SelectIntegrationFeatures5.Rd create mode 100644 man/SelectSCTIntegrationFeatures.Rd diff --git a/R/differential_expression.R b/R/differential_expression.R index 5db0df637..9e3285ac4 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2356,7 +2356,6 @@ ValidateCellGroups <- function( #' @importFrom stats wilcox.test #' @importFrom future.apply future_sapply #' @importFrom future nbrOfWorkers -#' @importFrom presto wilcoxauc # # @export # diff --git a/R/integration.R b/R/integration.R index 4a7f1358d..6a945b019 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3019,6 +3019,23 @@ SelectIntegrationFeatures <- function( return(franks) } +#' Select integration features +#' +#' @param object Seurat object +#' @param nfeatures Number of features to return for integration +#' @param assay Name of assay to use for integration feature selection +#' @param method Which method to pull. For \code{HVFInfo} and +#' \code{VariableFeatures}, choose one from one of the +#' following: +#' \itemize{ +#' \item \dQuote{vst} +#' \item \dQuote{sctransform} or \dQuote{sct} +#' \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or +#' \dQuote{disp} +#' } +#' @param layers Name of layers to use for integration feature selection +#' @param verbose Print messages +#' #' @export #' SelectIntegrationFeatures5 <- function( @@ -3043,6 +3060,13 @@ SelectIntegrationFeatures5 <- function( return(var.features) } +#' Select SCT integration features +#' +#' @param object Seurat object +#' @param nfeatures Number of features to return for integration +#' @param assay Name of assay to use for integration feature selection +#' @param verbose Print messages +#' #' @export #' SelectSCTIntegrationFeatures <- function( diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 9e933bd51..9c61ae757 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1272,23 +1272,23 @@ SCTransform.StdAssay <- function( residual_type = "pearson", min_variance = min_var, res_clip_range = res_clip_range, - verbosity = FALSE # as.numeric(x = verbose) * 2 + verbosity = FALSE ) } else { new_residual <- get_residuals( vst_out = vst_out, - umi = counts.vp[all.features, ], + umi = counts.vp, residual_type = "pearson", min_variance = min_var, res_clip_range = res_clip_range, - verbosity = FALSE # as.numeric(x = verbose) * 2 + verbosity = FALSE ) } vst_out$y <- new_residual corrected_counts[[i]] <- correct_counts( x = vst_out, - umi = counts.vp[all_features,], - verbosity = FALSE # as.numeric(x = verbose) * 2 + umi = counts.vp[all_features, ], + verbosity = FALSE ) residuals[[i]] <- new_residual cell_attrs[[i]] <- cell_attr diff --git a/man/AggregateExpression.Rd b/man/AggregateExpression.Rd index d54854e8e..e887a50c2 100644 --- a/man/AggregateExpression.Rd +++ b/man/AggregateExpression.Rd @@ -58,8 +58,10 @@ If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' slot is le the 'data' slot is filled with NA, and 'scale.data' is set to the aggregated values. } \examples{ +\dontrun{ data("pbmc_small") head(AggregateExpression(object = pbmc_small)) +} } \concept{utilities} diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index f4b2f8ff9..c65c0ef97 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -34,6 +34,8 @@ AverageExpression( \item{slot}{Slot(s) to use; if multiple slots are given, assumed to follow the order of 'assays' (if specified) or object's assays} +\item{method}{Method of collapsing expression values. Either 'average' or 'aggregate'} + \item{verbose}{Print messages and show progress bar} \item{...}{Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}} diff --git a/man/BuildClusterTree.Rd b/man/BuildClusterTree.Rd index f8c061167..01d26dd59 100644 --- a/man/BuildClusterTree.Rd +++ b/man/BuildClusterTree.Rd @@ -60,12 +60,14 @@ or PC scores are averaged across all cells in an identity class before the tree is constructed. } \examples{ +\dontrun{ if (requireNamespace("ape", quietly = TRUE)) { data("pbmc_small") pbmc_small pbmc_small <- BuildClusterTree(object = pbmc_small) Tool(object = pbmc_small, slot = 'BuildClusterTree') } +} } \concept{tree} diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 5c528f647..36588745e 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -22,27 +22,34 @@ CCAIntegration( ) } \arguments{ -\item{assay}{A vector of assay names specifying which assay to use when -constructing anchors. If NULL, the current default assay for each object is -used.} - -\item{reference}{A vector specifying the object/s to be used as a reference -during integration. If NULL (default), all pairwise anchors are found (no -reference/s). If not NULL, the corresponding objects in \code{object.list} -will be used as references. When using a set of specified references, anchors -are first found between each query and each reference. The references are -then integrated through pairwise integration. Each query is then mapped to -the integrated reference.} +\item{object}{A \code{Seurat} object} + +\item{assay}{Name of \code{Assay} in the \code{Seurat} object} + +\item{layers}{Names of layers in \code{assay}} + +\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct} + +\item{new.reduction}{Name of new integrated dimensional reduction} + +\item{reference}{A reference \code{Seurat} object} + +\item{features}{A vector of features to use for integration} \item{normalization.method}{Name of normalization method used: LogNormalize or SCT} -\item{dims}{Which dimensions to use from the CCA to specify the neighbor -search space} +\item{dims}{Dimensions of dimensional reduction to use for integration} + +\item{groups}{A one-column data frame with grouping information} + +\item{k.filter}{Number of anchors to filter} + +\item{scale.layer}{Name of scaled layer in \code{Assay}} -\item{k.filter}{How many neighbors (k) to use when filtering anchors} +\item{verbose}{Print progress} -\item{verbose}{Print progress bars and output} +\item{...}{Additional arguments passed to \code{FindIntegrationAnchors}} } \description{ Seurat-CCA Integration diff --git a/man/CalcDispersion.Rd b/man/CalcDispersion.Rd index 72ca1a6d1..c95d13718 100644 --- a/man/CalcDispersion.Rd +++ b/man/CalcDispersion.Rd @@ -14,6 +14,20 @@ CalcDispersion( ... ) } +\arguments{ +\item{object}{Data matrix} + +\item{mean.function}{Function to calculate mean} + +\item{dispersion.function}{Function to calculate dispersion} + +\item{num.bin}{Number of bins to use} + +\item{binning.method}{Method to use for binning. Options are 'equal_width' or 'equal_frequency'} + +\item{verbose}{Display progress} +} \description{ Calculate dispersion of features } +\keyword{internal} diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd index 1f85d1974..676df098f 100644 --- a/man/ColorDimSplit.Rd +++ b/man/ColorDimSplit.Rd @@ -82,12 +82,14 @@ Returns a DimPlot colored based on whether the cells fall in clusters to the left or to the right of a node split in the cluster tree. } \examples{ +\dontrun{ if (requireNamespace("ape", quietly = TRUE)) { data("pbmc_small") pbmc_small <- BuildClusterTree(object = pbmc_small, verbose = FALSE) PlotClusterTree(pbmc_small) ColorDimSplit(pbmc_small, node = 5) } +} } \seealso{ diff --git a/man/CreateCategoryMatrix.Rd b/man/CreateCategoryMatrix.Rd index 768bd9ef3..c58cf53c6 100644 --- a/man/CreateCategoryMatrix.Rd +++ b/man/CreateCategoryMatrix.Rd @@ -10,6 +10,13 @@ CreateCategoryMatrix( cells.name = NULL ) } +\arguments{ +\item{labels}{A vector of labels} + +\item{method}{Method to aggregate cells with the same label. Either 'aggregate' or 'average'} + +\item{cells.name}{A vector of cell names} +} \description{ Create one hot matrix for a given label } diff --git a/man/DietSeurat.Rd b/man/DietSeurat.Rd index e9360c0aa..d09ee9c38 100644 --- a/man/DietSeurat.Rd +++ b/man/DietSeurat.Rd @@ -35,6 +35,12 @@ remove all Graphs)} \item{misc}{Preserve the \code{misc} slot; default is \code{TRUE}} +\item{counts}{Preserve the count matrices for the assays specified} + +\item{data}{Preserve the data matrices for the assays specified} + +\item{scale.data}{Preserve the scale data matrices for the assays specified} + \item{...}{Ignored} } \value{ diff --git a/man/FetchResidualSCTModel.Rd b/man/FetchResidualSCTModel.Rd new file mode 100644 index 000000000..d12339d63 --- /dev/null +++ b/man/FetchResidualSCTModel.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{FetchResidualSCTModel} +\alias{FetchResidualSCTModel} +\title{Calculate pearson residuals of features not in the scale.data +This function is the secondary function under FetchResiduals} +\usage{ +FetchResidualSCTModel( + object, + assay = "SCT", + umi.assay = "RNA", + layer = "counts", + layer.cells = NULL, + SCTModel = NULL, + reference.SCT.model = NULL, + new_features = NULL, + clip.range = NULL, + replace.value = FALSE, + verbose = FALSE +) +} +\arguments{ +\item{object}{A seurat object} + +\item{assay}{Name of the assay of the seurat object generated by +SCTransform. Default is "SCT"} + +\item{umi.assay}{Name of the assay of the seurat object to fetch +UMIs from. Default is "RNA"} + +\item{layer}{Name of the layer under `umi.assay` to fetch UMIs from. +Default is "counts"} + +\item{layer.cells}{Vector of cells to calculate the residual for. +Default is NULL which uses all cells in the layer} + +\item{SCTModel}{Which SCTmodel to use from the object for calculating +the residual. Will be ignored if reference.SCT.model is set} + +\item{reference.SCT.model}{If a reference SCT model should be used +for calculating the residuals. When set to not NULL, ignores the `SCTModel` +paramater.} + +\item{new_features}{A vector of features to calculate the residuals for} + +\item{clip.range}{Numeric of length two specifying the min and max values +the Pearson residual will be clipped to. Useful if you want to change the +clip.range.} + +\item{replace.value}{Whether to replace the value of residuals if it +already exists} + +\item{verbose}{Whether to print messages and progress bars} +} +\value{ +Returns a matrix containing centered pearson residuals of +added features +} +\description{ +Calculate pearson residuals of features not in the scale.data +This function is the secondary function under FetchResiduals +} diff --git a/man/FetchResiduals_reference.Rd b/man/FetchResiduals_reference.Rd index 54e9ca09f..fc7a42144 100644 --- a/man/FetchResiduals_reference.Rd +++ b/man/FetchResiduals_reference.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/preprocessing5.R \name{FetchResiduals_reference} \alias{FetchResiduals_reference} -\title{temporal function to get residuals from reference} +\title{Calculate pearson residuals of features not in the scale.data} \usage{ FetchResiduals_reference( object, @@ -12,6 +12,23 @@ FetchResiduals_reference( verbose = FALSE ) } -\description{ +\arguments{ +\item{object}{A seurat object} + +\item{reference.SCT.model}{SCTModel object to use for calculating residuals} + +\item{features}{Name of features to add into the scale.data} + +\item{nCount_UMI}{UMI Counts per cell provided to vst} + +\item{verbose}{Whether to print messages and progress bars} +} +\value{ +Returns a Seurat object containing Pearson residuals of added +features in its scale.data + temporal function to get residuals from reference } +\description{ +This function calls sctransform::get_residuals. +} diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index b53f4087e..840f9ee40 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -287,6 +287,7 @@ should be interpreted cautiously, as the genes used for clustering are the same genes tested for differential expression. } \examples{ +\dontrun{ data("pbmc_small") # Find markers for cluster 2 markers <- FindMarkers(object = pbmc_small, ident.1 = 2) @@ -304,6 +305,7 @@ if (requireNamespace("ape", quietly = TRUE)) { markers <- FindMarkers(object = pbmc_small, ident.1 = 'clustertree', ident.2 = 5) head(x = markers) } +} } \references{ diff --git a/man/FoldChange.Rd b/man/FoldChange.Rd index ec8cc72b4..3d9aa0020 100644 --- a/man/FoldChange.Rd +++ b/man/FoldChange.Rd @@ -113,8 +113,10 @@ is returned instead of log fold change and the column is named "avg_diff". Otherwise, log2 fold change is returned with column named "avg_log2_FC". } \examples{ +\dontrun{ data("pbmc_small") FoldChange(pbmc_small, ident.1 = 1) +} } \seealso{ diff --git a/man/GetResidual.Rd b/man/GetResidual.Rd index 12044efec..0d0525c40 100644 --- a/man/GetResidual.Rd +++ b/man/GetResidual.Rd @@ -45,9 +45,11 @@ features in its scale.data This function calls sctransform::get_residuals. } \examples{ +\dontrun{ data("pbmc_small") pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) +} } \seealso{ diff --git a/man/HVFInfo.SCTAssay.Rd b/man/HVFInfo.SCTAssay.Rd index 6e26995d8..ddf24e2d5 100644 --- a/man/HVFInfo.SCTAssay.Rd +++ b/man/HVFInfo.SCTAssay.Rd @@ -4,26 +4,11 @@ \alias{HVFInfo.SCTAssay} \title{Get Variable Feature Information} \usage{ -\method{HVFInfo}{SCTAssay}(object, selection.method, status = FALSE, ...) +\method{HVFInfo}{SCTAssay}(object, method, status = FALSE, ...) } \arguments{ \item{object}{An object} -\item{selection.method}{Which method to pull. For \code{HVFInfo} and -\code{VariableFeatures}, choose one from one of the -following: -\itemize{ - \item \dQuote{vst} - \item \dQuote{sctransform} or \dQuote{sct} - \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or - \dQuote{disp} -} -For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: -\itemize{ - \item \dQuote{markvariogram} - \item \dQuote{moransi} -}} - \item{status}{Add variable status to the resulting data frame} \item{...}{Arguments passed to other methods} @@ -32,9 +17,11 @@ For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: Get variable feature information from \code{\link{SCTAssay}} objects } \examples{ +\dontrun{ # Get the HVF info directly from an SCTAssay object pbmc_small <- SCTransform(pbmc_small) -HVFInfo(pbmc_small[["SCT"]], selection.method = 'sct')[1:5, ] +HVFInfo(pbmc_small[["SCT"]], method = 'sct')[1:5, ] +} } \seealso{ diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 8b8f68e15..1a07d1cd8 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -41,48 +41,8 @@ should be called \code{group}} \item{layers}{Ignored} -\item{npcs}{If doing PCA on input matrix, number of PCs to compute.} - \item{key}{Key for Harmony dimensional reduction} -\item{theta}{Diversity clustering penalty parameter. Specify for each -variable in vars_use Default theta=2. theta=0 does not encourage any -diversity. Larger values of theta result in more diverse clusters.} - -\item{lambda}{Ridge regression penalty parameter. Specify for each variable - in vars_use. -Default lambda=1. Lambda must be strictly positive. Smaller values result -in more aggressive correction.} - -\item{sigma}{Width of soft kmeans clusters. Default sigma=0.1. Sigma scales -the distance from a cell to cluster centroids. Larger values of sigma -result in cells assigned to more clusters. Smaller values of sigma make -soft kmeans cluster approach hard clustering.} - -\item{nclust}{Number of clusters in model. nclust=1 equivalent to simple -linear regression.} - -\item{tau}{Protection against overclustering small datasets with large ones. -tau is the expected number of cells per cluster.} - -\item{block.size}{What proportion of cells to update during clustering. -Between 0 to 1, default 0.05. Larger values may be faster but less accurate} - -\item{max.iter.harmony}{Maximum number of rounds to run Harmony. One round -of Harmony involves one clustering and one correction step.} - -\item{max.iter.cluster}{Maximum number of rounds to run clustering at each -round of Harmony.} - -\item{epsilon.cluster}{Convergence tolerance for clustering round of -Harmony. Set to -Inf to never stop early.} - -\item{epsilon.harmony}{Convergence tolerance for Harmony. Set to -Inf to -never stop early.} - -\item{verbose}{Whether to print progress messages. TRUE to print, -FALSE to suppress.} - \item{...}{Ignored} } \value{ @@ -111,17 +71,16 @@ obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction new.reduction = 'harmony', verbose = FALSE) # Modifying Parameters -# We can also add arguments specific to Harmony such as theta, to give more diverse clusters +# We can also add arguments specific to Harmony such as theta, to give more diverse clusters obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', verbose = FALSE, theta = 3) -} # Integrating SCTransformed data obj <- SCTransform(object = obj) obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', assay = "SCT", verbose = FALSE) - +} } \seealso{ diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd index 1880df070..ff8207e38 100644 --- a/man/JointPCAIntegration.Rd +++ b/man/JointPCAIntegration.Rd @@ -22,27 +22,34 @@ JointPCAIntegration( ) } \arguments{ -\item{assay}{A vector of assay names specifying which assay to use when -constructing anchors. If NULL, the current default assay for each object is -used.} - -\item{reference}{A vector specifying the object/s to be used as a reference -during integration. If NULL (default), all pairwise anchors are found (no -reference/s). If not NULL, the corresponding objects in \code{object.list} -will be used as references. When using a set of specified references, anchors -are first found between each query and each reference. The references are -then integrated through pairwise integration. Each query is then mapped to -the integrated reference.} +\item{object}{A \code{Seurat} object} + +\item{assay}{Name of \code{Assay} in the \code{Seurat} object} + +\item{layers}{Names of layers in \code{assay}} + +\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct} + +\item{new.reduction}{Name of new integrated dimensional reduction} + +\item{reference}{A reference \code{Seurat} object} + +\item{features}{A vector of features to use for integration} \item{normalization.method}{Name of normalization method used: LogNormalize or SCT} -\item{dims}{Which dimensions to use from the CCA to specify the neighbor -search space} +\item{dims}{Dimensions of dimensional reduction to use for integration} \item{k.anchor}{How many neighbors (k) to use when picking anchors} -\item{verbose}{Print progress bars and output} +\item{scale.layer}{Name of scaled layer in \code{Assay}} + +\item{verbose}{Print progress} + +\item{groups}{A one-column data frame with grouping information} + +\item{...}{Additional arguments passed to \code{FindIntegrationAnchors}} } \description{ Seurat-Joint PCA Integration diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index 98aefd5c0..c00ad991f 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -3,7 +3,6 @@ \name{LeverageScore} \alias{LeverageScore} \alias{LeverageScore.default} -\alias{LeverageScore.DelayedMatrix} \alias{LeverageScore.StdAssay} \alias{LeverageScore.Assay} \alias{LeverageScore.Seurat} @@ -22,18 +21,6 @@ LeverageScore(object, ...) ... ) -\method{LeverageScore}{DelayedMatrix}( - object, - nsketch = 5000L, - ndims = NULL, - method = CountSketch, - eps = 0.5, - seed = 123L, - block.size = 1e+08, - verbose = TRUE, - ... -) - \method{LeverageScore}{StdAssay}( object, nsketch = 5000L, diff --git a/man/LogNormalize.Rd b/man/LogNormalize.Rd index 6d45d6321..0c72744b8 100644 --- a/man/LogNormalize.Rd +++ b/man/LogNormalize.Rd @@ -8,11 +8,11 @@ \alias{LogNormalize.default} \title{Normalize Raw Data} \usage{ -LogNormalize(data, scale.factor = 10000, verbose = TRUE, ...) +LogNormalize(data, scale.factor = 10000, margin = 2L, verbose = TRUE, ...) -\method{LogNormalize}{data.frame}(data, scale.factor = 10000, verbose = TRUE, ...) +\method{LogNormalize}{data.frame}(data, scale.factor = 10000, margin = 2L, verbose = TRUE, ...) -\method{LogNormalize}{V3Matrix}(data, scale.factor = 10000, verbose = TRUE, ...) +\method{LogNormalize}{V3Matrix}(data, scale.factor = 10000, margin = 2L, verbose = TRUE, ...) \method{LogNormalize}{default}(data, scale.factor = 10000, margin = 2L, verbose = TRUE, ...) } @@ -21,11 +21,11 @@ LogNormalize(data, scale.factor = 10000, verbose = TRUE, ...) \item{scale.factor}{Scale the data; default is \code{1e4}} +\item{margin}{Margin to normalize over} + \item{verbose}{Print progress} \item{...}{Arguments passed to other methods} - -\item{margin}{Margin to normalize over} } \value{ A matrix with the normalized and log-transformed data diff --git a/man/PlotClusterTree.Rd b/man/PlotClusterTree.Rd index 567c6dd1f..248cd70bd 100644 --- a/man/PlotClusterTree.Rd +++ b/man/PlotClusterTree.Rd @@ -22,10 +22,12 @@ Plots dendogram (must be precomputed using BuildClusterTree), returns no value Plots previously computed tree (from BuildClusterTree) } \examples{ +\dontrun{ if (requireNamespace("ape", quietly = TRUE)) { data("pbmc_small") pbmc_small <- BuildClusterTree(object = pbmc_small) PlotClusterTree(object = pbmc_small) } } +} \concept{visualization} diff --git a/man/ProjectCellEmbeddings.Rd b/man/ProjectCellEmbeddings.Rd index 3e1f37206..fca3c2d60 100644 --- a/man/ProjectCellEmbeddings.Rd +++ b/man/ProjectCellEmbeddings.Rd @@ -8,7 +8,6 @@ \alias{ProjectCellEmbeddings.StdAssay} \alias{ProjectCellEmbeddings.default} \alias{ProjectCellEmbeddings.IterableMatrix} -\alias{ProjectCellEmbeddings.DelayedMatrix} \title{Project query data to the reference dimensional reduction} \usage{ ProjectCellEmbeddings(query, ...) @@ -25,7 +24,8 @@ ProjectCellEmbeddings(query, ...) verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) \method{ProjectCellEmbeddings}{Assay}( @@ -39,7 +39,8 @@ ProjectCellEmbeddings(query, ...) verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) \method{ProjectCellEmbeddings}{SCTAssay}( @@ -53,7 +54,8 @@ ProjectCellEmbeddings(query, ...) verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) \method{ProjectCellEmbeddings}{StdAssay}( @@ -67,7 +69,8 @@ ProjectCellEmbeddings(query, ...) verbose = TRUE, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) \method{ProjectCellEmbeddings}{default}( @@ -82,7 +85,8 @@ ProjectCellEmbeddings(query, ...) features = NULL, nCount_UMI = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + ... ) \method{ProjectCellEmbeddings}{IterableMatrix}( @@ -98,19 +102,8 @@ ProjectCellEmbeddings(query, ...) nCount_UMI = NULL, feature.mean = NULL, feature.sd = NULL, - block.size = 10000 -) - -\method{ProjectCellEmbeddings}{DelayedMatrix}( - query.data, - block.size = 1e+09, - reference, - assay = NULL, - reduction, - normalization.method = NULL, - dims = NULL, - feature.mean = NULL, - feature.sd = NULL + block.size = 10000, + ... ) } \arguments{ diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index 087ecbadc..00f5b5aa2 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -22,27 +22,34 @@ RPCAIntegration( ) } \arguments{ -\item{assay}{A vector of assay names specifying which assay to use when -constructing anchors. If NULL, the current default assay for each object is -used.} - -\item{reference}{A vector specifying the object/s to be used as a reference -during integration. If NULL (default), all pairwise anchors are found (no -reference/s). If not NULL, the corresponding objects in \code{object.list} -will be used as references. When using a set of specified references, anchors -are first found between each query and each reference. The references are -then integrated through pairwise integration. Each query is then mapped to -the integrated reference.} +\item{object}{A \code{Seurat} object} + +\item{assay}{Name of \code{Assay} in the \code{Seurat} object} + +\item{layers}{Names of layers in \code{assay}} + +\item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct} + +\item{new.reduction}{Name of new integrated dimensional reduction} + +\item{reference}{A reference \code{Seurat} object} + +\item{features}{A vector of features to use for integration} \item{normalization.method}{Name of normalization method used: LogNormalize or SCT} -\item{dims}{Which dimensions to use from the CCA to specify the neighbor -search space} +\item{dims}{Dimensions of dimensional reduction to use for integration} + +\item{k.filter}{Number of anchors to filter} + +\item{scale.layer}{Name of scaled layer in \code{Assay}} + +\item{groups}{A one-column data frame with grouping information} -\item{k.filter}{How many neighbors (k) to use when filtering anchors} +\item{verbose}{Print progress} -\item{verbose}{Print progress bars and output} +\item{...}{Additional arguments passed to \code{FindIntegrationAnchors}} } \description{ Seurat-RPCA Integration diff --git a/man/RunCCA.Rd b/man/RunCCA.Rd index 245366b04..aa2c6b14b 100644 --- a/man/RunCCA.Rd +++ b/man/RunCCA.Rd @@ -76,6 +76,7 @@ For details about stored CCA calculation parameters, see \code{PrintCCAParams}. } \examples{ +\dontrun{ data("pbmc_small") pbmc_small # As CCA requires two datasets, we will split our test object into two just for this example @@ -86,6 +87,7 @@ pbmc2[["group"]] <- "group2" pbmc_cca <- RunCCA(object1 = pbmc1, object2 = pbmc2) # Print results print(x = pbmc_cca[["cca"]]) +} } \seealso{ diff --git a/man/SCTAssay-class.Rd b/man/SCTAssay-class.Rd index b06638089..d116a62e9 100644 --- a/man/SCTAssay-class.Rd +++ b/man/SCTAssay-class.Rd @@ -78,9 +78,11 @@ the conversion will automagically fill the new slots with the data pbmc_small <- SCTransform(pbmc_small) } +\dontrun{ # SCTAssay objects are generated from SCTransform pbmc_small <- SCTransform(pbmc_small) pbmc_small[["SCT"]] +} \dontrun{ # Query and change SCT model names diff --git a/man/SelectIntegrationFeatures5.Rd b/man/SelectIntegrationFeatures5.Rd new file mode 100644 index 000000000..334d7c9c7 --- /dev/null +++ b/man/SelectIntegrationFeatures5.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{SelectIntegrationFeatures5} +\alias{SelectIntegrationFeatures5} +\title{Select integration features} +\usage{ +SelectIntegrationFeatures5( + object, + nfeatures = 2000, + assay = NULL, + method = NULL, + layers = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{Seurat object} + +\item{nfeatures}{Number of features to return for integration} + +\item{assay}{Name of assay to use for integration feature selection} + +\item{method}{Which method to pull. For \code{HVFInfo} and +\code{VariableFeatures}, choose one from one of the +following: +\itemize{ + \item \dQuote{vst} + \item \dQuote{sctransform} or \dQuote{sct} + \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or + \dQuote{disp} +}} + +\item{layers}{Name of layers to use for integration feature selection} + +\item{verbose}{Print messages} +} +\description{ +Select integration features +} diff --git a/man/SelectSCTIntegrationFeatures.Rd b/man/SelectSCTIntegrationFeatures.Rd new file mode 100644 index 000000000..fc6da196b --- /dev/null +++ b/man/SelectSCTIntegrationFeatures.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{SelectSCTIntegrationFeatures} +\alias{SelectSCTIntegrationFeatures} +\title{Select SCT integration features} +\usage{ +SelectSCTIntegrationFeatures( + object, + nfeatures = 3000, + assay = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{Seurat object} + +\item{nfeatures}{Number of features to return for integration} + +\item{assay}{Name of assay to use for integration feature selection} + +\item{verbose}{Print messages} +} +\description{ +Select SCT integration features +} diff --git a/man/VST.Rd b/man/VST.Rd index fde1a91eb..f59ce6b10 100644 --- a/man/VST.Rd +++ b/man/VST.Rd @@ -12,7 +12,15 @@ VST(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) \method{VST}{default}(data, margin = 1L, nselect = 2000L, span = 0.3, clip = NULL, ...) -\method{VST}{IterableMatrix}(data, nselect = 2000L, span = 0.3, clip = NULL, verbose = TRUE, ...) +\method{VST}{IterableMatrix}( + data, + margin = 1L, + nselect = 2000L, + span = 0.3, + clip = NULL, + verbose = TRUE, + ... +) \method{VST}{dgCMatrix}( data, From 35f63543bd944fce68dd2a407a2a7fadc2c7f276 Mon Sep 17 00:00:00 2001 From: rsatija Date: Fri, 7 Jul 2023 12:49:52 -0400 Subject: [PATCH 666/979] Updated description --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce93274ff..6d6740309 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9049 +Version: 4.9.9.9050 Date: 2023-06-08 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 10be0e2c7c6d3043447402ba3dba5d5d53c1a6e1 Mon Sep 17 00:00:00 2001 From: rsatija Date: Fri, 7 Jul 2023 12:57:01 -0400 Subject: [PATCH 667/979] correct imports --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index b93a77114..bd04b4a65 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1948,7 +1948,7 @@ CellScatter <- function( #' #' @return A ggplot object #' -#' @importFrom ggplot2 geom_smooth aes_string +#' @importFrom ggplot2 geom_smooth aes_string facet_wrap vars sym labs #' @importFrom patchwork wrap_plots #' #' @export From ed2e797558c1d5d4e33da36bb0f1970ad44b7492 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 7 Jul 2023 12:59:09 -0400 Subject: [PATCH 668/979] fix typo --- R/integration5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index fc24339d6..da5c303f7 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -203,7 +203,7 @@ CCAIntegration <- function( object.list <- list() for (i in seq_along(along.with = layers)) { if (inherits(x = object[[layers[i]]], what = "IterableMatrix")) { - warning("Converting BPCells matrix to dgCMatrix for integration", + warning("Converting BPCells matrix to dgCMatrix for integration ", "as on-disk CCA Integration is not currently supported", call. = FALSE, immediate. = TRUE) counts <- as(object = object[[layers[i]]][features, ], Class = "dgCMatrix") From 6b8bd3da89c39c6341d5b24ed666f17b8b337fff Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 7 Jul 2023 13:08:45 -0400 Subject: [PATCH 669/979] supressWarnings --- R/integration5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index da5c303f7..906f3f4c3 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -326,7 +326,7 @@ RPCAIntegration <- function( } else { object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features,]) + object.list[[i]] <- supressWarnings(CreateSeuratObject(counts = object[[layers[i]]][features,])) VariableFeatures(object = object.list[[i]]) <- features object.list[[i]] <- ScaleData(object = object.list[[i]], verbose = FALSE) object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) From 1e4f6ce3d1bdb80a0269106628e64be70c87f95e Mon Sep 17 00:00:00 2001 From: rsatija Date: Fri, 7 Jul 2023 13:09:23 -0400 Subject: [PATCH 670/979] Updated description --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce93274ff..6b14df466 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9049 -Date: 2023-06-08 +Version: 4.9.9.9050 +Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From c3ca445dfd4db5776811ab27f5a655949a865d74 Mon Sep 17 00:00:00 2001 From: mhkowalski Date: Fri, 7 Jul 2023 13:25:50 -0400 Subject: [PATCH 671/979] update documentation, fix arguments --- R/integration5.R | 55 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 5eff4f57b..91083f8f7 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -143,6 +143,8 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-CCA Integration #' #' @inheritParams FindIntegrationAnchors +#' @inheritParams IntegrateEmbeddings +#' @param ... Arguments passed on to \code{FindTransferAnchors} #' @export #' #' @examples @@ -185,14 +187,15 @@ CCAIntegration <- function( groups = NULL, k.filter = NA, scale.layer = 'scale.data', - verbose = TRUE, dims.to.integrate = NULL, k.weight = 100, weight.reduction = NULL, sd.weight = 1, sample.tree = NULL, preserve.order = FALSE, - ...) { + verbose = TRUE, + ... +) { op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) on.exit(expr = options(op), add = TRUE) normalization.method <- match.arg(arg = normalization.method) @@ -232,12 +235,12 @@ CCAIntegration <- function( object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, - dims.to.integrate = NULL, - k.weight = 100, - weight.reduction = NULL, - sd.weight = 1, - sample.tree = NULL, - preserve.order = FALSE, + dims.to.integrate = dims.to.integrate, + k.weight = k.weight, + weight.reduction = weight.reduction, + sd.weight = sd.weight, + sample.tree = sample.tree, + preserve.order = preserve.order, verbose = verbose, ... ) @@ -286,6 +289,8 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' } #' #' @inheritParams FindIntegrationAnchors +#' @inheritParams IntegrateEmbeddings +#' @param ... Arguments passed on to \code{FindTransferAnchors} #' @export #' RPCAIntegration <- function( @@ -308,7 +313,8 @@ RPCAIntegration <- function( sample.tree = NULL, preserve.order = FALSE, verbose = TRUE, - ...) { + ... +) { op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) on.exit(expr = options(op), add = TRUE) normalization.method <- match.arg(arg = normalization.method) @@ -356,13 +362,14 @@ RPCAIntegration <- function( object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, - dims.to.integrate = NULL, - k.weight = 100, - weight.reduction = NULL, - sd.weight = 1, - sample.tree = NULL, - preserve.order = FALSE, - verbose = verbose + dims.to.integrate = dims.to.integrate, + k.weight = k.weight, + weight.reduction = weight.reduction, + sd.weight = sd.weight, + sample.tree = sample.tree, + preserve.order = preserve.order, + verbose = verbose, + ... ) output.list <- list(object_merged[[new.reduction]]) @@ -375,6 +382,8 @@ attr(x = RPCAIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-Joint PCA Integration #' #' @inheritParams FindIntegrationAnchors +#' @inheritParams IntegrateEmbeddings +#' @param ... Arguments passed on to \code{FindTransferAnchors} #' @export #' JointPCAIntegration <- function( @@ -395,8 +404,8 @@ JointPCAIntegration <- function( sd.weight = 1, sample.tree = NULL, preserve.order = FALSE, - verbose = TRUE, groups = NULL, + verbose = TRUE, ... ) { op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) @@ -451,12 +460,12 @@ JointPCAIntegration <- function( object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, - dims.to.integrate = NULL, - k.weight = 100, - weight.reduction = NULL, - sd.weight = 1, - sample.tree = NULL, - preserve.order = FALSE, + dims.to.integrate = dims.to.integrate, + k.weight = k.weight, + weight.reduction = weight.reduction, + sd.weight = sd.weight, + sample.tree = sample.tree, + preserve.order = preserve.order, verbose = verbose) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) From dc24b9bab38812fe0b64d6b406d6723926160acf Mon Sep 17 00:00:00 2001 From: mhkowalski Date: Fri, 7 Jul 2023 14:52:03 -0400 Subject: [PATCH 672/979] fix yuhan's comments --- R/integration5.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 91083f8f7..5da70f879 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -144,7 +144,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' #' @inheritParams FindIntegrationAnchors #' @inheritParams IntegrateEmbeddings -#' @param ... Arguments passed on to \code{FindTransferAnchors} +#' @param ... Arguments passed on to \code{FindIntegrationAnchors} #' @export #' #' @examples @@ -241,8 +241,7 @@ CCAIntegration <- function( sd.weight = sd.weight, sample.tree = sample.tree, preserve.order = preserve.order, - verbose = verbose, - ... + verbose = verbose ) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) @@ -290,7 +289,7 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' #' @inheritParams FindIntegrationAnchors #' @inheritParams IntegrateEmbeddings -#' @param ... Arguments passed on to \code{FindTransferAnchors} +#' @param ... Arguments passed on to \code{FindIntegrationAnchors} #' @export #' RPCAIntegration <- function( @@ -350,6 +349,7 @@ RPCAIntegration <- function( k.filter = k.filter, reference = reference, verbose = verbose, + ... ) slot(object = anchor, name = "object.list") <- lapply( X = slot( @@ -368,9 +368,8 @@ RPCAIntegration <- function( sd.weight = sd.weight, sample.tree = sample.tree, preserve.order = preserve.order, - verbose = verbose, - ... - ) + verbose = verbose + ) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) @@ -383,7 +382,7 @@ attr(x = RPCAIntegration, which = 'Seurat.method') <- 'integration' #' #' @inheritParams FindIntegrationAnchors #' @inheritParams IntegrateEmbeddings -#' @param ... Arguments passed on to \code{FindTransferAnchors} +#' @param ... Arguments passed on to \code{FindIntegrationAnchors} #' @export #' JointPCAIntegration <- function( @@ -466,7 +465,8 @@ JointPCAIntegration <- function( sd.weight = sd.weight, sample.tree = sample.tree, preserve.order = preserve.order, - verbose = verbose) + verbose = verbose + ) output.list <- list(object_merged[[new.reduction]]) names(output.list) <- c(new.reduction) return(output.list) From 17416f4f2948a177d89b2c6d4b014595e17cc12f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:02:39 -0400 Subject: [PATCH 673/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6b14df466..406d4eaaa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9050 +Version: 4.9.9.9051 Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 5a0ce7f236706e1a991ab2926125de78ff51a084 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 7 Jul 2023 15:15:11 -0400 Subject: [PATCH 674/979] suppress more messages --- R/integration5.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 906f3f4c3..7ff288df8 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -326,11 +326,11 @@ RPCAIntegration <- function( } else { object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- supressWarnings(CreateSeuratObject(counts = object[[layers[i]]][features,])) + object.list[[i]] <- suppressMessages(suppressWarnings(CreateSeuratObject(counts = object[[layers[i]]][features,]))) VariableFeatures(object = object.list[[i]]) <- features - object.list[[i]] <- ScaleData(object = object.list[[i]], verbose = FALSE) + object.list[[i]] <- suppressWarnings(ScaleData(object = object.list[[i]], verbose = FALSE)) object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) - object.list[[i]][['RNA']]$counts <- NULL + suppressWarnings(object.list[[i]][['RNA']]$counts <- NULL) } } anchor <- FindIntegrationAnchors(object.list = object.list, From dfde1120ff8b7e86a8742865e7f2ea63cf49fbca Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:18:43 -0400 Subject: [PATCH 675/979] fix docu --- man/Cells.Rd | 5 +++++ man/CreateSCTAssayObject.Rd | 11 +++++++++++ man/FeaturePlot.Rd | 8 ++++++-- man/GetImage.Rd | 8 ++++++++ man/GetTissueCoordinates.Rd | 4 ++++ man/HVFInfo.SCTAssay.Rd | 22 ++++++++++++++++++++++ man/ImageFeaturePlot.Rd | 13 +++++++------ man/IntegrateData.Rd | 6 ++++-- man/IntegrateEmbeddings.Rd | 6 ++++-- man/Load10X_Spatial.Rd | 2 ++ man/PolyFeaturePlot.Rd | 5 +---- man/Radius.Rd | 3 +++ man/RenameCells.Rd | 7 +++++++ man/STARmap-class.Rd | 15 +++++++++++++++ man/Seurat-package.Rd | 2 +- man/SlideSeq-class.Rd | 15 +++++++++++++++ man/VariableFeaturePlot.Rd | 15 +++++++++++++++ man/as.Seurat.Rd | 4 ++++ man/as.sparse.Rd | 8 ++++---- man/merge.SCTAssay.Rd | 11 +++++++++++ man/reexports.Rd | 2 +- 21 files changed, 150 insertions(+), 22 deletions(-) diff --git a/man/Cells.Rd b/man/Cells.Rd index 8455fe837..4191764b5 100644 --- a/man/Cells.Rd +++ b/man/Cells.Rd @@ -15,6 +15,11 @@ \method{Cells}{VisiumV1}(x, ...) } +\arguments{ +\item{x}{An object} + +\item{...}{Arguments passed to other methods} +} \description{ Get Cell Names } diff --git a/man/CreateSCTAssayObject.Rd b/man/CreateSCTAssayObject.Rd index 9f62a9c0e..70f30f633 100644 --- a/man/CreateSCTAssayObject.Rd +++ b/man/CreateSCTAssayObject.Rd @@ -15,10 +15,21 @@ CreateSCTAssayObject( ) } \arguments{ +\item{counts}{Unnormalized data such as raw counts or TPMs} + +\item{data}{Prenormalized data; if provided, do not pass \code{counts}} + \item{scale.data}{a residual matrix} \item{umi.assay}{The UMI assay name. Default is RNA} +\item{min.cells}{Include features detected in at least this many cells. Will +subset the counts matrix as well. To reintroduce excluded features, create a +new object with a lower cutoff} + +\item{min.features}{Include cells where at least this many features are +detected} + \item{SCTModel.list}{list of SCTModels} } \description{ diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index 8df3185d9..6778ee488 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,8 +10,12 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { - c("lightgrey", "blue") }, + cols = if (blend) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "blue") + }, pt.size = NULL, alpha = 1, order = FALSE, diff --git a/man/GetImage.Rd b/man/GetImage.Rd index 43f64ed09..a0d134863 100644 --- a/man/GetImage.Rd +++ b/man/GetImage.Rd @@ -12,6 +12,14 @@ \method{GetImage}{VisiumV1}(object, mode = c("grob", "raster", "plotly", "raw"), ...) } +\arguments{ +\item{object}{An object} + +\item{mode}{How to return the image; should accept one of \dQuote{grob}, +\dQuote{raster}, \dQuote{plotly}, or \dQuote{raw}} + +\item{...}{Arguments passed to other methods} +} \description{ Get Image Data } diff --git a/man/GetTissueCoordinates.Rd b/man/GetTissueCoordinates.Rd index 759743d43..5ccb69d68 100644 --- a/man/GetTissueCoordinates.Rd +++ b/man/GetTissueCoordinates.Rd @@ -18,6 +18,10 @@ ) } \arguments{ +\item{object}{An object} + +\item{...}{Arguments passed to other methods} + \item{qhulls}{return qhulls instead of centroids} \item{scale}{A factor to scale the coordinates by; choose from: 'tissue', diff --git a/man/HVFInfo.SCTAssay.Rd b/man/HVFInfo.SCTAssay.Rd index 1f4be45cb..6e26995d8 100644 --- a/man/HVFInfo.SCTAssay.Rd +++ b/man/HVFInfo.SCTAssay.Rd @@ -6,6 +6,28 @@ \usage{ \method{HVFInfo}{SCTAssay}(object, selection.method, status = FALSE, ...) } +\arguments{ +\item{object}{An object} + +\item{selection.method}{Which method to pull. For \code{HVFInfo} and +\code{VariableFeatures}, choose one from one of the +following: +\itemize{ + \item \dQuote{vst} + \item \dQuote{sctransform} or \dQuote{sct} + \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or + \dQuote{disp} +} +For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: +\itemize{ + \item \dQuote{markvariogram} + \item \dQuote{moransi} +}} + +\item{status}{Add variable status to the resulting data frame} + +\item{...}{Arguments passed to other methods} +} \description{ Get variable feature information from \code{\link{SCTAssay}} objects } diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd index 1caf4bedc..0e375c441 100644 --- a/man/ImageFeaturePlot.Rd +++ b/man/ImageFeaturePlot.Rd @@ -9,8 +9,12 @@ ImageFeaturePlot( features, fov = NULL, boundaries = NULL, - cols = if (isTRUE(x = blend)) { c("lightgrey", "#ff0000", "#00ff00") } else { - c("lightgrey", "firebrick1") }, + cols = if (isTRUE(x = blend)) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "firebrick1") + }, size = 0.5, min.cutoff = NA, max.cutoff = NA, @@ -64,10 +68,7 @@ When blend is \code{TRUE}, takes anywhere from 1-3 colors: \item{size}{Point size for cells when plotting centroids} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident' diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index c02543005..e08bd682e 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,10 +64,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
    }} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index c3f96ffa5..dc0469132 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,10 +75,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
    }} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/Load10X_Spatial.Rd b/man/Load10X_Spatial.Rd index 00fe68a3b..84c8c0ec8 100644 --- a/man/Load10X_Spatial.Rd +++ b/man/Load10X_Spatial.Rd @@ -20,6 +20,8 @@ and the image data in a subdirectory called \code{spatial}} \item{filename}{Name of H5 file containing the feature barcode matrix} +\item{assay}{Name of the initial assay} + \item{slice}{Name for the stored image of the tissue slice} \item{filter.matrix}{Only keep spots that have been determined to be over diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 30d0bdcad..59a75466d 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,10 +33,7 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/Radius.Rd b/man/Radius.Rd index a1c5d6a8a..a0b1a74db 100644 --- a/man/Radius.Rd +++ b/man/Radius.Rd @@ -12,6 +12,9 @@ \method{Radius}{VisiumV1}(object) } +\arguments{ +\item{object}{An image object} +} \description{ Get Spot Radius } diff --git a/man/RenameCells.Rd b/man/RenameCells.Rd index bf4dd21f9..105b7d1e9 100644 --- a/man/RenameCells.Rd +++ b/man/RenameCells.Rd @@ -15,6 +15,13 @@ \method{RenameCells}{VisiumV1}(object, new.names = NULL, ...) } +\arguments{ +\item{object}{An object} + +\item{new.names}{vector of new cell names} + +\item{...}{Arguments passed to other methods} +} \description{ Rename Cells in an Object } diff --git a/man/STARmap-class.Rd b/man/STARmap-class.Rd index 25d6a3fc5..30ab87d92 100644 --- a/man/STARmap-class.Rd +++ b/man/STARmap-class.Rd @@ -8,5 +8,20 @@ \description{ The STARmap class } +\section{Slots}{ + + +\describe{ +\item{\code{assay}}{Name of assay to associate image data with; will give this image +priority for visualization when the assay is set as the active/default assay +in a \code{Seurat} object} + +\item{\code{key}}{A one-length character vector with the object's key; keys must +be one or more alphanumeric characters followed by an underscore +\dQuote{\code{_}} (regex pattern +\dQuote{\code{^[a-zA-Z][a-zA-Z0-9]*_$}})} +} +} + \concept{objects} \concept{spatial} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 91fe25746..79a0fbc5b 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. } \section{Package options}{ diff --git a/man/SlideSeq-class.Rd b/man/SlideSeq-class.Rd index d9859af4f..60cdb125f 100644 --- a/man/SlideSeq-class.Rd +++ b/man/SlideSeq-class.Rd @@ -14,4 +14,19 @@ The SlideSeq class represents spatial information from the Slide-seq platform \item{\code{coordinates}}{...} }} +\section{Slots}{ + + +\describe{ +\item{\code{assay}}{Name of assay to associate image data with; will give this image +priority for visualization when the assay is set as the active/default assay +in a \code{Seurat} object} + +\item{\code{key}}{A one-length character vector with the object's key; keys must +be one or more alphanumeric characters followed by an underscore +\dQuote{\code{_}} (regex pattern +\dQuote{\code{^[a-zA-Z][a-zA-Z0-9]*_$}})} +} +} + \concept{spatial} diff --git a/man/VariableFeaturePlot.Rd b/man/VariableFeaturePlot.Rd index 97ccbf073..9320b3f3d 100644 --- a/man/VariableFeaturePlot.Rd +++ b/man/VariableFeaturePlot.Rd @@ -26,6 +26,21 @@ VariableFeaturePlot( \item{log}{Plot the x-axis in log scale} +\item{selection.method}{Which method to pull. For \code{HVFInfo} and +\code{VariableFeatures}, choose one from one of the +following: +\itemize{ + \item \dQuote{vst} + \item \dQuote{sctransform} or \dQuote{sct} + \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or + \dQuote{disp} +} +For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: +\itemize{ + \item \dQuote{markvariogram} + \item \dQuote{moransi} +}} + \item{assay}{Assay to pull variable features from} \item{raster}{Convert points to raster format, default is \code{NULL} diff --git a/man/as.Seurat.Rd b/man/as.Seurat.Rd index b02d98696..58ffd4e64 100644 --- a/man/as.Seurat.Rd +++ b/man/as.Seurat.Rd @@ -17,12 +17,16 @@ ) } \arguments{ +\item{x}{An object to convert to class \code{Seurat}} + \item{slot}{Slot to store expression data as} \item{assay}{Name of assays to convert; set to \code{NULL} for all assays to be converted} \item{verbose}{Show progress updates} +\item{...}{Arguments passed to other methods} + \item{counts}{name of the SingleCellExperiment assay to store as \code{counts}; set to \code{NULL} if only normalized data are present} diff --git a/man/as.sparse.Rd b/man/as.sparse.Rd index cbeca89a6..481122512 100644 --- a/man/as.sparse.Rd +++ b/man/as.sparse.Rd @@ -16,12 +16,12 @@ ) } \arguments{ -\item{x}{any \R object.} +\item{x}{An object} -\item{...}{additional arguments to be passed to or from methods.} +\item{...}{Arguments passed to other methods} -\item{row.names}{\code{NULL} or a character vector giving the row - names for the data frame. Missing values are not allowed.} +\item{row.names}{\code{NULL} or a character vector giving the row names for +the data; missing values are not allowed} \item{optional}{logical. If \code{TRUE}, setting row names and converting column names (to syntactic names: see diff --git a/man/merge.SCTAssay.Rd b/man/merge.SCTAssay.Rd index 5012c54bf..f976cbc1d 100644 --- a/man/merge.SCTAssay.Rd +++ b/man/merge.SCTAssay.Rd @@ -16,9 +16,20 @@ \arguments{ \item{x}{A \code{\link[SeuratObject]{Seurat}} object} +\item{y}{A single \code{Seurat} object or a list of \code{Seurat} objects} + +\item{add.cell.ids}{A character vector of \code{length(x = c(x, y))}; +appends the corresponding values to the start of each objects' cell names} + +\item{merge.data}{Merge the data slots instead of just merging the counts +(which requires renormalization); this is recommended if the same +normalization approach was applied to all objects} + \item{na.rm}{If na.rm = TRUE, this will only preserve residuals that are present in all SCTAssays being merged. Otherwise, missing residuals will be populated with NAs.} + +\item{...}{Arguments passed to other methods} } \description{ Merge SCTAssay objects diff --git a/man/reexports.Rd b/man/reexports.Rd index e14c1212e..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -73,6 +73,6 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{components}}} - \item{SeuratObject}{\code{\link[SeuratObject]{\%iff\%}}, \code{\link[SeuratObject]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject]{Misc<-}}, \code{\link[SeuratObject]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject]{RenameIdents}}, \code{\link[SeuratObject]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject]{SVFInfo}}, \code{\link[SeuratObject]{SetAssayData}}, \code{\link[SeuratObject]{SetIdent}}, \code{\link[SeuratObject]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 49bac00089526ea7291e73080d240c265cbf074c Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 15:20:20 -0400 Subject: [PATCH 676/979] cran-specific checks: Rd line length and class vs. inherits --- R/integration5.R | 6 ++++-- R/visualization.R | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 918f97113..30741eb89 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -270,13 +270,15 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' #' # Reference-based Integration #' # Here, we use the first layer as a reference for integraion -#' # Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources +#' # Thus, we only identify anchors between the reference and the rest of the datasets, +#' # saving computational resources #' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, #' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' reference = 1, verbose = FALSE) #' #' # Modifying parameters -#' # We can also specify parameters such as `k.anchor` to increase the strength of integration +#' # We can also specify parameters such as `k.anchor` to increase the strength of +#' # integration #' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, #' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' k.anchor = 20, verbose = FALSE) diff --git a/R/visualization.R b/R/visualization.R index c325a47b0..c3a85f7b3 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4045,7 +4045,7 @@ SpatialPlot <- function( } # Get feature max for individual feature - if (!(is.null(x = keep.scale)) && keep.scale == "feature" && class(x = data[, features[j]]) != "factor") { + if (!(is.null(x = keep.scale)) && keep.scale == "feature" && !inherits(x = data[, features[j]], "factor")) { max.feature.value <- max(data[, features[j]]) } @@ -4121,7 +4121,7 @@ SpatialPlot <- function( } # Plot multiple images depending on keep.scale - if (!(is.null(x = keep.scale)) && class(x = data[, features[j]]) != "factor") { + if (!(is.null(x = keep.scale)) && !inherits(x = data[, features[j]], "factor")) { plot <- suppressMessages(plot & scale_fill_gradientn(colors = SpatialColors(n = 100), limits = c(NA, max.feature.value))) } From dc724bbcb0517ed95c54d5f3c19143ccc9c971b6 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 15:20:50 -0400 Subject: [PATCH 677/979] update docs --- man/RPCAIntegration.Rd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index 00f5b5aa2..ca47b9deb 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -71,13 +71,15 @@ obj <- IntegrateLayers(object = obj, method = RPCAIntegration, # Reference-based Integration # Here, we use the first layer as a reference for integraion -# Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources +# Thus, we only identify anchors between the reference and the rest of the datasets, +# saving computational resources obj <- IntegrateLayers(object = obj, method = RPCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.rpca', reference = 1, verbose = FALSE) # Modifying parameters -# We can also specify parameters such as `k.anchor` to increase the strength of integration +# We can also specify parameters such as `k.anchor` to increase the strength of +# integration obj <- IntegrateLayers(object = obj, method = RPCAIntegration, orig.reduction = "pca", new.reduction = 'integrated.rpca', k.anchor = 20, verbose = FALSE) From 183c1d30df0bb9bc49123e845afef4dbb8f1929f Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:20:52 -0400 Subject: [PATCH 678/979] minor style --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 164a734fc..46750cbeb 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -189,7 +189,7 @@ AddModuleScore <- function( assay.old <- DefaultAssay(object = object) assay <- assay %||% assay.old DefaultAssay(object = object) <- assay - assay.data <- GetAssayData(object = object,assay = assay, slot = slot) + assay.data <- GetAssayData(object = object, assay = assay, slot = slot) features.old <- features if (k) { .NotYetUsed(arg = 'k') From 5af2d93acc769324331ec310a49f17de3eb7d851 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:22:56 -0400 Subject: [PATCH 679/979] bump version --- DESCRIPTION | 2 +- src/RcppExports.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 406d4eaaa..57ec0d865 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9051 +Version: 4.9.9.9052 Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From e722ad6da5ffefdd515887f54c9cf3bafbe90708 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:23:51 -0400 Subject: [PATCH 680/979] fix docu --- man/FeaturePlot.Rd | 8 ++++++-- man/ImageFeaturePlot.Rd | 13 +++++++------ man/IntegrateData.Rd | 6 ++++-- man/IntegrateEmbeddings.Rd | 6 ++++-- man/PolyFeaturePlot.Rd | 5 +---- man/Seurat-package.Rd | 2 +- man/reexports.Rd | 2 +- 7 files changed, 24 insertions(+), 18 deletions(-) diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index f379647a3..bf09e5640 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -10,8 +10,12 @@ FeaturePlot( features, dims = c(1, 2), cells = NULL, - cols = if (blend) { c("lightgrey", "#ff0000", "#00ff00") } else { - c("lightgrey", "blue") }, + cols = if (blend) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "blue") + }, pt.size = NULL, alpha = 1, order = FALSE, diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd index 00547b73a..e40ab169a 100644 --- a/man/ImageFeaturePlot.Rd +++ b/man/ImageFeaturePlot.Rd @@ -9,8 +9,12 @@ ImageFeaturePlot( features, fov = NULL, boundaries = NULL, - cols = if (isTRUE(x = blend)) { c("lightgrey", "#ff0000", "#00ff00") } else { - c("lightgrey", "firebrick1") }, + cols = if (isTRUE(x = blend)) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "firebrick1") + }, size = 0.5, min.cutoff = NA, max.cutoff = NA, @@ -64,10 +68,7 @@ When blend is \code{TRUE}, takes anywhere from 1-3 colors: \item{size}{Point size for cells when plotting centroids} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{split.by}{A factor in object metadata to split the plot by, pass 'ident' diff --git a/man/IntegrateData.Rd b/man/IntegrateData.Rd index c02543005..e08bd682e 100644 --- a/man/IntegrateData.Rd +++ b/man/IntegrateData.Rd @@ -64,10 +64,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
    }} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index c3f96ffa5..dc0469132 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -75,10 +75,12 @@ should be encoded in a matrix, where each row represents one of the pairwise integration steps. Negative numbers specify a dataset, positive numbers specify the integration results from a given row (the format of the merge matrix included in the \code{\link{hclust}} function output). For example: -\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives:\preformatted{ [,1] [,2] +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] [1,] -2 -3 [2,] 1 -1 -} +}\if{html}{\out{
    }} Which would cause dataset 2 and 3 to be integrated first, then the resulting object integrated with dataset 1. diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 30d0bdcad..59a75466d 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -33,10 +33,7 @@ PolyFeaturePlot( \item{ncol}{Number of columns to split the plot into} -\item{min.cutoff}{Vector of minimum and maximum cutoff values for each feature, -may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} - -\item{max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} \item{common.scale}{...} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 91fe25746..79a0fbc5b 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -6,7 +6,7 @@ \alias{Seurat-package} \title{Seurat: Tools for Single Cell Genomics} \description{ -A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. +A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) \doi{10.1038/nbt.3192}, Macosko E, Basu A, Satija R, et al (2015) \doi{10.1016/j.cell.2015.05.002}, Stuart T, Butler A, et al (2019) \doi{10.1016/j.cell.2019.05.031}, and Hao, Hao, et al (2020) \doi{10.1101/2020.10.12.335331} for more details. } \section{Package options}{ diff --git a/man/reexports.Rd b/man/reexports.Rd index 6320f6d47..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -73,6 +73,6 @@ below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{components}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From a104b6a1b1b39e83fd65724066f2059f38ab77a4 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 15:28:41 -0400 Subject: [PATCH 681/979] update tests for v5 --- tests/testthat/test_dimensional_reduction.R | 5 +++-- tests/testthat/test_integratedata.R | 16 ++++++++++++++-- tests/testthat/test_integration.R | 8 +++++++- tests/testthat/test_objects.R | 15 ++++++++------- tests/testthat/test_transferdata.R | 8 +++++++- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index 9fadd6ea6..ab87e5a36 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -39,9 +39,10 @@ test_that("pca returns total variance (see #982)", { row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) # Create Seurat object for testing - obj <- CreateSeuratObject(counts = dummyexpMat) + obj <- CreateSeuratObject(counts = as.sparse(dummyexpMat)) - # Scale and compute PCA, using RunPCA + # Normalize, scale, and compute PCA, using RunPCA + # obj <- NormalizeData(object = obj, verbose = FALSE) obj <- ScaleData(object = obj, verbose = FALSE) pca_result <- suppressWarnings(expr = RunPCA( object = obj, diff --git a/tests/testthat/test_integratedata.R b/tests/testthat/test_integratedata.R index ebc2036bd..d327ee67c 100644 --- a/tests/testthat/test_integratedata.R +++ b/tests/testthat/test_integratedata.R @@ -6,10 +6,22 @@ pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small)) ref <- pbmc_small ref <- FindVariableFeatures(object = ref, verbose = FALSE, nfeatures = 100) query <- CreateSeuratObject( - counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1) + counts = as.sparse( + GetAssayData( + object = pbmc_small[['RNA']], + slot = "counts") + rpois(n = ncol(pbmc_small), + lambda = 1 + ) + ) ) query2 <- CreateSeuratObject( - counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1) + counts = as.sparse( + GetAssayData( + object = pbmc_small[['RNA']], + slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), + lambda = 1 + ) + ) ) query.list <- list(query, query2) query.list <- lapply(X = query.list, FUN = NormalizeData, verbose = FALSE) diff --git a/tests/testthat/test_integration.R b/tests/testthat/test_integration.R index ff5f57e01..4236f4217 100644 --- a/tests/testthat/test_integration.R +++ b/tests/testthat/test_integration.R @@ -5,7 +5,13 @@ pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small)) # Setup test objects ref <- pbmc_small query <- CreateSeuratObject( - counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1) + counts = as.sparse( + GetAssayData( + object = pbmc_small[['RNA']], + slot = "counts") + rpois(n = ncol(pbmc_small), + lambda = 1 + ) + ) ) query <- NormalizeData(object = query, verbose = FALSE) query <- FindVariableFeatures(object = query, verbose = FALSE, nfeatures = 100) diff --git a/tests/testthat/test_objects.R b/tests/testthat/test_objects.R index ed6961b14..84b137bd4 100644 --- a/tests/testthat/test_objects.R +++ b/tests/testthat/test_objects.R @@ -30,22 +30,23 @@ feature_letters_shuffled <- sample(x = feature_letters) test_that("AddMetaData adds feature level metadata", { pbmc_small[["RNA"]] <- AddMetaData(object = pbmc_small[["RNA"]], metadata = feature_letters, col.name = 'feature_letters') - expect_equal(pbmc_small[["RNA"]][["feature_letters", drop = TRUE]], feature_letters) + expect_equal(pbmc_small[["RNA"]]["feature_letters", drop = TRUE], feature_letters) pbmc_small[["RNA"]] <- AddMetaData(object = pbmc_small[["RNA"]], metadata = feature_letters_shuffled, col.name = 'feature_letters_shuffled') - expect_equal(pbmc_small[["RNA"]][["feature_letters", drop = TRUE]], pbmc_small[["RNA"]][["feature_letters_shuffled", drop = TRUE]]) + expect_equal(pbmc_small[["RNA"]]["feature_letters", drop = TRUE], pbmc_small[["RNA"]]["feature_letters_shuffled", drop = TRUE]) }) feature_letters_df <- data.frame(A = feature_letters, B = feature_letters_shuffled) test_that("AddMetaData adds in data frame properly for Assays", { pbmc_small[["RNA"]] <- AddMetaData(object = pbmc_small[["RNA"]], metadata = feature_letters_df) - expect_equal(pbmc_small[["RNA"]][[c("A", "B")]], feature_letters_df) + expect_equal(pbmc_small[["RNA"]][c("A", "B")], feature_letters_df) }) test_that("AddMetaData errors", { expect_error(AddMetaData(object = pbmc_small, metadata = cluster_letters, col.name = "RNA")) - expect_error(AddMetaData(object = pbmc_small, metadata = c(unname(cluster_letters), "A"), col.name = "letter.idents")) + # expect_error(AddMetaData(object = pbmc_small, metadata = c(unname(cluster_letters), "A"), col.name = "letter.idents")) # doesnt error in v5 expect_error(AddMetaData(object = pbmc_small, metadata = feature_letters, col.name = "letter.idents")) expect_error(AddMetaData(object = pbmc_small[["RNA"]], metadata = cluster_letters, col.name = "letter.idents")) + expect_error(AddMetaData(object = pbmc_small, metadata = data.frame(), col.name = "letter.idents")) }) # Tests for creating an Assay object @@ -63,10 +64,10 @@ test_that("CreateAssayObject works as expected", { expect_equal(GetAssayData(object = rna.assay, slot = "counts"), pbmc.raw) expect_equal(GetAssayData(object = rna.assay, slot = "data"), pbmc.raw) expect_equal(GetAssayData(object = rna.assay, slot = "scale.data"), new(Class = "matrix")) - expect_equal(dim(rna.assay[[]]), c(230, 0)) - expect_equal(rownames(x = rna.assay[[]]), rownames(x = rna.assay)) + expect_equal(dim(rna.assay[]), c(230, 0)) + expect_equal(rownames(x = rna.assay[]), rownames(x = rna.assay)) expect_equal(VariableFeatures(object = rna.assay), vector()) - expect_equal(rna.assay@misc, list()) + expect_equal(rna.assay@misc, list("calcN" = TRUE)) expect_equal(GetAssayData(object = rna.assay2, slot = "counts"), new(Class = "matrix")) }) diff --git a/tests/testthat/test_transferdata.R b/tests/testthat/test_transferdata.R index d4e8f2103..bb7cb0527 100644 --- a/tests/testthat/test_transferdata.R +++ b/tests/testthat/test_transferdata.R @@ -5,7 +5,13 @@ pbmc_small <- suppressWarnings(UpdateSeuratObject(pbmc_small)) # Setup test objects ref <- pbmc_small query <- CreateSeuratObject( - counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1) + counts = as.sparse( + GetAssayData( + object = pbmc_small[['RNA']], + slot = "counts") + rpois(n = ncol(pbmc_small), + lambda = 1 + ) + ) ) query <- NormalizeData(object = query, verbose = FALSE) query <- FindVariableFeatures(object = query, verbose = FALSE, nfeatures = 100) From 35e28845230fce9912255645517d6b001218b51d Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:38:39 -0400 Subject: [PATCH 682/979] minor style --- R/visualization.R | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index a748bf3d2..ae05c14a5 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -894,7 +894,7 @@ DimPlot <- function( data[, shape.by] <- object[[shape.by, drop = TRUE]] } if (!is.null(x = split.by)) { - data[, split.by] <- FetchData(object,split.by)[split.by] + data[, split.by] <- FetchData(object = object, vars = split.by)[split.by] } if (isTRUE(x = shuffle)) { set.seed(seed = seed) @@ -2009,13 +2009,13 @@ FeatureScatter <- function( } } if (!is.null(x = split.by)) { - data[, split.by] <- FetchData(object,split.by)[split.by] + data[, split.by] <- FetchData(object = object, vars = split.by)[split.by] } plots <- lapply( X = group.by, FUN = function(x) { plot <- SingleCorPlot( - data = data[,c(feature1, feature2,split.by)], + data = data[,c(feature1, feature2, split.by)], col.by = data[, x], cols = cols, pt.size = pt.size, @@ -4375,7 +4375,7 @@ DotPlot <- function( id.levels <- levels(x = data.features$id) data.features$id <- as.vector(x = data.features$id) if (!is.null(x = split.by)) { - splits <- FetchData(object,split.by)[cells,split.by] + splits <- FetchData(object = object, vars = split.by)[cells, split.by] if (split.colors) { if (length(x = unique(x = splits)) > length(x = cols)) { stop(paste0("Need to specify at least ", length(x = unique(x = splits)), " colors using the cols parameter")) @@ -4464,8 +4464,19 @@ DotPlot <- function( data.plot$pct.exp[data.plot$pct.exp < dot.min] <- NA data.plot$pct.exp <- data.plot$pct.exp * 100 if (split.colors) { - splits.use <- unlist(lapply(data.plot$id, function(x) - sub(paste0(".*_(",paste(sort(unique(splits),decreasing = TRUE), collapse = '|'),")$"), "\\1", x) )) + splits.use <- unlist(x = lapply( + X = data.plot$id, + FUN = function(x) + sub( + paste0(".*_(", + paste(sort(unique(x = splits), decreasing = TRUE), + collapse = '|' + ),")$"), + "\\1", + x + ) + ) + ) data.plot$colors <- mapply( FUN = function(color, value) { return(colorRampPalette(colors = c('grey', color))(20)[value]) From 108529bb152f5f77caa6f4a297b384e3adfbdb1a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 15:42:24 -0400 Subject: [PATCH 683/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 57ec0d865..75756f160 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9052 +Version: 4.9.9.9053 Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 5fa6e7e808285a53f3c697ca16b9f143eae41c94 Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 15:59:14 -0400 Subject: [PATCH 684/979] fix argument deprecation warnings --- tests/testthat/test_preprocessing.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index d84ef3bdd..8019ebbf7 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -70,7 +70,7 @@ test_that("NormalizeData scales properly", { expect_equal(Command(object = object, command = "NormalizeData.RNA", value = "normalization.method"), "LogNormalize") }) -normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], slot = "counts"), verbose = FALSE) +normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE) test_that("LogNormalize normalizes properly", { expect_equal( LogNormalize(data = GetAssayData(object = object[["RNA"]], slot = "counts"), verbose = FALSE), @@ -238,19 +238,19 @@ object <- FindVariableFeatures(object = object, selection.method = "mean.var.plo test_that("mean.var.plot selection option returns expected values", { expect_equal(VariableFeatures(object = object)[1:4], c("PTGDR", "SATB1", "ZNF330", "S100B")) expect_equal(length(x = VariableFeatures(object = object)), 20) - expect_equal(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')$mean[1:2], c(8.328927, 8.444462), tolerance = 1e-6) - expect_equal(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')$dispersion[1:2], c(10.552507, 10.088223), tolerance = 1e-6) - expect_equal(as.numeric(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')$dispersion.scaled[1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) + expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$mean[1:2], c(8.328927, 8.444462), tolerance = 1e-6) + expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion[1:2], c(10.552507, 10.088223), tolerance = 1e-6) + expect_equal(as.numeric(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion.scaled[1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) }) object <- FindVariableFeatures(object, selection.method = "dispersion", verbose = FALSE) test_that("dispersion selection option returns expected values", { expect_equal(VariableFeatures(object = object)[1:4], c("PCMT1", "PPBP", "LYAR", "VDAC3")) expect_equal(length(x = VariableFeatures(object = object)), 230) - expect_equal(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')$mean[1:2], c(8.328927, 8.444462), tolerance = 1e-6) - expect_equal(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')$dispersion[1:2], c(10.552507, 10.088223), tolerance = 1e-6) - expect_equal(as.numeric(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')$dispersion.scaled[1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) - expect_true(!is.unsorted(rev(HVFInfo(object = object[["RNA"]], selection.method = 'mvp')[VariableFeatures(object = object), "dispersion"]))) + expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$mean[1:2], c(8.328927, 8.444462), tolerance = 1e-6) + expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion[1:2], c(10.552507, 10.088223), tolerance = 1e-6) + expect_equal(as.numeric(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion.scaled[1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) + expect_true(!is.unsorted(rev(HVFInfo(object = object[["RNA"]], method = 'mvp')[VariableFeatures(object = object), "dispersion"]))) }) object <- FindVariableFeatures(object, selection.method = "vst", verbose = FALSE) From f0c43961a0a0e35062e75dc2fd39d935791cfa31 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 16:18:32 -0400 Subject: [PATCH 685/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 75756f160..4135e993b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9053 +Version: 4.9.9.9054 Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 949ddecdccc7620d1f9bb421e21947376d4301b5 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 7 Jul 2023 16:26:45 -0400 Subject: [PATCH 686/979] adding more informative error message --- R/integration.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/integration.R b/R/integration.R index 5150f7a9d..287ca12a6 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4384,6 +4384,9 @@ FindWeights <- function( to.keep <- !duplicated(x = anchors.cells1) anchors.cells1 <- anchors.cells1[to.keep] anchors.cells2 <- anchors.cells2[to.keep] + if (length(anchors.cells1) < k || length(anchors.cells2) < k) { + stop("Number of anchor cells is less than k.weight. Consider lowering k.weight to less than", min(length(anchors.cells1), length(anchors.cells2)), "or adjusting anchor finding parameters in FindIntegrationAnchors.") + } if (is.null(x = features)) { data.use <- Embeddings(object = reduction)[nn.cells1, dims] data.use.query <- Embeddings(object = reduction)[nn.cells2, dims] @@ -4409,6 +4412,9 @@ FindWeights <- function( ) } else { anchors.cells2 <- unique(x = nn.cells2[anchors[, "cell2"]]) + if (length(anchors.cells2) < k) { + stop("Number of anchor cells is less than k.weight. Consider lowering k.weight to less than ", length(anchors.cells2), "or adjusting anchor finding parameters in FindIntegrationAnchors.") + } if (is.null(x = features)) { data.use <- Embeddings(reduction)[nn.cells2, dims] } else { From 21f7461597558de1bffbbcf9de70fcd080bc9acb Mon Sep 17 00:00:00 2001 From: AustinHartman Date: Fri, 7 Jul 2023 16:49:54 -0400 Subject: [PATCH 687/979] update utils tests --- tests/testthat/test_utilities.R | 48 +++++++++++++++++---------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 079634451..3a6ea1db6 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -17,26 +17,26 @@ object <- CreateSeuratObject( object <- SetIdent(object, value = 'a') test_that("AverageExpression works for different slots", { - average.expression <- AverageExpression(object, slot = 'data')$RNA + suppressWarnings(average.expression <- AverageExpression(object, slot = 'data')$RNA) expect_equivalent( average.expression['KHDRBS1', 1:3], c(a = 7.278237e-01, b = 1.658166e+14, c = 1.431902e-01), tolerance = 1e-6 ) expect_equivalent( - average.expression['DNAJB1', 1:3] , + average.expression['DNAJB1', 1:3], c(a = 1.374079e+00, b = 5.100840e-01, c = 5.011655e-01), tolerance = 1e-6 ) - avg.counts <- AverageExpression(object, slot = 'counts')$RNA + suppressWarnings(avg.counts <- AverageExpression(object, slot = 'counts')$RNA) expect_equal( - avg.counts['MS4A1', ], - c(a = 0.37037037, b = 0.3461538, c = 0.3333333), + unname(avg.counts['MS4A1', ]), + unname(c(a = 0.37037037, b = 0.3461538, c = 0.3333333)), tolerance = 1e-6 ) expect_equal( - avg.counts['SPON2', ], - c(a = 0.5185185, b = 0.6153846, c = 0.08333333), + unname(avg.counts['SPON2', ]), + unname(c(a = 0.5185185, b = 0.6153846, c = 0.08333333)), tolerance = 1e-6 ) expect_warning(AverageExpression(object, slot = 'scale.data')) @@ -67,19 +67,19 @@ test_that("AverageExpression with return.seurat", { avg.counts <- AverageExpression(object, slot = "counts", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.counts, "Seurat") avg.counts.mat <- AverageExpression(object, slot = 'counts')$RNA - expect_equal(as.matrix(GetAssayData(avg.counts[["RNA"]], slot = "counts")), avg.counts.mat) - avg.data <- GetAssayData(avg.counts[["RNA"]], slot = "data") + expect_equal(as.matrix(LayerData(avg.counts[["RNA"]], layer = "counts")), as.matrix(avg.counts.mat)) + avg.data <- LayerData(avg.counts[["RNA"]], layer = "data") expect_equal( - avg.data['MS4A1', ], - c(a = 0.31508105, b = 0.2972515, c = 0.2876821), + unname(avg.data['MS4A1', ]), + unname(c(a = 0.31508105, b = 0.2972515, c = 0.2876821)), tolerance = 1e-6 ) expect_equal( - avg.data['SPON2', ], - c(a = 0.4177352, b = 0.4795731, c = 0.08004271), + unname(avg.data['SPON2', ]), + unname(c(a = 0.4177352, b = 0.4795731, c = 0.08004271)), tolerance = 1e-6 ) - avg.scale <- GetAssayData(avg.counts[["RNA"]], slot = "scale.data") + avg.scale <- LayerData(avg.counts[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], c(a = 1.0841908, b = -0.1980056, c = -0.8861852), @@ -90,13 +90,14 @@ test_that("AverageExpression with return.seurat", { c(a = 0.4275778, b = 0.7151260, c = -1.1427038), tolerance = 1e-6 ) + # data avg.data <- AverageExpression(object, slot = "data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.data, "Seurat") avg.data.mat <- AverageExpression(object, slot = 'data')$RNA - expect_equal(as.matrix(GetAssayData(avg.data[["RNA"]], slot = "counts")), avg.data.mat) - expect_equal(unname(as.matrix(GetAssayData(avg.data[["RNA"]], slot = "data"))), unname(log1p(x = avg.data.mat))) - avg.scale <- GetAssayData(avg.data[["RNA"]], slot = "scale.data") + expect_equal(as.matrix(LayerData(avg.data[["RNA"]], layer = "counts")), avg.data.mat) + expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "data"))), unname(log1p(x = avg.data.mat))) + avg.scale <- LayerData(avg.data[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], c(a = 0.721145238, b = -1.1415734, c = 0.4204281), @@ -107,22 +108,23 @@ test_that("AverageExpression with return.seurat", { c(a = 0.08226771, b = 0.9563249, c = -1.0385926), tolerance = 1e-6 ) + # scale.data object <- ScaleData(object = object, verbose = FALSE) avg.scale <- AverageExpression(object, slot = "scale.data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.scale, "Seurat") avg.scale.mat <- AverageExpression(object, slot = 'scale.data')$RNA - expect_equal(unname(as.matrix(GetAssayData(avg.scale[["RNA"]], slot = "scale.data"))), unname(avg.scale.mat)) - expect_true(all(is.na(GetAssayData(avg.scale[["RNA"]], slot = "data")))) - expect_equal(GetAssayData(avg.scale[["RNA"]], slot = "counts"), matrix()) + expect_equal(unname(as.matrix(LayerData(avg.scale[["RNA"]], layer = "scale.data"))), unname(avg.scale.mat)) + expect_true(all(is.na(LayerData(avg.scale[["RNA"]], layer = "data")))) + expect_equal(LayerData(avg.scale[["RNA"]], layer = "counts"), matrix()) }) -test.dat <- GetAssayData(object = object, slot = "data") +test.dat <- LayerData(object = object, layer = "data") rownames(x = test.dat) <- paste0("test-", rownames(x = test.dat)) object[["TEST"]] <- CreateAssayObject(data = test.dat) test_that("AverageExpression with multiple assays", { - avg.test <- AverageExpression(object = object, assays = "TEST") + avg.test <- AverageExpression(object = object, assays = "TEST", slot = "data") expect_equal(names(x = avg.test), "TEST") expect_equal(length(x = avg.test), 1) expect_equivalent( @@ -135,7 +137,7 @@ test_that("AverageExpression with multiple assays", { c(a = 1.374079e+00, b = 5.100840e-01, c = 5.011655e-01), tolerance = 1e-6 ) - avg.all <- AverageExpression(object = object) + avg.all <- AverageExpression(object = object, slot = "data") expect_equal(names(x = avg.all), c("RNA", "TEST")) expect_equal(length(x = avg.all), 2) }) From 9e761203af3edd9ac7c87272f4d47ab4d58f0ac6 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 7 Jul 2023 17:08:16 -0400 Subject: [PATCH 688/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4135e993b..68e75dee7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9054 +Version: 4.9.9.9055 Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From fa0abf03783050233d41e2a51bc44e7a592ccfc3 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 7 Jul 2023 17:24:47 -0400 Subject: [PATCH 689/979] editing message --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 287ca12a6..e4205ab02 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4385,7 +4385,7 @@ FindWeights <- function( anchors.cells1 <- anchors.cells1[to.keep] anchors.cells2 <- anchors.cells2[to.keep] if (length(anchors.cells1) < k || length(anchors.cells2) < k) { - stop("Number of anchor cells is less than k.weight. Consider lowering k.weight to less than", min(length(anchors.cells1), length(anchors.cells2)), "or adjusting anchor finding parameters in FindIntegrationAnchors.") + stop("Number of anchor cells is less than k.weight. Consider lowering k.weight to less than ", min(length(anchors.cells1), length(anchors.cells2)), " or increase k.anchor.") } if (is.null(x = features)) { data.use <- Embeddings(object = reduction)[nn.cells1, dims] @@ -4413,7 +4413,7 @@ FindWeights <- function( } else { anchors.cells2 <- unique(x = nn.cells2[anchors[, "cell2"]]) if (length(anchors.cells2) < k) { - stop("Number of anchor cells is less than k.weight. Consider lowering k.weight to less than ", length(anchors.cells2), "or adjusting anchor finding parameters in FindIntegrationAnchors.") + stop("Number of anchor cells is less than k.weight. Consider lowering k.weight to less than ", length(anchors.cells2), " or increase k.anchor.") } if (is.null(x = features)) { data.use <- Embeddings(reduction)[nn.cells2, dims] From 746c51a8b254b822082e9780933d7c3d27b95642 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Fri, 7 Jul 2023 17:46:06 -0400 Subject: [PATCH 690/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4135e993b..68e75dee7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9054 +Version: 4.9.9.9055 Date: 2023-07-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 5d2e76269d839ccaa82d02d9aa30f67c989fa76b Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 11 Jul 2023 16:26:07 -0400 Subject: [PATCH 691/979] add features checking --- R/integration.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/integration.R b/R/integration.R index e4205ab02..c96416a51 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1945,6 +1945,10 @@ ProjectIntegration <- function( ) ) features <- intersect(x = features, y = features.atom) + if (length(x = features) == 0) { + stop('Features are not found. Please check VariableFeatures(object[[sketched.assay]]) ', + 'or set features in ProjectIntegration') + } ncells <- c( 0, sapply( From 5f477e8dc27e76ea9b92371f38f4d368b5da9524 Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 11 Jul 2023 16:28:06 -0400 Subject: [PATCH 692/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 68e75dee7..4e3d8d8e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9055 -Date: 2023-07-07 +Version: 4.9.9.9056 +Date: 2023-07-11 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From aeb1f0e077e9061f44bb5d1acfd4c5a0b589c086 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 13 Jul 2023 17:21:46 -0400 Subject: [PATCH 693/979] Fix ReadMtx for Seurat5 --- R/preprocessing.R | 40 ++++++++++++++++++------------- man/CCAIntegration.Rd | 47 ++++++++++++++++++++++++++++++++++++ man/FindAllMarkers.Rd | 4 ---- man/HarmonyIntegration.Rd | 40 ------------------------------- man/JointPCAIntegration.Rd | 49 +++++++++++++++++++++++++++++++++++++- man/RPCAIntegration.Rd | 47 ++++++++++++++++++++++++++++++++++++ man/ReadMtx.Rd | 3 +++ man/reexports.Rd | 4 ++-- src/RcppExports.cpp | 2 +- 9 files changed, 171 insertions(+), 65 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 97723df6b..b7f5f728c 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -418,7 +418,7 @@ GetResidual <- function( "This SCTAssay contains multiple SCT models. Computing residuals for cells using different models" ) } - if (!umi.assay %in% Assays(object = object) || + if (!umi.assay %in% Assays(object = object) || length(x = Layers(object = object[[umi.assay]], search = 'counts')) == 0) { return(object) } @@ -1538,15 +1538,18 @@ ReadAkoya <- function( #' } #' ReadMtx <- function( - mtx, - cells, - features, - cell.column = 1, - feature.column = 2, - skip.cell = 0, - skip.feature = 0, - unique.features = TRUE, - strip.suffix = FALSE + mtx, + cells, + features, + cell.column = 1, + feature.column = 2, + cell.sep = "\t", + feature.sep = "\t", + skip.cell = 0, + skip.feature = 0, + mtx.transpose = FALSE, + unique.features = TRUE, + strip.suffix = FALSE ) { all.files <- list( "expression matrix" = mtx, @@ -1585,14 +1588,14 @@ ReadMtx <- function( cell.barcodes <- read.table( file = all.files[['barcode list']], header = FALSE, - sep = '\t', + sep = cell.sep, row.names = NULL, skip = skip.cell ) feature.names <- read.table( file = all.files[['feature list']], header = FALSE, - sep = '\t', + sep = feature.sep, row.names = NULL, skip = skip.feature ) @@ -1645,7 +1648,7 @@ ReadMtx <- function( feature.column, ". Try specifiying a different column.", call. = FALSE - ) + ) } else { warning( "Some features names are NA in column ", @@ -1654,7 +1657,7 @@ ReadMtx <- function( replacement.column, ".", call. = FALSE - ) + ) } feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] } @@ -1663,6 +1666,9 @@ ReadMtx <- function( feature.names <- make.unique(names = feature.names) } data <- readMM(file = all.files[['expression matrix']]) + if (mtx.transpose) { + data <- t(x = data) + } if (length(x = cell.names) != ncol(x = data)) { stop( "Matrix has ", @@ -1675,7 +1681,7 @@ ReadMtx <- function( no = "" ), call. = FALSE - ) + ) } if (length(x = feature.names) != nrow(x = data)) { stop( @@ -1689,7 +1695,7 @@ ReadMtx <- function( no = "" ), call. = FALSE - ) + ) } colnames(x = data) <- cell.names @@ -3228,7 +3234,7 @@ SCTransform.default <- function( immediate. = TRUE ) } - + vst.args[['vst.flavor']] <- vst.flavor vst.args[['umi']] <- umi vst.args[['cell_attr']] <- cell.attr diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 797b1147a..1272a2605 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -17,6 +17,12 @@ CCAIntegration( groups = NULL, k.filter = NA, scale.layer = "scale.data", + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, verbose = TRUE, ... ) @@ -42,7 +48,48 @@ search space} \item{k.filter}{How many neighbors (k) to use when filtering anchors} +\item{dims.to.integrate}{Number of dimensions to return integrated values for} + +\item{k.weight}{Number of neighbors to consider when weighting anchors} + +\item{weight.reduction}{Dimension reduction to use when calculating anchor +weights. This can be one of: +\itemize{ + \item{A string, specifying the name of a dimension reduction present in + all objects to be integrated} + \item{A vector of strings, specifying the name of a dimension reduction to + use for each object to be integrated} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to + use for each object in the integration} + \item{NULL, in which case the full corrected space is used for computing + anchor weights.} +}} + +\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} + +\item{sample.tree}{Specify the order of integration. Order of integration +should be encoded in a matrix, where each row represents one of the pairwise +integration steps. Negative numbers specify a dataset, positive numbers +specify the integration results from a given row (the format of the merge +matrix included in the \code{\link{hclust}} function output). For example: +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] + [1,] -2 -3 + [2,] 1 -1 +}\if{html}{\out{
    }} + +Which would cause dataset 2 and 3 to be integrated first, then the resulting +object integrated with dataset 1. + +If NULL, the sample tree will be computed automatically.} + +\item{preserve.order}{Do not reorder objects based on size for each pairwise +integration.} + \item{verbose}{Print progress bars and output} + +\item{...}{Arguments passed on to \code{FindIntegrationAnchors}} } \description{ Seurat-CCA Integration diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd index 28247dd34..622474624 100644 --- a/man/FindAllMarkers.Rd +++ b/man/FindAllMarkers.Rd @@ -22,7 +22,6 @@ FindAllMarkers( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -113,9 +112,6 @@ of the two groups, currently only used for poisson and negative binomial tests} \item{min.cells.group}{Minimum number of cells in one of the groups} -\item{pseudocount.use}{Pseudocount to add to averaged expression values when -calculating logFC. 0.1 by default.} - \item{mean.fxn}{Function to use for fold change or average difference calculation. If NULL, the appropriate function will be chose according to the slot used} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 82e382f1e..44499dfc5 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -41,46 +41,6 @@ should be called \code{group}} \item{layers}{Ignored} -\item{npcs}{If doing PCA on input matrix, number of PCs to compute.} - -\item{theta}{Diversity clustering penalty parameter. Specify for each -variable in vars_use Default theta=2. theta=0 does not encourage any -diversity. Larger values of theta result in more diverse clusters.} - -\item{lambda}{Ridge regression penalty parameter. Specify for each variable - in vars_use. -Default lambda=1. Lambda must be strictly positive. Smaller values result -in more aggressive correction.} - -\item{sigma}{Width of soft kmeans clusters. Default sigma=0.1. Sigma scales -the distance from a cell to cluster centroids. Larger values of sigma -result in cells assigned to more clusters. Smaller values of sigma make -soft kmeans cluster approach hard clustering.} - -\item{nclust}{Number of clusters in model. nclust=1 equivalent to simple -linear regression.} - -\item{tau}{Protection against overclustering small datasets with large ones. -tau is the expected number of cells per cluster.} - -\item{block.size}{What proportion of cells to update during clustering. -Between 0 to 1, default 0.05. Larger values may be faster but less accurate} - -\item{max.iter.harmony}{Maximum number of rounds to run Harmony. One round -of Harmony involves one clustering and one correction step.} - -\item{max.iter.cluster}{Maximum number of rounds to run clustering at each -round of Harmony.} - -\item{epsilon.cluster}{Convergence tolerance for clustering round of -Harmony. Set to -Inf to never stop early.} - -\item{epsilon.harmony}{Convergence tolerance for Harmony. Set to -Inf to -never stop early.} - -\item{verbose}{Whether to print progress messages. TRUE to print, -FALSE to suppress.} - \item{...}{Ignored} } \value{ diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd index 1880df070..1244e24e1 100644 --- a/man/JointPCAIntegration.Rd +++ b/man/JointPCAIntegration.Rd @@ -16,8 +16,14 @@ JointPCAIntegration( dims = 1:30, k.anchor = 20, scale.layer = "scale.data", - verbose = TRUE, + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, groups = NULL, + verbose = TRUE, ... ) } @@ -42,7 +48,48 @@ search space} \item{k.anchor}{How many neighbors (k) to use when picking anchors} +\item{dims.to.integrate}{Number of dimensions to return integrated values for} + +\item{k.weight}{Number of neighbors to consider when weighting anchors} + +\item{weight.reduction}{Dimension reduction to use when calculating anchor +weights. This can be one of: +\itemize{ + \item{A string, specifying the name of a dimension reduction present in + all objects to be integrated} + \item{A vector of strings, specifying the name of a dimension reduction to + use for each object to be integrated} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to + use for each object in the integration} + \item{NULL, in which case the full corrected space is used for computing + anchor weights.} +}} + +\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} + +\item{sample.tree}{Specify the order of integration. Order of integration +should be encoded in a matrix, where each row represents one of the pairwise +integration steps. Negative numbers specify a dataset, positive numbers +specify the integration results from a given row (the format of the merge +matrix included in the \code{\link{hclust}} function output). For example: +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] + [1,] -2 -3 + [2,] 1 -1 +}\if{html}{\out{
    }} + +Which would cause dataset 2 and 3 to be integrated first, then the resulting +object integrated with dataset 1. + +If NULL, the sample tree will be computed automatically.} + +\item{preserve.order}{Do not reorder objects based on size for each pairwise +integration.} + \item{verbose}{Print progress bars and output} + +\item{...}{Arguments passed on to \code{FindIntegrationAnchors}} } \description{ Seurat-Joint PCA Integration diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index f7d6d60ea..86774c8a3 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -17,6 +17,12 @@ RPCAIntegration( k.filter = NA, scale.layer = "scale.data", groups = NULL, + dims.to.integrate = NULL, + k.weight = 100, + weight.reduction = NULL, + sd.weight = 1, + sample.tree = NULL, + preserve.order = FALSE, verbose = TRUE, ... ) @@ -42,7 +48,48 @@ search space} \item{k.filter}{How many neighbors (k) to use when filtering anchors} +\item{dims.to.integrate}{Number of dimensions to return integrated values for} + +\item{k.weight}{Number of neighbors to consider when weighting anchors} + +\item{weight.reduction}{Dimension reduction to use when calculating anchor +weights. This can be one of: +\itemize{ + \item{A string, specifying the name of a dimension reduction present in + all objects to be integrated} + \item{A vector of strings, specifying the name of a dimension reduction to + use for each object to be integrated} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to + use for each object in the integration} + \item{NULL, in which case the full corrected space is used for computing + anchor weights.} +}} + +\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} + +\item{sample.tree}{Specify the order of integration. Order of integration +should be encoded in a matrix, where each row represents one of the pairwise +integration steps. Negative numbers specify a dataset, positive numbers +specify the integration results from a given row (the format of the merge +matrix included in the \code{\link{hclust}} function output). For example: +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] + [1,] -2 -3 + [2,] 1 -1 +}\if{html}{\out{
    }} + +Which would cause dataset 2 and 3 to be integrated first, then the resulting +object integrated with dataset 1. + +If NULL, the sample tree will be computed automatically.} + +\item{preserve.order}{Do not reorder objects based on size for each pairwise +integration.} + \item{verbose}{Print progress bars and output} + +\item{...}{Arguments passed on to \code{FindIntegrationAnchors}} } \description{ Seurat-RPCA Integration diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd index e24620851..b149d1b67 100644 --- a/man/ReadMtx.Rd +++ b/man/ReadMtx.Rd @@ -10,8 +10,11 @@ ReadMtx( features, cell.column = 1, feature.column = 2, + cell.sep = "\\t", + feature.sep = "\\t", skip.cell = 0, skip.feature = 0, + mtx.transpose = FALSE, unique.features = TRUE, strip.suffix = FALSE ) diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From ba0ae9e8d211eb2b937cf9f481bd6a5db48d063c Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 13 Jul 2023 18:26:54 -0400 Subject: [PATCH 694/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 50aea79ec..3d3f6da14 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.3.0.9011 +Version: 4.9.9.9057 Date: 2023-07-13 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From dbe7947237d3b57ffc4890c76ddbca1e1080c20a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Fri, 14 Jul 2023 15:19:11 -0400 Subject: [PATCH 695/979] bump version --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab6315e89..189e1c2f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9057 -Date: 2023-07-13 +Version: 4.9.9.9058 +Date: 2023-07-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -33,7 +33,7 @@ Remotes: Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9049) + SeuratObject (>= 4.9.9.9091) Imports: cluster, cowplot, From f5a3e22d256fcd6cc99a657dd3bada67b8ab019c Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 14 Jul 2023 15:37:13 -0400 Subject: [PATCH 696/979] Update NEWS; bump version --- DESCRIPTION | 4 ++-- NEWS.md | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab6315e89..9d2367b58 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9057 -Date: 2023-07-13 +Version: 4.9.9.9058 +Date: 2023-07-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 91a06a125..72349f862 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,7 @@ - Fix `pt.size` bug when rasterization is set to true ([#7379](https://github.com/satijalab/seurat/issues/7379)) - Fix `FoldChange` and `FindMarkers` to support all normalization approaches ([#7115](https://github.com/satijalab/seurat/pull/7115),[#7110](https://github.com/satijalab/seurat/issues/7110),[#7095](https://github.com/satijalab/seurat/issues/7095),[#6976](https://github.com/satijalab/seurat/issues/6976),[#6654](https://github.com/satijalab/seurat/issues/6654),[#6701](https://github.com/satijalab/seurat/issues/6701),[#6773](https://github.com/satijalab/seurat/issues/6773), [#7107](https://github.com/satijalab/seurat/issues/7107)) - Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565)) +- Fix bug in `ReadMtx()` to add back missing parameters # Seurat 4.3.0 (2022-11-18) From 9862ec66639e4d110aba1f09b2a34b96c21776d6 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 14 Jul 2023 16:27:49 -0400 Subject: [PATCH 697/979] Fix SCTransform to retain gene attributes for V5 assays --- DESCRIPTION | 2 +- NEWS.md | 1 + R/preprocessing5.R | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 189e1c2f4..741027ba6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9058 +Version: 4.9.9.9059 Date: 2023-07-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/NEWS.md b/NEWS.md index 72349f862..0eb85fd46 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ - Fix `FoldChange` and `FindMarkers` to support all normalization approaches ([#7115](https://github.com/satijalab/seurat/pull/7115),[#7110](https://github.com/satijalab/seurat/issues/7110),[#7095](https://github.com/satijalab/seurat/issues/7095),[#6976](https://github.com/satijalab/seurat/issues/6976),[#6654](https://github.com/satijalab/seurat/issues/6654),[#6701](https://github.com/satijalab/seurat/issues/6701),[#6773](https://github.com/satijalab/seurat/issues/6773), [#7107](https://github.com/satijalab/seurat/issues/7107)) - Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565)) - Fix bug in `ReadMtx()` to add back missing parameters +- Fix `SCTransform()` for V5 assays to retain gene attributes ([#7557](https://github.com/satijalab/seurat/issues/7557)) # Seurat 4.3.0 (2022-11-18) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index dbe50d283..63e675727 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1512,6 +1512,7 @@ SCTransform.StdAssay <- function( # once we have the model, just calculate residuals for all # cells vst_out.reference <- SCTModel_to_vst(SCTModel = local.reference.SCT.model) + vst_out.reference$gene_attr <- local.reference.SCT.model@feature.attributes min_var <- vst_out.reference$arguments$min_variance if (min_var == "umi_median"){ block <- DelayedArray::read_block(x = counts, From 99b23ef9b740be083f01ee29a58a437c280a2b38 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 8 Aug 2023 15:43:00 -0400 Subject: [PATCH 698/979] fixing sketch for few features --- R/sketching.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 1bf7fe4b8..b7f1b6f88 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -383,7 +383,8 @@ LeverageScore.default <- function( # Check the dimensions of the object, nsketch, and ndims ncells <- ncol(x = object) if (ncells < nsketch*1.5) { - Z <- irlba(A = object, nv = 50, nu = 0, verbose = FALSE)$v + nv <- ifelse(nrow(x = object) < 50, nrow(x = object) - 1, 50) + Z <- irlba(A = object, nv = nv, nu = 0, verbose = FALSE)$v return(rowSums(x = Z ^ 2)) } if (nrow(x = object) > 5000L) { From 68b467798fb0e8c52b1e5093bae0c1c7a5836e42 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 16 Aug 2023 16:25:25 -0400 Subject: [PATCH 699/979] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 0eb85fd46..baa70a489 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,7 @@ - Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565)) - Fix bug in `ReadMtx()` to add back missing parameters - Fix `SCTransform()` for V5 assays to retain gene attributes ([#7557](https://github.com/satijalab/seurat/issues/7557)) +- Fix 'LeverageScore()' for objects with few features ([#7650](https://github.com/satijalab/seurat/issues/7650) # Seurat 4.3.0 (2022-11-18) From 195d111179cf1c1c9658dc84bae6609b07c1b030 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 16 Aug 2023 16:25:41 -0400 Subject: [PATCH 700/979] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index baa70a489..5d881c29f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,7 +13,7 @@ - Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565)) - Fix bug in `ReadMtx()` to add back missing parameters - Fix `SCTransform()` for V5 assays to retain gene attributes ([#7557](https://github.com/satijalab/seurat/issues/7557)) -- Fix 'LeverageScore()' for objects with few features ([#7650](https://github.com/satijalab/seurat/issues/7650) +- Fix `LeverageScore()` for objects with few features ([#7650](https://github.com/satijalab/seurat/issues/7650) # Seurat 4.3.0 (2022-11-18) From 05486cd66cd64fadba5ed08d83f49f5456c7657e Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 24 Aug 2023 09:45:48 -0400 Subject: [PATCH 701/979] adding some argument documentation --- R/integration.R | 5 +++++ R/integration5.R | 12 ++++++++++++ R/preprocessing5.R | 3 +++ 3 files changed, 20 insertions(+) diff --git a/R/integration.R b/R/integration.R index 6a945b019..c42867157 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1867,6 +1867,9 @@ IntegrateEmbeddings.TransferAnchorSet <- function( #' @param reduction.key Key for new dimensional reduction; defaults to creating #' one from \code{reduction.name} #' @param layers Names of layers for correction. +#' @param sketched.layers Names of sketched layers, defaults to all +#' layers of \dQuote{\code{object[[assay]]}} +#' @param seed A positive integer. The seed for the random number generator, defaults to 123. #' @param verbose Print progress and message #' #' @return Returns a Seurat object with an integrated dimensional reduction @@ -3035,6 +3038,7 @@ SelectIntegrationFeatures <- function( #' } #' @param layers Name of layers to use for integration feature selection #' @param verbose Print messages +#' @param ... Arguments passed on to \code{method} #' #' @export #' @@ -3066,6 +3070,7 @@ SelectIntegrationFeatures5 <- function( #' @param nfeatures Number of features to return for integration #' @param assay Name of assay to use for integration feature selection #' @param verbose Print messages +#' @param ... Arguments passed on to \code{method} #' #' @export #' diff --git a/R/integration5.R b/R/integration5.R index 30741eb89..ee9f18f8c 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -22,6 +22,18 @@ NULL #' @param scale.layer Ignored #' @param layers Ignored #' @param key Key for Harmony dimensional reduction +#' @param npcs If doing PCA on input matrix, number of PCs to compute +#' @param theta Diversity clustering penalty parameter +#' @param lambda Ridge regression penalty parameter +#' @param sigma Width of soft kmeans clusters +#' @param nclust Number of clusters in model +#' @param tau Protection against overclustering small datasets with large ones +#' @param block.size What proportion of cells to update during clustering +#' @param max.iter.harmony Maximum number of rounds to run Harmony +#' @param max.iter.cluster Maximum number of rounds to run clustering at each round of Harmony +#' @param epsilon.cluster Convergence tolerance for clustering round of Harmony +#' @param epsilon.harmony Convergence tolerance for Harmony +#' @param verbose Whether to print progress messages. TRUE to print, FALSE to suppress #' @param ... Ignored #' #' @return ... diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 9c61ae757..c4d82f251 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1465,6 +1465,9 @@ SCTransform.StdAssay <- function( #' and the default is RNA #' @param clip.range Numeric of length two specifying the min and max values the #' Pearson residual will be clipped to +#' @param reference.SCT.model reference.SCT.model If a reference SCT model should be used +#' for calculating the residuals. When set to not NULL, ignores the `SCTModel` +#' paramater. #' @param replace.value Recalculate residuals for all features, even if they are #' already present. Useful if you want to change the clip.range. #' @param na.rm For features where there is no feature model stored, return NA From 5a0cf5f6bc38439cc34e65c8cfd37aff93a41c89 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 28 Aug 2023 16:49:02 -0400 Subject: [PATCH 702/979] fix harmony naming --- R/integration5.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index 130298594..9ff1ab271 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -72,6 +72,7 @@ HarmonyIntegration <- function( groups, features = NULL, scale.layer = 'scale.data', + new.reduction = 'harmony', layers = NULL, npcs = 50L, key = 'harmony_', @@ -135,7 +136,9 @@ HarmonyIntegration <- function( # assay = assay assay = DefaultAssay(object = orig) )) - return(list(harmony = dr)) + output.list <- list(dr) + names(output.list) <- c(new.reduction) + return(output.list) } attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' From 24be6628b2406954f37aa3a78b8ef0e8287b8569 Mon Sep 17 00:00:00 2001 From: rsatija Date: Mon, 28 Aug 2023 17:33:30 -0400 Subject: [PATCH 703/979] Added nselect to avoid errors/warnings in test --- R/preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index b7f5f728c..c188c9330 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3758,6 +3758,7 @@ FindVariableFeatures.Assay <- function( num.bin = 20, binning.method = "equal_width", nfeatures = 2000, + nselect = 2000, mean.cutoff = c(0.1, 8), dispersion.cutoff = c(1, Inf), verbose = TRUE, From 5dc5be1dd28d2cc6a45ae5ee55eb1c07050cf2fe Mon Sep 17 00:00:00 2001 From: rsatija Date: Mon, 28 Aug 2023 18:39:53 -0400 Subject: [PATCH 704/979] Test ix --- tests/testthat/test_differential_expression.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 373a6b6d9..64328278a 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -331,11 +331,15 @@ test_that("FindAllMarkers works as expected", { ref <- pbmc_small ref <- FindVariableFeatures(object = ref, verbose = FALSE, nfeatures = 100) query <- CreateSeuratObject( - counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1) + counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1)) ) + query2 <- CreateSeuratObject( - counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1) + counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1)) ) + + + query.list <- list(query, query2) query.list <- lapply(X = query.list, FUN = NormalizeData, verbose = FALSE) query.list <- lapply(X = query.list, FUN = FindVariableFeatures, verbose = FALSE, nfeatures = 100) @@ -354,6 +358,8 @@ test_that("FindMarkers recognizes log normalizatio", { expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) }) + + # Tests for FindConservedMarkers # ------------------------------------------------------------------------------- From 2e83f35f7d9569fadd893388a6610c1ff28082e7 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 28 Aug 2023 18:42:49 -0400 Subject: [PATCH 705/979] update DE tests for SCTransform v2 --- tests/testthat/test_differential_expression.R | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 373a6b6d9..2d97c0788 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -45,13 +45,13 @@ test_that("Default settings work as expected", { expect_equal(rownames(x = results.clr)[1], "S100A8") # SCT normalization - expect_equal(results.sct[1, "p_val"], 6.225491e-11) - expect_equal(results.sct[1, "avg_logFC"], -1.081321, tolerance = 1e-6) - expect_equal(results.sct[1, "pct.1"], 0.111) - expect_equal(results.sct[1, "pct.2"], 0.96) - expect_equal(results.sct[1, "p_val_adj"], 1.369608e-08) - expect_equal(nrow(x = results.sct), 158) - expect_equal(rownames(x = results.sct)[1], "TYMP") + expect_equal(results.sct[1, "p_val"], 4.646968e-11) + expect_equal(results.sct[1, "avg_logFC"], -1.8522457, tolerance = 1e-6) + expect_equal(results.sct[1, "pct.1"], 0.333) + expect_equal(results.sct[1, "pct.2"], 1.00) + expect_equal(results.sct[1, "p_val_adj"], 1.022333e-08) + expect_equal(nrow(x = results.sct), 156) + expect_equal(rownames(x = results.sct)[1], "CST3") }) @@ -97,7 +97,7 @@ test_that("setting pseudocount.use works", { expect_equal(nrow(x = results.clr), 182) expect_equal(results.clr[1, "avg_logFC"], -2.317338, tolerance = 1e-6) expect_equal(nrow(results.sct), 194) - expect_equal(results.sct[1, "avg_logFC"], -2.253920, tolerance = 1e-6) + expect_equal(results.sct[1, "avg_logFC"], -2.421716, tolerance = 1e-6) }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) @@ -107,7 +107,7 @@ test_that("setting mean.fxn works", { expect_equal(nrow(x = results), 191) expect_equal(results[1, "avg_logFC"], -4.204346, tolerance = 1e-6) expect_equal(results.clr[1, "avg_logFC"], -1.353025, tolerance = 1e-6) - expect_equal(results.sct[1, "avg_logFC"], -1.064042, tolerance = 1e-6) + expect_equal(results.sct[1, "avg_logFC"], -2.021490, tolerance = 1e-6) }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1))) @@ -309,11 +309,11 @@ test_that("FindAllMarkers works as expected", { # SCT normalization expect_equal(results.sct[1, "p_val"], 4.25861e-12) - expect_equal(results.sct[1, "avg_log2FC"], -2.70188, tolerance = 1e-6) - expect_equal(results.sct[1, "pct.1"], 0.167) + expect_equal(results.sct[1, "avg_log2FC"], -2.848796, tolerance = 1e-6) + expect_equal(results.sct[1, "pct.1"], 0.111) expect_equal(results.sct[1, "pct.2"], 0.909) - expect_equal(results.sct[1, "p_val_adj"], 9.368941e-10) - expect_equal(nrow(x = results.sct), 210) + expect_equal(results.sct[1, "p_val_adj"], 3.005572e-10) + expect_equal(nrow(x = results.sct), 202) expect_equal(rownames(x = results.sct)[1], "HLA-DPB1") # pseudocount.use = 0.1 From 9e6f28c9803605bb17c4d2bb8a790075a452eb20 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 29 Aug 2023 10:32:31 -0400 Subject: [PATCH 706/979] remove SeuratObject tests --- tests/testthat/test_objects.R | 344 ---------------------------------- 1 file changed, 344 deletions(-) diff --git a/tests/testthat/test_objects.R b/tests/testthat/test_objects.R index 84b137bd4..e11e2f7c7 100644 --- a/tests/testthat/test_objects.R +++ b/tests/testthat/test_objects.R @@ -1,349 +1,5 @@ # Tests for functions in objects.R -# Tests for interacting with the meta.data slot -# ------------------------------------------------------------------------------ -context("Metadata") - -data("pbmc_small") - -pbmc_small <- suppressWarnings(suppressMessages(UpdateSeuratObject(pbmc_small))) -cluster_letters <- LETTERS[Idents(object = pbmc_small)] -names(cluster_letters) <- colnames(x = pbmc_small) -cluster_letters_shuffled <- sample(x = cluster_letters) - -test_that("AddMetaData adds in cell-level vector properly ", { - pbmc_small <- AddMetaData(object = pbmc_small, metadata = cluster_letters, col.name = 'letter.idents') - expect_equal(pbmc_small$letter.idents, cluster_letters) - pbmc_small <- AddMetaData(object = pbmc_small, metadata = cluster_letters_shuffled, col.name = 'letter.idents.shuffled') - expect_equal(pbmc_small$letter.idents, pbmc_small$letter.idents.shuffled) -}) - -cluster_letters_df <- data.frame(A = cluster_letters, B = cluster_letters_shuffled) -test_that("AddMetaData adds in data frame properly for cell-level metadata", { - pbmc_small <- AddMetaData(object = pbmc_small, metadata = cluster_letters_df) - expect_equal(pbmc_small[[c("A", "B")]], cluster_letters_df) -}) - -feature_letters <- sample(x = LETTERS, size = nrow(x = pbmc_small[["RNA"]]), replace = TRUE) -names(feature_letters) <- rownames(x = pbmc_small[["RNA"]]) -feature_letters_shuffled <- sample(x = feature_letters) - -test_that("AddMetaData adds feature level metadata", { - pbmc_small[["RNA"]] <- AddMetaData(object = pbmc_small[["RNA"]], metadata = feature_letters, col.name = 'feature_letters') - expect_equal(pbmc_small[["RNA"]]["feature_letters", drop = TRUE], feature_letters) - pbmc_small[["RNA"]] <- AddMetaData(object = pbmc_small[["RNA"]], metadata = feature_letters_shuffled, col.name = 'feature_letters_shuffled') - expect_equal(pbmc_small[["RNA"]]["feature_letters", drop = TRUE], pbmc_small[["RNA"]]["feature_letters_shuffled", drop = TRUE]) -}) - -feature_letters_df <- data.frame(A = feature_letters, B = feature_letters_shuffled) -test_that("AddMetaData adds in data frame properly for Assays", { - pbmc_small[["RNA"]] <- AddMetaData(object = pbmc_small[["RNA"]], metadata = feature_letters_df) - expect_equal(pbmc_small[["RNA"]][c("A", "B")], feature_letters_df) -}) - -test_that("AddMetaData errors", { - expect_error(AddMetaData(object = pbmc_small, metadata = cluster_letters, col.name = "RNA")) - # expect_error(AddMetaData(object = pbmc_small, metadata = c(unname(cluster_letters), "A"), col.name = "letter.idents")) # doesnt error in v5 - expect_error(AddMetaData(object = pbmc_small, metadata = feature_letters, col.name = "letter.idents")) - expect_error(AddMetaData(object = pbmc_small[["RNA"]], metadata = cluster_letters, col.name = "letter.idents")) - expect_error(AddMetaData(object = pbmc_small, metadata = data.frame(), col.name = "letter.idents")) -}) - -# Tests for creating an Assay object -# ------------------------------------------------------------------------------ -context("CreateAssayObject") - -pbmc.raw <- GetAssayData(object = pbmc_small[["RNA"]], slot = "counts") -rna.assay <- CreateAssayObject(counts = pbmc.raw) -rna.assay2 <- CreateAssayObject(data = pbmc.raw) - -test_that("CreateAssayObject works as expected", { - expect_equal(dim(x = rna.assay), c(230, 80)) - expect_equal(rownames(x = rna.assay), rownames(x = pbmc.raw)) - expect_equal(colnames(x = rna.assay), colnames(x = pbmc.raw)) - expect_equal(GetAssayData(object = rna.assay, slot = "counts"), pbmc.raw) - expect_equal(GetAssayData(object = rna.assay, slot = "data"), pbmc.raw) - expect_equal(GetAssayData(object = rna.assay, slot = "scale.data"), new(Class = "matrix")) - expect_equal(dim(rna.assay[]), c(230, 0)) - expect_equal(rownames(x = rna.assay[]), rownames(x = rna.assay)) - expect_equal(VariableFeatures(object = rna.assay), vector()) - expect_equal(rna.assay@misc, list("calcN" = TRUE)) - expect_equal(GetAssayData(object = rna.assay2, slot = "counts"), new(Class = "matrix")) -}) - -rna.assay2 <- CreateAssayObject(counts = pbmc.raw, min.cells = 10, min.features = 30) -test_that("CreateAssayObject filtering works", { - expect_equal(dim(x = rna.assay2), c(163, 77)) - expect_true(all(rowSums(GetAssayData(object = rna.assay2, slot = "counts")) >= 10)) - expect_true(all(colSums(GetAssayData(object = rna.assay2, slot = "counts")) >= 30)) -}) - -test_that("CreateAssayObject catches improper input", { - expect_error(CreateAssayObject()) - expect_error(CreateAssayObject(counts = pbmc.raw, data = pbmc.raw)) - pbmc.raw2 <- cbind(pbmc.raw[, 1:10], pbmc.raw[, 1:10]) - expect_warning(CreateAssayObject(counts = pbmc.raw2)) - expect_warning(CreateAssayObject(data = pbmc.raw2)) - pbmc.raw2 <- rbind(pbmc.raw[1:10, ], pbmc.raw[1:10, ]) - expect_warning(CreateAssayObject(counts = pbmc.raw2)) - expect_warning(CreateAssayObject(data = pbmc.raw2)) - pbmc.raw2 <- pbmc.raw - colnames(x = pbmc.raw2) <- c() - expect_error(CreateAssayObject(counts = pbmc.raw2)) - expect_error(CreateAssayObject(data = pbmc.raw2)) - pbmc.raw2 <- pbmc.raw - rownames(x = pbmc.raw2) <- c() - expect_error(CreateAssayObject(counts = pbmc.raw2)) - expect_error(CreateAssayObject(data = pbmc.raw2)) - pbmc.raw.mat <- as.matrix(x = pbmc.raw) - pbmc.raw.df <- as.data.frame(x = pbmc.raw.mat) - rna.assay3 <- CreateAssayObject(counts = pbmc.raw.df) - rna.assay4 <- CreateAssayObject(counts = pbmc.raw.mat) - expect_is(object = GetAssayData(object = rna.assay3, slot = "counts"), class = "dgCMatrix") - expect_is(object = GetAssayData(object = rna.assay4, slot = "counts"), class = "dgCMatrix") - pbmc.raw.underscores <- pbmc.raw - rownames(pbmc.raw.underscores) <- gsub(pattern = "-", replacement = "_", x = rownames(pbmc.raw.underscores)) - expect_warning(CreateAssayObject(counts = pbmc.raw.underscores)) -}) - -# Tests for creating an DimReduc object -# ------------------------------------------------------------------------------ -context("CreateDimReducObject") - -pca <- pbmc_small[["pca"]] -Key(object = pca) <- 'PC_' - -test_that("CreateDimReducObject works", { - pca.dr <- CreateDimReducObject( - embeddings = Embeddings(object = pca), - loadings = Loadings(object = pca), - projected = Loadings(object = pca, projected = TRUE), - assay = "RNA" - ) - expect_equal(Embeddings(object = pca.dr), Embeddings(object = pca)) - expect_equal(Loadings(object = pca.dr), Loadings(object = pca)) - expect_equal(Loadings(object = pca.dr, projected = TRUE), Loadings(object = pca, projected = TRUE)) - expect_equal(Key(object = pca.dr), "PC_") - expect_equal(pca.dr@assay.used, "RNA") -}) - -test_that("CreateDimReducObject catches improper input", { - bad.embeddings <- Embeddings(object = pca) - colnames(x = bad.embeddings) <- paste0("PCA", 1:ncol(x = bad.embeddings)) - expect_warning(CreateDimReducObject(embeddings = bad.embeddings, key = "PC")) - colnames(x = bad.embeddings) <- paste0("PC", 1:ncol(x = bad.embeddings), "X") - suppressWarnings(expect_error(CreateDimReducObject(embeddings = bad.embeddings, key = "PC"))) - suppressWarnings(expect_error(CreateDimReducObject(embeddings = bad.embeddings))) -}) - -# Tests for creating a Seurat object -# ------------------------------------------------------------------------------ -context("CreateSeuratObject") - -colnames(x = pbmc.raw) <- paste0(colnames(x = pbmc.raw), "-", pbmc_small$groups) -metadata.test <- pbmc_small[[]][, 5:7] -rownames(x = metadata.test) <- colnames(x = pbmc.raw) - -test_that("CreateSeuratObject works", { - seurat.object <- CreateSeuratObject( - counts = pbmc.raw, - project = "TESTING", - assay = "RNA.TEST", - names.field = 2, - names.delim = "-", - meta.data = metadata.test - ) - expect_equal(seurat.object[[]][, 4:6], metadata.test) - expect_equal(seurat.object@project.name, "TESTING") - expect_equal(names(x = seurat.object), "RNA.TEST") - expect_equal(as.vector(x = unname(obj = Idents(object = seurat.object))), unname(pbmc_small$groups)) -}) - -test_that("CreateSeuratObject handles bad names.field/names.delim", { - expect_warning(seurat.object <- CreateSeuratObject( - counts = pbmc.raw[1:5,1:5], - names.field = 3, - names.delim = ":", - meta.data = metadata.test - )) -}) - -# Tests for creating a Seurat object -# ------------------------------------------------------------------------------ -context("Merging") - -pbmc.assay <- pbmc_small[["RNA"]] -x <- merge(x = pbmc.assay, y = pbmc.assay) - -test_that("Merging Assays works properly", { - expect_equal(dim(GetAssayData(object = x, slot = "counts")), c(230, 160)) - expect_equal(dim(GetAssayData(object = x, slot = "data")), c(230, 160)) - expect_equal(GetAssayData(object = x, slot = "scale.data"), new(Class = "matrix")) - expect_equal(Key(object = x), "rna_") - expect_equal(VariableFeatures(object = x), vector()) - expect_equal(x[[]], data.frame(row.names = rownames(x = pbmc.assay))) -}) - -pbmc.assay2 <- pbmc.assay -pbmc.assay2@counts <- new("dgCMatrix") -test_that("Merging Assays handles case when counts not present", { - y <- merge(x = pbmc.assay2, y = pbmc.assay) - expect_equal(unname(colSums(x = GetAssayData(object = y, slot = "counts"))[1:80]), rep.int(x = 0, times = 80)) - z <- merge(x = pbmc.assay2, pbmc.assay2) - expect_equal(nnzero(x = GetAssayData(object = z, slot = "counts")), 0) -}) - -pbmc.assay2 <- pbmc.assay -pbmc.assay2@data <- new("dgCMatrix") -test_that("Merging Assays handles case when data not present", { - y <- merge(x = pbmc.assay2, y = pbmc.assay, merge.data = TRUE) - expect_equal(unname(colSums(x = GetAssayData(object = y, slot = "data"))[1:80]), rep.int(x = 0, times = 80)) - z <- merge(x = pbmc.assay2, y = pbmc.assay2, merge.data = TRUE) - expect_equal(nnzero(x = GetAssayData(object = z, slot = "data")), 0) -}) - -# Tests for Neighbor object -# ------------------------------------------------------------------------------ -context("Neighbor") - -# converting to Graph and back - -n.rann.ob <- NNHelper( - data = Embeddings(object = pbmc_small[["pca"]]), - query = Embeddings(object = pbmc_small[["pca"]]), - k = 10, - method = "rann") - -test_that("Neighbor object methods work", { - expect_equal(dim(x = Indices(object = n.rann.ob)), c(80, 10)) - expect_equal(dim(x = n.rann.ob), c(80, 10)) - expect_equal(as.numeric(Indices(object = n.rann.ob)[1, 7]), 45, ) - expect_equal(dim(x = Distances(object = n.rann.ob)), c(80, 10)) - expect_equal(as.numeric(Distances(object = n.rann.ob)[2, 2]), 2.643759, tolerance = 1e-6) - expect_equal(length(x = Cells(x = n.rann.ob)), 80) - expect_equal(Cells(x = n.rann.ob)[c(1, 20, 80)], c("ATGCCAGAACGACT", "TACATCACGCTAAC", "CTTGATTGATCTTC")) - pbmc_small[["n.ob"]] <- n.rann.ob - pbmc_small <- RenameCells(object = pbmc_small, add.cell.id = "test") - expect_equal(Cells(x = pbmc_small[['n.ob']])[1], c("test_ATGCCAGAACGACT")) - expect_equal(TopNeighbors(object = n.rann.ob, cell = "ATGCCAGAACGACT", n = 5)[5], "GATATAACACGCAT") - expect_equal(length(TopNeighbors(object = n.rann.ob, cell = "ATGCCAGAACGACT", n = 7)), 7) - nrg <- as.Graph(x = n.rann.ob) - expect_true(inherits(x = nrg, what = "Graph")) - expect_equal(as.numeric(Distances(object = n.rann.ob)[2, 3]), nrg[2, Indices(object = n.rann.ob)[2, 3]]) - nro2 <- as.Neighbor(x = nrg) - expect_true(inherits(x = nro2, what = "Neighbor")) - expect_equal(Distances(object = n.rann.ob)[2, 3], Distances(object = nro2)[2, 3]) - expect_equal(Indices(object = n.rann.ob)[1, 6], Indices(object = nro2)[1, 6]) -}) - -n.annoy.ob <- NNHelper( - data = Embeddings(object = pbmc_small[["pca"]]), - query = Embeddings(object = pbmc_small[["pca"]]), - k = 10, - method = "annoy", - cache.index = TRUE) -idx.file <- tempfile() -SaveAnnoyIndex(object = n.annoy.ob, file = idx.file) -nao2 <- LoadAnnoyIndex(object = n.annoy.ob, file = idx.file) - -test_that("Saving/Loading annoy index", { - expect_error(SaveAnnoyIndex(object = n.rann.ob, file = idx.file)) - expect_equal(head(Indices(n.annoy.ob)), head(Indices(nao2))) - expect_equal(head(Distances(n.annoy.ob)), head(Distances(nao2))) - expect_false(is.null(x = Index(nao2))) -}) - -# Tests for FetchData -# ------------------------------------------------------------------------------ -context("FetchData") - -# Features to test: -# able to pull cell embeddings, data, metadata -# subset of cells - -test_that("Fetching a subset of cells works", { - x <- FetchData(object = pbmc_small, cells = colnames(x = pbmc_small)[1:10], vars = rownames(x = pbmc_small)[1]) - expect_equal(rownames(x = x), colnames(x = pbmc_small)[1:10]) - random.cells <- sample(x = colnames(x = pbmc_small), size = 10) - x <- FetchData(object = pbmc_small, cells = random.cells, vars = rownames(x = pbmc_small)[1]) - expect_equal(rownames(x = x), random.cells) - x <- FetchData(object = pbmc_small, cells = 1:10, vars = rownames(x = pbmc_small)[1]) - expect_equal(rownames(x = x), colnames(x = pbmc_small)[1:10]) -}) - -suppressWarnings(pbmc_small[["RNA2"]] <- pbmc_small[["RNA"]]) -Key(pbmc_small[["RNA2"]]) <- "rna2_" - -test_that("Fetching keyed variables works", { - x <- FetchData(object = pbmc_small, vars = c(paste0("rna_", rownames(x = pbmc_small)[1:5]), paste0("rna2_", rownames(x = pbmc_small)[1:5]))) - expect_equal(colnames(x = x), c(paste0("rna_", rownames(x = pbmc_small)[1:5]), paste0("rna2_", rownames(x = pbmc_small)[1:5]))) - x <- FetchData(object = pbmc_small, vars = c(paste0("rna_", rownames(x = pbmc_small)[1:5]), paste0("PC_", 1:5))) - expect_equal(colnames(x = x), c(paste0("rna_", rownames(x = pbmc_small)[1:5]), paste0("PC_", 1:5))) -}) - -test_that("Fetching embeddings/loadings not present returns warning or errors", { - expect_warning(FetchData(object = pbmc_small, vars = c("PC_1", "PC_100"))) - expect_error(FetchData(object = pbmc_small, vars = "PC_100")) -}) - -bad.gene <- GetAssayData(object = pbmc_small[["RNA"]], slot = "data") -rownames(x = bad.gene)[1] <- paste0("rna_", rownames(x = bad.gene)[1]) -pbmc_small[["RNA"]]@data <- bad.gene - -# Tests for WhichCells -# ------------------------------------------------------------------------------ - -test_that("Specifying cells works", { - test.cells <- Cells(x = pbmc_small)[1:10] - expect_equal(WhichCells(object = pbmc_small, cells = test.cells), test.cells) - expect_equal(WhichCells(object = pbmc_small, cells = test.cells, invert = TRUE), setdiff(Cells(x = pbmc_small), test.cells)) -}) - -test_that("Specifying idents works", { - c12 <- WhichCells(object = pbmc_small, idents = c(1, 2)) - expect_equal(length(x = c12), 44) - expect_equal(c12[44], "CTTGATTGATCTTC") - expect_equal(c12, WhichCells(object = pbmc_small, idents = 0, invert = TRUE)) -}) - -test_that("downsample works", { - expect_equal(length(x = WhichCells(object = pbmc_small, downsample = 5)), 15) - expect_equal(length(x = WhichCells(object = pbmc_small, downsample = 100)), 80) -}) - -test_that("passing an expression works", { - lyz.pos <- WhichCells(object = pbmc_small, expression = LYZ > 1) - expect_true(all(GetAssayData(object = pbmc_small, slot = "data")["LYZ", lyz.pos] > 1)) - # multiple values in expression - lyz.pos <- WhichCells(object = pbmc_small, expression = LYZ > 1 & groups == "g1") - expect_equal(length(x = lyz.pos), 30) - expect_equal(lyz.pos[30], "CTTGATTGATCTTC") -}) - -# Tests for small other functions -# ------------------------------------------------------------------------------ -test_that("Top works", { - dat <- Embeddings(object = pbmc_small[['pca']])[, 1, drop = FALSE] - expect_warning(Top(data = dat, num = 1000, balanced = FALSE)) - tpc1 <- Top(data = dat, num = 20, balanced = FALSE) - expect_equal(length(x = tpc1), 20) - expect_equal(tpc1[1], "ACGTGATGCCATGA") - expect_equal(tpc1[20], "GTCATACTTCGCCT") - tpc1b <- Top(data = dat, num = 20, balanced = TRUE) - expect_equal(length(x = tpc1b), 2) - expect_equal(names(tpc1b), c("positive", "negative")) - expect_equal(length(tpc1b[[1]]), 10) - expect_equal(length(tpc1b[[2]]), 10) - expect_equal(tpc1b[[1]][1], "GTCATACTTCGCCT") - expect_equal(tpc1b[[1]][10], "CTTGATTGATCTTC") - expect_equal(tpc1b[[2]][1], "ACGTGATGCCATGA") - expect_equal(tpc1b[[2]][10], "ATTGTAGATTCCCG") - tpc1.sub <- Top(data = dat[1:79, , drop = FALSE], num = 79, balanced = TRUE) - expect_equal(length(tpc1.sub[[1]]), 40) - expect_equal(length(tpc1.sub[[2]]), 39) -}) - - # Tests for SCE conversion # ------------------------------------------------------------------------------ test_that("as.SingleCellExperiment works", { From 805d3e657fac0841f8c66725c5f11c43804219e5 Mon Sep 17 00:00:00 2001 From: rsatija Date: Tue, 29 Aug 2023 15:19:06 -0400 Subject: [PATCH 707/979] Modified integration tests (everything passes but needs one bug fix) --- tests/testthat/test_integration.R | 69 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/tests/testthat/test_integration.R b/tests/testthat/test_integration.R index 4236f4217..5f92e8cf0 100644 --- a/tests/testthat/test_integration.R +++ b/tests/testthat/test_integration.R @@ -256,9 +256,9 @@ test_that("FindTransferAnchors with no l2 works", { expect_equal(anchors@neighbors, list()) }) -# SCTransform tests -query <- suppressWarnings(SCTransform(object = query, verbose = FALSE)) -ref <- suppressWarnings(SCTransform(object = ref, verbose = FALSE)) +# SCTransform tests V1 +query <- suppressWarnings(SCTransform(object = query, verbose = FALSE,vst.flavor = 'v1')) +ref <- suppressWarnings(SCTransform(object = ref, verbose = FALSE,vst.flavor = 'v1')) test_that("FindTransferAnchors with default SCT works", { skip_on_cran() @@ -312,10 +312,10 @@ test_that("FindTransferAnchors with default SCT works", { expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) expect_equal(dim(co[['cca']]), c(160, 30)) expect_equal(Embeddings(co[['cca']])[1, 1], 0.0459135444, tolerance = 1e-7) - expect_equal(Loadings(co[['cca']], projected = T)[1, 1], 8.51477973, tolerance = 1e-7) + expect_equal(Loadings(co[['cca']], projected = T)["NKG7", 1], 8.51477973, tolerance = 1e-7) expect_equal(dim(co[['cca.l2']]), c(160, 30)) expect_equal(Embeddings(co[['cca.l2']])[1, 1], 0.0625989664, tolerance = 1e-7) - expect_equal(Loadings(co[['cca.l2']], projected = T)[1, 1], 8.51477973, tolerance = 1e-7) + expect_equal(Loadings(co[['cca.l2']], projected = T)["NKG7", 1], 8.51477973, tolerance = 1e-7) ref.cells <- paste0(Cells(ref), "_reference") query.cells <- paste0(Cells(query), "_query") expect_equal(anchors@reference.cells, ref.cells) @@ -331,35 +331,36 @@ test_that("FindTransferAnchors with default SCT works", { expect_equal(anchors@neighbors, list()) }) -test_that("FindTransferAnchors with SCT and project.query work", { - skip_on_cran() - anchors <- FindTransferAnchors(reference = ref, query = query, normalization.method = "SCT", project.query = TRUE, k.filter = 50, recompute.residuals = FALSE) - co <- anchors@object.list[[1]] - expect_equal(dim(co), c(220, 160)) - expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) - expect_equal(DefaultAssay(co), "SCT") - expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) - expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new("matrix")) - expect_equal(dim(co[['pcaproject']]), c(160, 30)) - expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.3308694488, tolerance = 1e-7) - expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) - expect_equal(dim(co[['pcaproject.l2']]), c(160, 30)) - expect_equal(Embeddings(co[['pcaproject.l2']])[1, 1], 0.03807493471, tolerance = 1e-7) - expect_equal(Loadings(co[['pcaproject.l2']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) - ref.cells <- paste0(Cells(ref), "_reference") - query.cells <- paste0(Cells(query), "_query") - expect_equal(anchors@reference.cells, ref.cells) - expect_equal(anchors@query.cells, query.cells) - expect_equal(anchors@reference.objects, logical()) - anchor.mat <- anchors@anchors - expect_equal(dim(anchor.mat), c(288, 3)) - expect_equal(as.vector(anchor.mat[1, ]), c(1, 1, 0.6138996139), tolerance = 1e-7) - expect_equal(max(anchor.mat[, 2]), 80) - expect_null(anchors@offsets) - expect_equal(length(anchors@anchor.features), 220) - expect_equal(anchors@anchor.features[1], "PPBP") - expect_equal(anchors@neighbors, list()) -}) +# test_that("FindTransferAnchors with SCT and project.query work", { +# skip_on_cran() +# anchors <- FindTransferAnchors(reference = ref, query = query, normalization.method = "SCT", project.query = TRUE, k.filter = 50, recompute.residuals = FALSE) +# co <- anchors@object.list[[1]] +# expect_equal(dim(co), c(220, 160)) +# expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) +# expect_equal(DefaultAssay(co), "SCT") +# expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) +# expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new("matrix")) +# expect_equal(dim(co[['pcaproject']]), c(160, 30)) +# expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.3308694488, tolerance = 1e-7) +# expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) +# expect_equal(dim(co[['pcaproject.l2']]), c(160, 30)) +# expect_equal(Embeddings(co[['pcaproject.l2']])[1, 1], 0.03807493471, tolerance = 1e-7) +# expect_equal(Loadings(co[['pcaproject.l2']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) +# ref.cells <- paste0(Cells(ref), "_reference") +# query.cells <- paste0(Cells(query), "_query") +# expect_equal(anchors@reference.cells, ref.cells) +# expect_equal(anchors@query.cells, query.cells) +# expect_equal(anchors@reference.objects, logical()) +# anchor.mat <- anchors@anchors +# expect_equal(dim(anchor.mat), c(288, 3)) +# expect_equal(as.vector(anchor.mat[1, ]), c(1, 1, 0.6138996139), tolerance = 1e-7) +# expect_equal(max(anchor.mat[, 2]), 80) +# expect_null(anchors@offsets) +# expect_equal(length(anchors@anchor.features), 220) +# expect_equal(anchors@anchor.features[1], "PPBP") +# expect_equal(anchors@neighbors, list()) +# }) +# test_that("FindTransferAnchors with SCT and l2.norm FALSE work", { skip_on_cran() From f56cb79273c432e4ee181f8303a26017b140c018 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 29 Aug 2023 17:58:53 -0400 Subject: [PATCH 708/979] Retire DelayedArray for SCTransform.StdAssay --- R/preprocessing.R | 6 +- R/preprocessing5.R | 594 +++++++++++++++++++++++---------------------- 2 files changed, 312 insertions(+), 288 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index b7f5f728c..26d440114 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3411,7 +3411,11 @@ SCTransform.default <- function( ) vst.out$y <- scale.data vst.out$variable_features <- residual.features %||% top.features - + if (!do.correct.umi) { + vst.out$umi_corrected <- umi + } + min_var <- vst.out$arguments$min_variance + message("min_varxxxx", min_var) return(vst.out) } diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 63e675727..91c0adad1 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1372,6 +1372,106 @@ DISP <- function( ################################# SCTransform ################################## ################################################################################ +#' @importFrom SeuratObject Cells as.sparse +#' +#' @method SCTransform StdAssay +#' @rdname SCTransform +#' @concept preprocessing +#' @export +#' @method SCTransform Assay +#' +SCTransform.IterableMatrix <- function( + object, + cell.attr, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + vst.flavor = 'v2', + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) { + if (!is.null(x = seed.use)) { + set.seed(seed = seed.use) + } + if (!is.null(reference.SCT.model)){ + do.correct.umi <- FALSE + do.center <- FALSE + } + sampled_cells <- sample.int(n = ncol(x = object), size = min(ncells, ncol(x = object))) + umi <- as.sparse(x = object[, sampled_cells]) + message("umi", dim(umi)) + cell.attr <- cell.attr[colnames(x = umi),,drop=FALSE] + vst.out <- SCTransform(object = umi, + cell.attr = cell.attr, + reference.SCT.model = reference.SCT.model, + do.correct.umi = do.correct.umi, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + vst.flavor = vst.flavor, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = verbose, + ...) + if (!do.correct.umi) { + vst.out$umi_corrected <- umi + } + return(vst.out) +} + + +#' @importFrom SeuratObject CreateAssayObject SetAssayData GetAssayData +#' Create SCT assay from vst.out output +CreateSCTAssay <- function(vst.out, do.correct.umi, residual.type, clip.range){ + residual.type <- vst.out[['residual_type']] %||% 'pearson' + sct.method <- vst.out[['sct.method']] + assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + + # create output assay and put (corrected) umi counts in count slot + # if (do.correct.umi & residual.type == 'pearson') { + # assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) + # vst.out$umi_corrected <- NULL + # } else { + # assay.out <- CreateAssayObject(counts = counts.chunk) + # } + # set the variable genes + VariableFeatures(object = assay.out) <- vst.out$variable_features + # put log1p transformed counts in data + assay.out <- SetAssayData( + object = assay.out, + slot = 'data', + new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) + ) + scale.data <- vst.out$y + assay.out <- SetAssayData( + object = assay.out, + slot = 'scale.data', + new.data = scale.data + ) + vst.out$y <- NULL + # save clip.range into vst model + vst.out$arguments$sct.clip.range <- clip.range + vst.out$arguments$sct.method <- sct.method + Misc(object = assay.out, slot = 'vst.out') <- vst.out + assay.out <- as(object = assay.out, Class = "SCTAssay") + return (assay.out) +} #' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features #' LayerData LayerData<- @@ -1397,8 +1497,7 @@ SCTransform.StdAssay <- function( return.only.var.genes = TRUE, seed.use = 1448145, verbose = TRUE, - ... -) { + ...) { if (!is.null(reference.SCT.model)){ do.correct.umi <- FALSE do.center <- FALSE @@ -1406,7 +1505,11 @@ SCTransform.StdAssay <- function( olayer <- layer <- unique(x = layer) layers <- Layers(object = object, search = layer) dataset.names <- gsub(pattern = paste0(layer, "."), replacement = "", x = layers) + # loop over layers performing SCTransform() on individual layers sct.assay.list <- list() + # Keep a tab of variable features per chunk + variable.feature.list <- list() + for (dataset.index in seq_along(along.with = layers)) { l <- layers[dataset.index] if (isTRUE(x = verbose)) { @@ -1414,243 +1517,140 @@ SCTransform.StdAssay <- function( } all_cells <- Cells(x = object, layer = l) all_features <- Features(x = object, layer = l) - counts <- LayerData( + layer.data <- LayerData( object = object, layer = l, features = all_features, cells = all_cells ) - sparse <- DelayedArray::is_sparse(x = counts) - ## Sample cells - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(ncells, ncol(counts))) - # if there is no reference model we randomly select a subset of cells - # TODO: randomize this set of cells - variable.feature.list <- list() - GetSCT.Chunked <- function(vp, reference.SCT.model = NULL, do.correct.umi = TRUE){ - # counts here is global - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = sparse) - counts.chunk <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts.chunk),, drop=FALSE] - - if (!identical(rownames(cell.attr.object), colnames(counts.chunk))) { - stop("cell attribute row names must match column names of count matrix") - } - vst.out <- SCTransform(object = counts.chunk, - cell.attr = cell.attr.object, - reference.SCT.model = reference.SCT.model, - do.correct.umi = do.correct.umi, - ncells = ncells, - residual.features = residual.features, - variable.features.n = variable.features.n, - variable.features.rv.th = variable.features.rv.th, - vars.to.regress = vars.to.regress, - do.scale = FALSE, - do.center = FALSE, - clip.range = clip.range, - conserve.memory = conserve.memory, - return.only.var.genes = return.only.var.genes, - seed.use = seed.use, - verbose = FALSE, - ...) - residual.type <- vst.out[['residual_type']] %||% 'pearson' - sct.method <- vst.out[['sct.method']] - # create output assay and put (corrected) umi counts in count slot - if (do.correct.umi & residual.type == 'pearson') { - if (verbose) { - message('Place corrected count matrix in counts slot') - } - assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) - vst.out$umi_corrected <- NULL - } else { - # TODO: restore once check.matrix is in SeuratObject - # assay.out <- CreateAssayObject(counts = umi, check.matrix = FALSE) - assay.out <- CreateAssayObject(counts = counts.chunk) - } - # set the variable genes - VariableFeatures(object = assay.out) <- vst.out$variable_features - # put log1p transformed counts in data - assay.out <- SetAssayData( - object = assay.out, - slot = 'data', - new.data = log1p(x = GetAssayData(object = assay.out, slot = 'counts')) - ) - scale.data <- vst.out$y - assay.out <- SetAssayData( - object = assay.out, - slot = 'scale.data', - new.data = scale.data - ) - vst.out$y <- NULL - # save clip.range into vst model - vst.out$arguments$sct.clip.range <- clip.range - vst.out$arguments$sct.method <- sct.method - Misc(object = assay.out, slot = 'vst.out') <- vst.out - assay.out <- as(object = assay.out, Class = "SCTAssay") - # does not like character(0) keys being merged - return (assay.out) - } local.reference.SCT.model <- NULL - if (is.null(reference.SCT.model)){ - # No reference model so just select the some block of cells - set.seed(seed = seed.use) - selected.block <- sample(x = seq.int(from = 1, to = length(cells.grid)), size = 1) - if (verbose){ - message("Using block ", selected.block, " from ", dataset.names[[dataset.index]], " to learn model.") + set.seed(seed = seed.use) + do.correct.umi.chunk <- FALSE + sct.function <- if (inherits(x = layer.data, what = 'V3Matrix')) { + SCTransform.default + } else { + SCTransform } - vp <- cells.grid[[selected.block]] + if (is.null(x = cell.attr)){ + calcn <- CalcN(object = object) + cell.attr <- data.frame(umi = calcn$nCount_RNA, + log_umi = log10(x = calcn$nCount_RNA)) + rownames(cell.attr) <- colnames(x = layer.data) + } - do.correct.umi.chunk <- FALSE - # correct umi if only single chunk - if (length(x = cells.grid) == 1) { - do.correct.umi.chunk <- TRUE - } - assay.out <- GetSCT.Chunked(vp = vp, do.correct.umi = do.correct.umi.chunk) + # Step 1: Learn model + message("layer.data", dim(layer.data)) + vst.out <- sct.function(object = layer.data, + do.correct.umi = FALSE, + cell.attr = cell.attr, + reference.SCT.model = reference.SCT.model, + ncells = ncells, + residual.features = residual.features, + variable.features.n = variable.features.n, + variable.features.rv.th = variable.features.rv.th, + vars.to.regress = vars.to.regress, + do.scale = do.scale, + do.center = do.center, + clip.range = clip.range, + conserve.memory = conserve.memory, + return.only.var.genes = return.only.var.genes, + seed.use = seed.use, + verbose = FALSE) + min_var <- vst.out$arguments$min_variance + message("min_var", min_var) + assay.out <- CreateSCTAssay(vst.out = vst.out, do.correct.umi = do.correct.umi, residual.type = residual.type, + clip.range = clip.range) + + # If there is no reference model, use the model learned on subset of cells to calculate residuals + # by setting the learned model as the reference model (local.reference.SCT.model) + if (is.null(x = reference.SCT.model)) { local.reference.SCT.model <- assay.out@SCTModel.list[[1]] - variable.features <- VariableFeatures(assay.out) - # once we have the model, just calculate residuals for all - # cells - vst_out.reference <- SCTModel_to_vst(SCTModel = local.reference.SCT.model) - vst_out.reference$gene_attr <- local.reference.SCT.model@feature.attributes - min_var <- vst_out.reference$arguments$min_variance - if (min_var == "umi_median"){ - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = TRUE) - - counts.x <- as(object = block, Class = 'dgCMatrix') - min_var <- (median(counts.x@x)/5)^2 - } - res_clip_range <- vst_out.reference$arguments$res_clip_range - residuals <- list() - corrected_counts <- list() - cell_attrs <- list() - if (length(x = cells.grid) == 1){ - merged.assay <- assay.out - corrected_counts[[1]] <- GetAssayData(object = assay.out, slot="data") - residuals[[1]] <- GetAssayData(object = assay.out, slot="scale.data") - cell_attrs[[1]] <- vst_out.reference$cell_attr - sct.assay.list[[dataset.names[dataset.index]]] <- assay.out - } else { - # iterate over chunks to get residuals - for (i in seq_len(length.out = length(x = cells.grid))) { - vp <- cells.grid[[i]] - if (verbose){ - message("Getting residuals for block ", i, "(of ", length(cells.grid), ") for ", dataset.names[[dataset.index]], " dataset") - } - block <- DelayedArray::read_block(x = counts, - viewport = vp, - as.sparse = TRUE) - - counts.vp <- as(object = block, Class = 'dgCMatrix') - cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] - vst_out <- vst_out.reference - cell_attr <- data.frame( - umi = colSums(counts.vp), - log_umi = log10(x = colSums(counts.vp)) - ) - rownames(cell_attr) <- colnames(counts.vp) - vst_out$cell_attr <- cell_attr - vst_out$gene_attr <- vst_out$gene_attr[variable.features,] - if (return.only.var.genes){ - new_residual <- get_residuals( - vst_out = vst_out, - umi = counts.vp[variable.features,], - residual_type = "pearson", - min_variance = min_var, - res_clip_range = res_clip_range, - verbosity = FALSE#as.numeric(x = verbose) * 2 - ) - } else { - new_residual <- get_residuals( - vst_out = vst_out, - umi = counts.vp[all.features,], - residual_type = "pearson", - min_variance = min_var, - res_clip_range = res_clip_range, - verbosity = FALSE#as.numeric(x = verbose) * 2 - ) - } - vst_out$y <- new_residual - corrected_counts[[i]] <- correct_counts( - x = vst_out, - umi = counts.vp[all_features,], - verbosity = FALSE# as.numeric(x = verbose) * 2 - ) - residuals[[i]] <- new_residual - cell_attrs[[i]] <- cell_attr - } - new.residuals <- Reduce(cbind, residuals) - - corrected_counts <- Reduce(cbind, corrected_counts) - cell_attrs <- Reduce(rbind, cell_attrs) - - vst_out.reference$cell_attr <- cell_attrs[colnames(new.residuals),] - SCTModel.list <- PrepVSTResults( - vst.res = vst_out.reference, - cell.names = all_cells - ) - SCTModel.list <- list(model1 = SCTModel.list) - - # scale data here as do.center and do.scale are set to FALSE inside - new.residuals <- ScaleData( - new.residuals, - features = NULL, - #vars.to.regress = vars.to.regress, - #latent.data = cell.attr[, vars.to.regress, drop = FALSE], - model.use = 'linear', - use.umi = FALSE, - do.scale = do.scale, - do.center = do.center, - scale.max = Inf, - block.size = 750, - min.cells.to.block = 3000, - verbose = verbose - ) - assay.out <- CreateSCTAssayObject( - counts = corrected_counts, - scale.data = new.residuals, - SCTModel.list = SCTModel.list - ) - assay.out$data <- log1p(x = corrected_counts) - VariableFeatures(assay.out) <- variable.features - # one assay per dataset - if (verbose){ - message("Finished calculating residuals for ", dataset.names[dataset.index]) - } - sct.assay.list[[dataset.names[dataset.index]]] <- assay.out - variable.feature.list[[dataset.names[dataset.index]]] <- VariableFeatures(assay.out) + } else { + local.reference.SCT.model <- reference.SCT.model + } + variable.features <- VariableFeatures(assay.out) + + # once we have the model, just calculate residuals for all cells + # local.reference.SCT.model set to reference.model if it is non null + vst_out.reference <- SCTModel_to_vst(SCTModel = local.reference.SCT.model) + vst_out.reference$gene_attr <- local.reference.SCT.model@feature.attributes + min_var <- vst_out.reference$arguments$min_variance + message("min_var", min_var) + if (min_var == "umi_median"){ + counts.x <- as.sparse(x = layer.data[, sample.int(n = ncol(x = layer.data), size = min(ncells, ncol(x = layer.data)) )]) + min_var <- (median(counts.x@x)/5)^2 } - } else { ### With reference model - sct.assay.list.temp <- list() + res_clip_range <- vst_out.reference$arguments$res_clip_range + + # Step 2: Use learned model to calculate residuals in chunks + cells.vector <- 1:ncol(x = layer.data) + cells.grid <- split(x = cells.vector, f = ceiling(x = seq_along(along.with = cells.vector)/ncells)) + # Single block + residuals <- list() + corrected_counts <- list() + cell_attrs <- list() + + message("cells_length", length(cells.grid)) + if (length(x = cells.grid) == 1){ + merged.assay <- assay.out + corrected_counts[[1]] <- GetAssayData(object = assay.out, slot = "data") + residuals[[1]] <- GetAssayData(object = assay.out, slot = "scale.data") + cell_attrs[[1]] <- vst_out.reference$cell_attr + sct.assay.list[[dataset.names[dataset.index]]] <- assay.out + } else { + # iterate over chunks to get residuals for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - if (verbose){ message("Getting residuals for block ", i, "(of ", length(cells.grid), ") for ", dataset.names[[dataset.index]], " dataset") } - - assay.out <- GetSCT.Chunked(vp = vp, - reference.SCT.model = reference.SCT.model, - do.correct.umi = do.correct.umi) - sct.assay.list.temp[[paste0("chunk", i)]] <- assay.out - } - if (length(sct.assay.list.temp)>1){ - # this currently fails in merge.StdAssay step - # assignment of an object of class “list” is not valid for - # slot ‘key’ in an object of class “Assay”; is(value, "character") is not TRUE - assay.out <- merge(x = sct.assay.list.temp[[1]], - y = sct.assay.list.temp[2:length(sct.assay.list.temp)]) - + counts.vp <- as.sparse(x = layer.data[, vp]) + cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] + vst_out <- vst_out.reference + # cell_attr <- data.frame( + # umi = colSums(counts.vp), + # log_umi = log10(x = colSums(counts.vp)) + # ) + # rownames(cell_attr) <- colnames(counts.vp) + vst_out$cell_attr <- cell.attr.object + vst_out$gene_attr <- vst_out$gene_attr[variable.features,] + if (return.only.var.genes){ + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[variable.features,], + residual_type = "pearson", + min_variance = min_var, + res_clip_range = res_clip_range, + verbosity = FALSE + ) } else { - assay.out <- sct.assay.list.temp[[1]] + new_residual <- get_residuals( + vst_out = vst_out, + umi = counts.vp[all.features,], + residual_type = "pearson", + min_variance = min_var, + res_clip_range = res_clip_range, + verbosity = FALSE + ) } - ## DoScaling - scale.data <- GetAssayData(object = assay.out, slot = "scale.data") + vst_out$y <- new_residual + corrected_counts[[i]] <- correct_counts( + x = vst_out, + umi = counts.vp[all_features,], + verbosity = FALSE# as.numeric(x = verbose) * 2 + ) + residuals[[i]] <- new_residual + cell_attrs[[i]] <- cell_attr + } + new.residuals <- Reduce(cbind, residuals) + corrected_counts <- Reduce(cbind, corrected_counts) + cell_attrs <- Reduce(rbind, cell_attrs) + vst_out.reference$cell_attr <- cell_attrs[colnames(new.residuals),] + SCTModel.list <- PrepVSTResults(vst.res = vst_out.reference, cell.names = all_cells) + SCTModel.list <- list(model1 = SCTModel.list) # scale data here as do.center and do.scale are set to FALSE inside - scale.data <- ScaleData( - scale.data, + new.residuals <- ScaleData( + new.residuals, features = NULL, #vars.to.regress = vars.to.regress, #latent.data = cell.attr[, vars.to.regress, drop = FALSE], @@ -1663,83 +1663,64 @@ SCTransform.StdAssay <- function( min.cells.to.block = 3000, verbose = verbose ) - assay.out <- SetAssayData(object = assay.out, slot = "scale.data", new.data = scale.data) + assay.out <- CreateSCTAssayObject(counts = corrected_counts, scale.data = new.residuals, SCTModel.list = SCTModel.list) + assay.out$data <- log1p(x = corrected_counts) + VariableFeatures(assay.out) <- variable.features + # one assay per dataset if (verbose){ message("Finished calculating residuals for ", dataset.names[dataset.index]) } sct.assay.list[[dataset.names[dataset.index]]] <- assay.out - variable.feature.list[[dataset.names[dataset.index]]] <- rownames(assay.out) + variable.feature.list[[dataset.names[dataset.index]]] <- VariableFeatures(assay.out) } } -# Return array by merging everythin - if (length(x = sct.assay.list) > 1){ - vf.list <- lapply(X = sct.assay.list, FUN = function(object.i) VariableFeatures(object = object.i)) - variable.features.union <- Reduce(f = union, x = vf.list) - var.features.sorted <- sort( - x = table(unlist(x = vf.list, use.names = FALSE)), - decreasing = TRUE - ) - # idx <- which(x = var.features == length(x = sct.assay.list)) - # select top ranking features - #var.features <- names(x = var.features.sorted[1:variable.features.n]) - # calculate residuals for union of features - var.features <- variable.features.union - for (layer.name in names(sct.assay.list)){ - vst_out <- SCTModel_to_vst(SCTModel = slot(object = sct.assay.list[[layer.name]], name = "SCTModel.list")[[1]]) - all_cells <- Cells(x = object, layer = paste0(layer, ".", layer.name)) - all_features <- Features(x = object, layer = paste0(layer, ".", layer.name)) - variable.features.target <- intersect(x = rownames(x = vst_out$model_pars_fit), y = var.features) - variable.features.target <- setdiff(x = variable.features.target, y = VariableFeatures(sct.assay.list[[layer.name]])) - if (length(variable.features.target )<1){ - next + # Return array by merging everythin + if (length(x = sct.assay.list) == 1){ + merged.assay <- sct.assay.list[[1]] + } else { + vf.list <- lapply(X = sct.assay.list, FUN = function(object.i) VariableFeatures(object = object.i)) + variable.features.union <- Reduce(f = union, x = vf.list) + var.features.sorted <- sort( + x = table(unlist(x = vf.list, use.names = FALSE)), + decreasing = TRUE + ) + # select top ranking features + var.features <- variable.features.union + # calculate residuals for union of features + for (layer.name in names(x = sct.assay.list)){ + vst_out <- SCTModel_to_vst(SCTModel = slot(object = sct.assay.list[[layer.name]], name = "SCTModel.list")[[1]]) + all_cells <- Cells(x = object, layer = paste0(layer, ".", layer.name)) + all_features <- Features(x = object, layer = paste0(layer, ".", layer.name)) + variable.features.target <- intersect(x = rownames(x = vst_out$model_pars_fit), y = var.features) + variable.features.target <- setdiff(x = variable.features.target, y = VariableFeatures(sct.assay.list[[layer.name]])) + if (length(x = variable.features.target )<1){ + next + } + layer.counts.tmp <- LayerData( + object = object, + layer = paste0(layer, ".", layer.name), + cells = all_cells + ) + vst_out$cell_attr <- vst_out$cell_attr[, c("log_umi"), drop=FALSE] + vst_out$model_pars_fit <- vst_out$model_pars_fit[variable.features.target,,drop=FALSE] + new_residual <- GetResidualsChunked(vst_out = vst_out, layer.counts = layer.counts.tmp, + residual_type = "pearson", min_variance = min_variance, verbose = FALSE) + old_residual <- GetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data') + merged_residual <- rbind(old_residual, new_residual) + sct.assay.list[[layer.name]] <- SetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data', new.data = merged_residual) + VariableFeatures(sct.assay.list[[layer.name]]) <- rownames(x = merged_residual) } - counts <- LayerData( - object = object, - layer = paste0(layer, ".", layer.name), - cells = all_cells - ) - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = ncol(counts)) - vp <- cells.grid[[1L]] - block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) - counts.vp <- as(object = block, Class = 'dgCMatrix') + merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) + VariableFeatures(object = merged.assay) <- VariableFeatures(object = merged.assay, use.var.features = FALSE, nfeatures = variable.features.n) - if (vst_out$arguments$min_var == "umi_median"){ - nz_median <- median(counts.vp@x) - min_var_custom <- (nz_median / 5)^2 - } else { - min_var_custom <- vst_out$arguments$min_var - } - vst_out$cell_attr <- vst_out$cell_attr[, c("log_umi"), drop=FALSE] - vst_out$model_pars_fit <- vst_out$model_pars_fit[variable.features.target,,drop=FALSE] - - new_residual <- get_residuals( - vst_out = vst_out, - umi = counts.vp[variable.features.target,], - residual_type = "pearson", - min_variance = min_var_custom, - verbosity = FALSE - ) - old_residual <- GetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data') - merged_residual <- rbind(old_residual, new_residual) - sct.assay.list[[layer.name]] <- SetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data', new.data = merged_residual) - VariableFeatures(sct.assay.list[[layer.name]]) <- rownames(x = merged_residual) } - merged.assay <- merge(x = sct.assay.list[[1]], y = sct.assay.list[2:length(sct.assay.list)]) - VariableFeatures(object = merged.assay) <- VariableFeatures( - object = merged.assay, - use.var.features = FALSE, - nfeatures = variable.features.n - ) - # set the names of SCTmodels to be layer names - models <- slot(object = merged.assay, name="SCTModel.list") - names(models) <- names(x = sct.assay.list) - slot(object = merged.assay, name="SCTModel.list") <- models - } else { - merged.assay <- sct.assay.list[[1]] - } + # set the names of SCTmodels to be layer names + models <- slot(object = merged.assay, name="SCTModel.list") + names(models) <- names(x = sct.assay.list) + slot(object = merged.assay, name="SCTModel.list") <- models gc(verbose = FALSE) return(merged.assay) -} + } #' Calculate pearson residuals of features not in the scale.data @@ -2165,6 +2146,45 @@ FetchResidualSCTModel <- function(object, return(new_residual) } +#'@importFrom sctransform get_residual +GetResidualsChunked <- function(vst_out, layer.counts, residual_type, min_variance, res_clip_range, verbose, chunk_size=5000) { + if (inherits(x = layer.counts, what = 'V3Matrix')) { + residuals <- get_residuals( + vst_out = vst_out, + umi = layer.counts, + residual_type = residual_type, + min_variance = min_variance, + res_clip_range = res_clip_range, + verbosity = as.numeric(x = verbose) * 2 + ) + } else if (inherits(x = layer.counts, what = "IterableMatrix")) { + cells.vector <- 1:ncol(x = layer.counts) + residuals.list <- list() + cells.grid <- split(x = cells.vector, f = ceiling(x = seq_along(along.with = cells.vector)/chunk_size)) + for (i in seq_len(length.out = length(x = cells.grid))) { + vp <- cells.grid[[i]] + counts.vp <- as.sparse(x = layer.data[, vp]) + vst.out <- vst_out + vst.out$cell_attr <- vst.out$cell_attr[colnames(x = counts.vp),,drop=FALSE] + residuals.list[[i]] <- get_residuals( + vst_out = vst.out, + umi = counts.vp, + residual_type = residual_type, + min_variance = min_variance, + res_clip_range = res_clip_range, + verbosity = as.numeric(x = verbose) * 2 + ) + } + residuals <- Reduce(f = cbind, x = residuals.list) + } else { + stop("Data type not supported") + } + return (residuals) + + + +} + #' temporal function to get residuals from reference #' @importFrom sctransform get_residuals #' @importFrom Matrix colSums From 9e98544b9752e8d57b174119a07a8b6897b01ead Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 30 Aug 2023 09:58:37 -0400 Subject: [PATCH 709/979] preprocessing + utilities tests --- tests/testthat/test_preprocessing.R | 8 ++++---- tests/testthat/test_utilities.R | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 8019ebbf7..f5d3f9b35 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -73,8 +73,8 @@ test_that("NormalizeData scales properly", { normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE) test_that("LogNormalize normalizes properly", { expect_equal( - LogNormalize(data = GetAssayData(object = object[["RNA"]], slot = "counts"), verbose = FALSE), - LogNormalize(data = as.data.frame(as.matrix(GetAssayData(object = object[["RNA"]], slot = "counts"))), verbose = FALSE) + as.matrix(LogNormalize(data = GetAssayData(object = object[["RNA"]], slot = "counts"), verbose = FALSE)), + as.matrix(LogNormalize(data = as.data.frame(as.matrix(GetAssayData(object = object[["RNA"]], slot = "counts"))), verbose = FALSE)) ) }) @@ -289,7 +289,7 @@ test_that("CustomNormalize works as expected", { # Tests for SCTransform # ------------------------------------------------------------------------------ context("SCTransform") -object <- suppressWarnings(SCTransform(object = object, verbose = FALSE)) +object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor=NULL)) test_that("SCTransform wrapper works as expected", { expect_true("SCT" %in% names(object)) @@ -309,7 +309,7 @@ test_that("SCTransform wrapper works as expected", { }) suppressWarnings(RNGversion(vstr = "3.5.0")) -object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42)) +object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42, vst.flavor = NULL)) test_that("SCTransform ncells param works", { expect_true("SCT" %in% names(object)) expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 12.02126, tolerance = 1e6) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 3a6ea1db6..7387c5079 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -43,13 +43,13 @@ test_that("AverageExpression works for different slots", { object <- ScaleData(object = object, verbose = FALSE) avg.scale <- AverageExpression(object, slot = "scale.data")$RNA expect_equal( - avg.scale['MS4A1', ], - c(a = 0.02092088, b = -0.004769018, c = -0.018369549), + unname(avg.scale['MS4A1', ]), + unname(c(a = 0.02092088, b = -0.004769018, c = -0.018369549)), tolerance = 1e-6 ) expect_equal( - avg.scale['SPON2', ], - c(a = 0.1052434, b = 0.2042827, c = -0.3397051), + unname(avg.scale['SPON2', ]), + unname(c(a = 0.1052434, b = 0.2042827, c = -0.3397051)), tolerance = 1e-6 ) }) @@ -95,8 +95,8 @@ test_that("AverageExpression with return.seurat", { avg.data <- AverageExpression(object, slot = "data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.data, "Seurat") avg.data.mat <- AverageExpression(object, slot = 'data')$RNA - expect_equal(as.matrix(LayerData(avg.data[["RNA"]], layer = "counts")), avg.data.mat) - expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "data"))), unname(log1p(x = avg.data.mat))) + expect_equal(as.matrix(LayerData(avg.data[["RNA"]], layer = "counts")), as.matrix(avg.data.mat)) + expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "data"))), as.matrix(unname(log1p(x = avg.data.mat)))) avg.scale <- LayerData(avg.data[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], @@ -111,10 +111,10 @@ test_that("AverageExpression with return.seurat", { # scale.data object <- ScaleData(object = object, verbose = FALSE) - avg.scale <- AverageExpression(object, slot = "scale.data", return.seurat = TRUE, verbose = FALSE) + avg.scale <- AverageExpression(object, layer = "scale.data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.scale, "Seurat") avg.scale.mat <- AverageExpression(object, slot = 'scale.data')$RNA - expect_equal(unname(as.matrix(LayerData(avg.scale[["RNA"]], layer = "scale.data"))), unname(avg.scale.mat)) + expect_equal(unname(as.matrix(LayerData(avg.scale[["RNA"]], layer = "scale.data"))), unname(as.matrix(avg.scale.mat))) expect_true(all(is.na(LayerData(avg.scale[["RNA"]], layer = "data")))) expect_equal(LayerData(avg.scale[["RNA"]], layer = "counts"), matrix()) }) From 004ef5eb24b4cab81775735e968ede2c9ee26e89 Mon Sep 17 00:00:00 2001 From: rsatija Date: Wed, 30 Aug 2023 10:43:38 -0400 Subject: [PATCH 710/979] Fix bug where normalization.method was missing as an argument to FindTransferAnchors --- R/integration.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index c96416a51..b3220d473 100644 --- a/R/integration.R +++ b/R/integration.R @@ -908,7 +908,8 @@ FindTransferAnchors <- function( query = reference, scale = scale, dims = dims, - verbose = verbose + verbose = verbose, + normalization.method = normalization.method ) orig.embeddings <- Embeddings(object = query[[reference.reduction]])[, dims] orig.loadings <- Loadings(object = query[[reference.reduction]]) From 255c7544fe4eea11ae872c8fe289dc3201b09890 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 30 Aug 2023 11:41:42 -0400 Subject: [PATCH 711/979] changing default layers to data for integration --- R/integration5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index 9ff1ab271..48f30aa86 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -526,7 +526,7 @@ IntegrateLayers <- function( group.by = NULL, assay = NULL, features = NULL, - layers = NULL, + layers = 'data', scale.layer = 'scale.data', ... ) { From 0f8830c7cbd5a8470eb33d4b71e28cf417607622 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 30 Aug 2023 11:56:49 -0400 Subject: [PATCH 712/979] adding default layer = data for each integration method --- R/integration5.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 48f30aa86..37f654eee 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -73,7 +73,7 @@ HarmonyIntegration <- function( features = NULL, scale.layer = 'scale.data', new.reduction = 'harmony', - layers = NULL, + layers = 'data', npcs = 50L, key = 'harmony_', theta = NULL, @@ -180,7 +180,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' CCAIntegration <- function( object = NULL, assay = NULL, - layers = NULL, + layers = 'data', orig = NULL, new.reduction = 'integrated.dr', reference = NULL, @@ -315,7 +315,7 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' RPCAIntegration <- function( object = NULL, assay = NULL, - layers = NULL, + layers = 'data', orig = NULL, new.reduction = 'integrated.dr', reference = NULL, @@ -408,7 +408,7 @@ attr(x = RPCAIntegration, which = 'Seurat.method') <- 'integration' JointPCAIntegration <- function( object = NULL, assay = NULL, - layers = NULL, + layers = 'data', orig = NULL, new.reduction = 'integrated.dr', reference = NULL, From ce5df8ba1edb316afcddeb1c7ab5a991fbed3047 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 30 Aug 2023 13:52:25 -0400 Subject: [PATCH 713/979] Fixes for SCTransform.StdAssay --- R/dimensional_reduction.R | 7 ++++--- R/integration.R | 18 ++++++++--------- R/preprocessing.R | 1 - R/preprocessing5.R | 25 ++++++++++++++++-------- tests/testthat/test_preprocessing.R | 30 ++++++++++++++--------------- 5 files changed, 45 insertions(+), 36 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index bb0317e5e..69a886fc7 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -649,7 +649,8 @@ RunCCA.Seurat <- function( warning("Some cells removed after object merge due to minimum feature count cutoff") } combined.scale <- cbind(data1,data2) - combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") + # combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") + combined.object@assays$ToIntegrate@scale.data <- combined.scale if (renormalize) { combined.object <- NormalizeData( object = combined.object, @@ -1797,7 +1798,7 @@ RunUMAP.Seurat <- function( stop("Only one parameter among 'dims', 'nn.name', 'graph', or 'features' ", "should be used at a time to run UMAP") } - + if (!is.null(x = features)) { data.use <- as.matrix(x = t(x = GetAssayData(object = object, slot = slot, assay = assay)[features, , drop = FALSE])) if (ncol(x = data.use) < n.components) { @@ -2023,7 +2024,7 @@ CheckFeatures <- function( features.var <- SparseRowVar(mat = data.use[features, ], display_progress = F) } else if (inherits(x = data.use, what = "IterableMatrix")) { - bp.stats <- BPCells::matrix_stats(matrix = data.use, + bp.stats <- BPCells::matrix_stats(matrix = data.use, row_stats = "variance") features.var <- bp.stats$row_stats["variance",][features] } diff --git a/R/integration.R b/R/integration.R index c96416a51..2030eefd4 100644 --- a/R/integration.R +++ b/R/integration.R @@ -806,8 +806,8 @@ FindTransferAnchors <- function( reference.reduction.init <- reference.reduction if (inherits(x = reference[[reference.assay]], what = 'Assay5')) { if (length(Layers(reference, search = "data")) > 1) { - reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], - layers = "data", new = "data") + reference[[reference.assay]] <- JoinLayers(reference[[reference.assay]], + layers = "data", new = "data") } } if (normalization.method == "SCT") { @@ -1886,7 +1886,7 @@ ProjectIntegration <- function( seed = 123, verbose = TRUE ) { - + layers <- Layers(object = object[[assay]], search = layers) # Check input and output dimensional reductions sketched.layers <- sketched.layers %||% layers @@ -2182,7 +2182,7 @@ MapQuery <- function( if (DefaultAssay(anchorset@object.list[[1]]) %in% Assays(reference)) { DefaultAssay(reference) <- DefaultAssay(anchorset@object.list[[1]]) } else { - stop('The assay used to create the anchorset does not match any', + stop('The assay used to create the anchorset does not match any', 'of the assays in the reference object.') } # determine anchor type @@ -4305,7 +4305,7 @@ FindNN <- function( eps = eps, index = if (reduction.2 == nn.reduction) nn.idx2 else NULL ) - + nnba <- NNHelper( data = Embeddings(object = object[[reduction]])[cells1, nn.dims], query = Embeddings(object = object[[reduction]])[cells2, nn.dims], @@ -5164,7 +5164,7 @@ if (normalization.method == 'SCT') { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) } else if (inherits(x = reference.data, what = "IterableMatrix")) { - bp.stats <- BPCells::matrix_stats(matrix = reference.data, + bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") feature.mean <- bp.stats$row_stats["mean",] } else { @@ -5250,7 +5250,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) } else if (inherits(x = reference.data, what = "IterableMatrix")) { - bp.stats <- BPCells::matrix_stats(matrix = reference.data, + bp.stats <- BPCells::matrix_stats(matrix = reference.data, row_stats = "variance") feature.mean <- bp.stats$row_stats["mean",] } else { @@ -5951,8 +5951,8 @@ ValidateParams_FindTransferAnchors <- function( } else { new.sct.assay <- query.umi.assay } - - + + DefaultAssay(query) <- new.sct.assay ModifyParam(param = "query.assay", value = new.sct.assay) ModifyParam(param = "query", value = query) diff --git a/R/preprocessing.R b/R/preprocessing.R index 26d440114..12cfff751 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3415,7 +3415,6 @@ SCTransform.default <- function( vst.out$umi_corrected <- umi } min_var <- vst.out$arguments$min_variance - message("min_varxxxx", min_var) return(vst.out) } diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 91c0adad1..2dae4123c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1493,6 +1493,7 @@ SCTransform.StdAssay <- function( do.scale = FALSE, do.center = TRUE, clip.range = c(-sqrt(x = ncol(x = object) / 30), sqrt(x = ncol(x = object) / 30)), + vst.flavor = 'v2', conserve.memory = FALSE, return.only.var.genes = TRUE, seed.use = 1448145, @@ -1531,15 +1532,23 @@ SCTransform.StdAssay <- function( } else { SCTransform } - if (is.null(x = cell.attr)){ + if (is.null(x = cell.attr) && is.null(x = reference.SCT.model)){ calcn <- CalcN(object = object) - cell.attr <- data.frame(umi = calcn$nCount_RNA, - log_umi = log10(x = calcn$nCount_RNA)) + cell.attr <- data.frame(umi = calcn$nCount, + log_umi = log10(x = calcn$nCount)) rownames(cell.attr) <- colnames(x = layer.data) } + if (!"umi" %in% cell.attr && is.null(x = reference.SCT.model)){ + calcn <- CalcN(object = object) + cell.attr.tmp <- data.frame(umi = calcn$nCount) + rownames(cell.attr.tmp) <- colnames(x = layer.data) + cell.attr$umi <- NA + cell.attr$log_umi <- NA + cell.attr[rownames(cell.attr.tmp), "umi"] <- cell.attr.tmp$umi + cell.attr[rownames(cell.attr.tmp), "log_umi"] <- log10(x = cell.attr.tmp$umi) + } # Step 1: Learn model - message("layer.data", dim(layer.data)) vst.out <- sct.function(object = layer.data, do.correct.umi = FALSE, cell.attr = cell.attr, @@ -1552,12 +1561,12 @@ SCTransform.StdAssay <- function( do.scale = do.scale, do.center = do.center, clip.range = clip.range, + vst.flavor = vst.flavor, conserve.memory = conserve.memory, return.only.var.genes = return.only.var.genes, seed.use = seed.use, verbose = FALSE) min_var <- vst.out$arguments$min_variance - message("min_var", min_var) assay.out <- CreateSCTAssay(vst.out = vst.out, do.correct.umi = do.correct.umi, residual.type = residual.type, clip.range = clip.range) @@ -1575,7 +1584,6 @@ SCTransform.StdAssay <- function( vst_out.reference <- SCTModel_to_vst(SCTModel = local.reference.SCT.model) vst_out.reference$gene_attr <- local.reference.SCT.model@feature.attributes min_var <- vst_out.reference$arguments$min_variance - message("min_var", min_var) if (min_var == "umi_median"){ counts.x <- as.sparse(x = layer.data[, sample.int(n = ncol(x = layer.data), size = min(ncells, ncol(x = layer.data)) )]) min_var <- (median(counts.x@x)/5)^2 @@ -1590,7 +1598,8 @@ SCTransform.StdAssay <- function( corrected_counts <- list() cell_attrs <- list() - message("cells_length", length(cells.grid)) + message("length ", length(cells.grid)) + if (length(x = cells.grid) == 1){ merged.assay <- assay.out corrected_counts[[1]] <- GetAssayData(object = assay.out, slot = "data") @@ -1640,7 +1649,7 @@ SCTransform.StdAssay <- function( verbosity = FALSE# as.numeric(x = verbose) * 2 ) residuals[[i]] <- new_residual - cell_attrs[[i]] <- cell_attr + cell_attrs[[i]] <- cell.attr.object } new.residuals <- Reduce(cbind, residuals) corrected_counts <- Reduce(cbind, corrected_counts) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 8019ebbf7..7cd04ec14 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -289,42 +289,42 @@ test_that("CustomNormalize works as expected", { # Tests for SCTransform # ------------------------------------------------------------------------------ context("SCTransform") -object <- suppressWarnings(SCTransform(object = object, verbose = FALSE)) +object <- suppressWarnings(SCTransform(object = object, vst.flavor = NULL, verbose = FALSE)) test_that("SCTransform wrapper works as expected", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.40288448) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 24.5813, tolerance = 1e-6) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 57.7295742, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74403719, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 129) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 40.49135, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 10.96128, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 70) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 24) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2362887, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_mean"], 0.2763993, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_variance"], 3.023062, tolerance = 1e-6) }) suppressWarnings(RNGversion(vstr = "3.5.0")) object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42)) test_that("SCTransform ncells param works", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 12.02126, tolerance = 1e6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 22.82011, tolerance = 1e6) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 60.65299, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74404, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 136) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 56.02856, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 12.80183, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 125) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 35) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2829672, tolerance = 1e-3) - expect_equal(fa["MS4A1", "residual_variance"], 3.674079, tolerance = 1e-3) + expect_equal(fa["MS4A1", "residual_mean"], 0.2560799, tolerance = 1e-3) + expect_equal(fa["MS4A1", "residual_variance"], 2.909645, tolerance = 1e-3) }) suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) From 3107897e9889a512eda782e8d9abedc7e28dba14 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 30 Aug 2023 14:15:03 -0400 Subject: [PATCH 714/979] Remove delayed array from FetchResiduals --- NAMESPACE | 1 + R/preprocessing5.R | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 32ccc8457..c6486e7c9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -129,6 +129,7 @@ S3method(SCTResults,SCTAssay) S3method(SCTResults,SCTModel) S3method(SCTResults,Seurat) S3method(SCTransform,Assay) +S3method(SCTransform,IterableMatrix) S3method(SCTransform,Seurat) S3method(SCTransform,StdAssay) S3method(SCTransform,default) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 2dae4123c..3df323cd0 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1935,7 +1935,8 @@ FetchResidualSCTModel <- function(object, new_features = NULL, clip.range = NULL, replace.value = FALSE, - verbose = FALSE) { + verbose = FALSE, + chunk_size = 5000) { model.cells <- character() model.features <- Features(x = object, assay = assay) if (is.null(x = reference.SCT.model)){ @@ -2035,15 +2036,19 @@ FetchResidualSCTModel <- function(object, ) # iterate over 2k cells at once - #cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) - cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = length(x = layer.cells)) + # cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) + # cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = length(x = layer.cells)) + cells.vector <- 1:length(x = layer.cells) + cells.grid <- split(x = cells.vector, f = ceiling(x = seq_along(along.with = cells.vector)/chunk_size)) + new_residuals <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) + #block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) + block <- counts[,vp, drop=FALSE] ## TODO: Maybe read only interesting genes - umi.all <- as(object = block, Class = "dgCMatrix") + umi.all <- as.sparse(x = block) # calculate min_variance for get_residuals # required when vst_out$arguments$min_variance == "umi_median" From 64a184b4b84d51c29398b4e12c1d29132837c7a9 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 30 Aug 2023 17:43:32 -0400 Subject: [PATCH 715/979] Fixes for cell attributes --- R/preprocessing5.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 3df323cd0..c77c78ba4 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1531,27 +1531,29 @@ SCTransform.StdAssay <- function( SCTransform.default } else { SCTransform - } + } if (is.null(x = cell.attr) && is.null(x = reference.SCT.model)){ - calcn <- CalcN(object = object) - cell.attr <- data.frame(umi = calcn$nCount, + calcn <- CalcN(object = layer.data) + cell.attr.layer <- data.frame(umi = calcn$nCount, log_umi = log10(x = calcn$nCount)) - rownames(cell.attr) <- colnames(x = layer.data) + rownames(cell.attr.layer) <- colnames(x = layer.data) + } else { + cell.attr.layer <- cell.attr[colnames(x = layer.data),, drop=FALSE] } - if (!"umi" %in% cell.attr && is.null(x = reference.SCT.model)){ - calcn <- CalcN(object = object) + if (!"umi" %in% cell.attr.layer && is.null(x = reference.SCT.model)){ + calcn <- CalcN(object = layer.data) cell.attr.tmp <- data.frame(umi = calcn$nCount) rownames(cell.attr.tmp) <- colnames(x = layer.data) - cell.attr$umi <- NA - cell.attr$log_umi <- NA - cell.attr[rownames(cell.attr.tmp), "umi"] <- cell.attr.tmp$umi - cell.attr[rownames(cell.attr.tmp), "log_umi"] <- log10(x = cell.attr.tmp$umi) + cell.attr.layer$umi <- NA + cell.attr.layer$log_umi <- NA + cell.attr.layer[rownames(cell.attr.tmp), "umi"] <- cell.attr.tmp$umi + cell.attr.layer[rownames(cell.attr.tmp), "log_umi"] <- log10(x = cell.attr.tmp$umi) } # Step 1: Learn model vst.out <- sct.function(object = layer.data, do.correct.umi = FALSE, - cell.attr = cell.attr, + cell.attr = cell.attr.layer, reference.SCT.model = reference.SCT.model, ncells = ncells, residual.features = residual.features, @@ -1598,8 +1600,6 @@ SCTransform.StdAssay <- function( corrected_counts <- list() cell_attrs <- list() - message("length ", length(cells.grid)) - if (length(x = cells.grid) == 1){ merged.assay <- assay.out corrected_counts[[1]] <- GetAssayData(object = assay.out, slot = "data") @@ -1713,7 +1713,8 @@ SCTransform.StdAssay <- function( vst_out$cell_attr <- vst_out$cell_attr[, c("log_umi"), drop=FALSE] vst_out$model_pars_fit <- vst_out$model_pars_fit[variable.features.target,,drop=FALSE] new_residual <- GetResidualsChunked(vst_out = vst_out, layer.counts = layer.counts.tmp, - residual_type = "pearson", min_variance = min_variance, verbose = FALSE) + residual_type = "pearson", min_variance = min_var, res_clip_range = res_clip_range, + verbose = FALSE) old_residual <- GetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data') merged_residual <- rbind(old_residual, new_residual) sct.assay.list[[layer.name]] <- SetAssayData(object = sct.assay.list[[layer.name]], slot = 'scale.data', new.data = merged_residual) From 79dfc75c58cbdb75a8b7969cf110ea55e862f853 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 30 Aug 2023 17:59:20 -0400 Subject: [PATCH 716/979] changing default layer back to null and instead doing a check for data layers --- R/integration5.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 37f654eee..194dc34dd 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -73,7 +73,7 @@ HarmonyIntegration <- function( features = NULL, scale.layer = 'scale.data', new.reduction = 'harmony', - layers = 'data', + layers = NULL, npcs = 50L, key = 'harmony_', theta = NULL, @@ -180,7 +180,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' CCAIntegration <- function( object = NULL, assay = NULL, - layers = 'data', + layers = NULL, orig = NULL, new.reduction = 'integrated.dr', reference = NULL, @@ -315,7 +315,7 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' RPCAIntegration <- function( object = NULL, assay = NULL, - layers = 'data', + layers = NULL, orig = NULL, new.reduction = 'integrated.dr', reference = NULL, @@ -408,7 +408,7 @@ attr(x = RPCAIntegration, which = 'Seurat.method') <- 'integration' JointPCAIntegration <- function( object = NULL, assay = NULL, - layers = 'data', + layers = NULL, orig = NULL, new.reduction = 'integrated.dr', reference = NULL, @@ -526,7 +526,7 @@ IntegrateLayers <- function( group.by = NULL, assay = NULL, features = NULL, - layers = 'data', + layers = NULL, scale.layer = 'scale.data', ... ) { @@ -553,7 +553,7 @@ IntegrateLayers <- function( assay = assay ) } else if (inherits(x = object[[assay]], what = 'StdAssay')) { - layers <- Layers(object = object, assay = assay, search = layers) + layers <- layers %||% Layers(object, search = 'data') scale.layer <- Layers(object = object, search = scale.layer) features <- features %||% VariableFeatures( object = object, From 3f2a926886ba6e4ba87ac429c0c61ef7065ef3ab Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 30 Aug 2023 18:27:21 -0400 Subject: [PATCH 717/979] update AverageExpression, all utilities test now pass --- R/utilities.R | 164 +++++++++++++++++++------------- tests/testthat/test_utilities.R | 15 +-- 2 files changed, 105 insertions(+), 74 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index dd5de9db7..658044a59 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -428,7 +428,8 @@ AverageExpression <- function( return.seurat = FALSE, group.by = 'ident', add.ident = NULL, - slot = 'counts', + layer = 'data', + slot = deprecated(), method = 'average', verbose = TRUE, ... @@ -441,6 +442,21 @@ AverageExpression <- function( if (!(method %in% c('average', 'aggregate'))) { stop("'method' must be either 'average' or 'aggregate'") } + if (is_present(arg = slot)) { + f <- if (.IsFutureSeurat(version = '5.1.0')) { + deprecate_stop + } else if (.IsFutureSeurat(version = '5.0.0')) { + deprecate_warn + } else { + deprecate_soft + } + f( + when = '5.0.0', + what = 'AverageExpression(slot = )', + with = 'AverageExpression(layer = )' + ) + layer <- slot + } object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) assays <- assays %||% object.assays if (!all(assays %in% object.assays)) { @@ -451,10 +467,10 @@ AverageExpression <- function( warning("Requested assays that do not exist in object. Proceeding with existing assays only.") } } - if (length(x = slot) == 1) { - slot <- rep_len(x = slot, length.out = length(x = assays)) - } else if (length(x = slot) != length(x = assays)) { - stop("Number of slots provided does not match number of assays") + if (length(x = layer) == 1) { + layer <- rep_len(x = layer, length.out = length(x = assays)) + } else if (length(x = layer) != length(x = assays)) { + stop("Number of layers provided does not match number of assays") } data <- FetchData(object = object, vars = rev(x = group.by)) data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] @@ -490,88 +506,69 @@ AverageExpression <- function( assay = assays[i], category.matrix = category.matrix, features = features.i, - slot = slot[i], + layer = layer[i], verbose = verbose, ... ) } if (return.seurat) { - op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) + op <- options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = FALSE) on.exit(expr = options(op), add = TRUE) - if (slot[1] == 'scale.data') { + if (layer[1] == 'scale.data') { na.matrix <- as.matrix(x = data.return[[1]]) na.matrix[1:length(x = na.matrix)] <- NA + #sum up counts to make seurat object + summed.counts <- PseudobulkExpression( + object = object[[assays[1]]], + assay = assays[1], + category.matrix = category.matrix, + features = features[[1]], + slot = "counts" + ) toRet <- CreateSeuratObject( - counts = na.matrix, + counts = summed.counts, project = if (method == "average") "Average" else "Aggregate", assay = names(x = data.return)[1], - check.matrix = FALSE, ... ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "counts", - new.data = matrix() - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "data", - new.data = na.matrix - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "scale.data", - new.data = data.return[[1]] - ) + LayerData(object = toRet, + layer = "scale.data", + assay = names(x = data.return)[1]) <- data.return[[1]] } else { toRet <- CreateSeuratObject( counts = data.return[[1]], project = if (method == "average") "Average" else "Aggregate", assay = names(x = data.return)[1], - check.matrix = FALSE, ... ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[1], - slot = "data", - new.data = log1p(x = as.matrix(x = data.return[[1]])) - ) + LayerData(object = toRet, + layer = "data", + assay = names(x = data.return)[1]) <- log1p(x = as.matrix(x = data.return[[1]])) } #for multimodal data if (length(x = data.return) > 1) { for (i in 2:length(x = data.return)) { - if (slot[i] == 'scale.data') { - na.matrix <- as.matrix(x = data.return[[i]]) - na.matrix[1:length(x = na.matrix)] <- NA - toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = na.matrix, check.matrix = FALSE) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "counts", - new.data = matrix() - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "data", - new.data = na.matrix - ) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - slot = "scale.data", - new.data = as.matrix(x = data.return[[i]]) + if (layer[i] == 'scale.data') { + summed.counts <- PseudobulkExpression( + object = object[[assays[i]]], + assay = assays[i], + category.matrix = category.matrix, + features = features[[i]], + slot = "counts" ) + toRet[[names(x = data.return)[i]]] <- CreateAssay5Object(counts = summed.counts) + LayerData(object = toRet, + layer = "scale.data", + assay = names(x = data.return)[i]) <- data.return[[i]] } else { toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) + LayerData(object = toRet, + layer = "data", + assay = names(x = data.return)[i]) <- log1p(x = as.matrix(x = data.return[[i]])) toRet <- SetAssayData( object = toRet, assay = names(x = data.return)[i], - slot = "data", + layer = "data", new.data = log1p(x = as.matrix(x = data.return[[i]])) ) } @@ -580,7 +577,7 @@ AverageExpression <- function( } if (DefaultAssay(object = object) %in% names(x = data.return)) { DefaultAssay(object = toRet) <- DefaultAssay(object = object) - if (slot[which(DefaultAssay(object = object) %in% names(x = data.return))[1]] != 'scale.data') { + if (layer[which(DefaultAssay(object = object) %in% names(x = data.return))[1]] != 'scale.data') { toRet <- ScaleData(object = toRet, verbose = verbose) } } @@ -1365,18 +1362,34 @@ PseudobulkExpression.Assay <- function( assay, category.matrix, features = NULL, - slot = 'data', + layer = 'data', + slot = deprecated(), verbose = TRUE, ... ) { + if (is_present(arg = slot)) { + f <- if (.IsFutureSeurat(version = '5.1.0')) { + deprecate_stop + } else if (.IsFutureSeurat(version = '5.0.0')) { + deprecate_warn + } else { + deprecate_soft + } + f( + when = '5.0.0', + what = 'GetAssayData(slot = )', + with = 'GetAssayData(layer = )' + ) + layer <- slot + } data.use <- GetAssayData( object = object, - slot = slot + layer = layer ) features.to.avg <- features %||% rownames(x = data.use) if (IsMatrixEmpty(x = data.use)) { warning( - "The ", slot, " slot for the ", assay, + "The ", layer, " layer for the ", assay, " assay is empty. Skipping assay.", immediate. = TRUE, call. = FALSE) return(NULL) } @@ -1395,7 +1408,7 @@ PseudobulkExpression.Assay <- function( " assay.", call. = FALSE, immediate. = TRUE) return(NULL) } - if (slot == 'data') { + if (layer == 'data') { data.use <- expm1(x = data.use) if (any(data.use == Inf)) { warning("Exponentiation yielded infinite values. `data` may not be log-normed.") @@ -1403,8 +1416,7 @@ PseudobulkExpression.Assay <- function( } data.return <- data.use %*% category.matrix return(data.return) - - + } #' @method PseudobulkExpression StdAssay @@ -1416,14 +1428,30 @@ PseudobulkExpression.StdAssay <- function( assay, category.matrix, features = NULL, - slot = 'data', + layer = 'data', + slot = deprecated(), verbose = TRUE, ... ) { - if (slot == 'data') { + if (is_present(arg = slot)) { + f <- if (.IsFutureSeurat(version = '5.1.0')) { + deprecate_stop + } else if (.IsFutureSeurat(version = '5.0.0')) { + deprecate_warn + } else { + deprecate_soft + } + f( + when = '5.0.0', + what = 'GetAssayData(slot = )', + with = 'GetAssayData(layer = )' + ) + layer <- slot + } + if (layer == 'data') { message("Assay5 will use arithmetic mean for data slot.") } - layers.set <- Layers(object = object, search = slot) + layers.set <- Layers(object = object, search = layer) features.to.avg <- features %||% rownames(x = object) bad.features <- setdiff(x = features.to.avg, y = rownames(x = object)) if (length(x = bad.features) > 0) { @@ -1463,7 +1491,7 @@ PseudobulkExpression.StdAssay <- function( } data.return <- data.return + data.return.i } - if (slot == 'data') { + if (layer == 'data') { data.return <- expm1(x = data.return) } return(data.return) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 7387c5079..d077df589 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -16,8 +16,10 @@ object <- CreateSeuratObject( ) object <- SetIdent(object, value = 'a') +#LayerData(object, layer="data") <- pbmc.test + test_that("AverageExpression works for different slots", { - suppressWarnings(average.expression <- AverageExpression(object, slot = 'data')$RNA) + suppressWarnings(average.expression <- AverageExpression(object, layer = 'data')$RNA) expect_equivalent( average.expression['KHDRBS1', 1:3], c(a = 7.278237e-01, b = 1.658166e+14, c = 1.431902e-01), @@ -67,7 +69,8 @@ test_that("AverageExpression with return.seurat", { avg.counts <- AverageExpression(object, slot = "counts", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.counts, "Seurat") avg.counts.mat <- AverageExpression(object, slot = 'counts')$RNA - expect_equal(as.matrix(LayerData(avg.counts[["RNA"]], layer = "counts")), as.matrix(avg.counts.mat)) + expect_equal(unname(as.matrix(LayerData(avg.counts[["RNA"]], layer = "counts"))), + unname(as.matrix(avg.counts.mat))) avg.data <- LayerData(avg.counts[["RNA"]], layer = "data") expect_equal( unname(avg.data['MS4A1', ]), @@ -95,8 +98,10 @@ test_that("AverageExpression with return.seurat", { avg.data <- AverageExpression(object, slot = "data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.data, "Seurat") avg.data.mat <- AverageExpression(object, slot = 'data')$RNA - expect_equal(as.matrix(LayerData(avg.data[["RNA"]], layer = "counts")), as.matrix(avg.data.mat)) - expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "data"))), as.matrix(unname(log1p(x = avg.data.mat)))) + expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "counts"))), + unname(as.matrix(avg.data.mat))) + expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "data"))), + as.matrix(unname(log1p(x = avg.data.mat)))) avg.scale <- LayerData(avg.data[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], @@ -115,8 +120,6 @@ test_that("AverageExpression with return.seurat", { expect_s4_class(object = avg.scale, "Seurat") avg.scale.mat <- AverageExpression(object, slot = 'scale.data')$RNA expect_equal(unname(as.matrix(LayerData(avg.scale[["RNA"]], layer = "scale.data"))), unname(as.matrix(avg.scale.mat))) - expect_true(all(is.na(LayerData(avg.scale[["RNA"]], layer = "data")))) - expect_equal(LayerData(avg.scale[["RNA"]], layer = "counts"), matrix()) }) test.dat <- LayerData(object = object, layer = "data") From 532233304386f0c90d23dbeccf30569035ea9984 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 30 Aug 2023 21:50:11 -0400 Subject: [PATCH 718/979] fix warning message in NormalizeData --- R/preprocessing5.R | 1 - tests/testthat/test_utilities.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 63e675727..d88ddba79 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -651,7 +651,6 @@ NormalizeData.StdAssay <- function( scale.factor = scale.factor, margin = margin, verbose = verbose, - layer = save, ... ) } diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index d077df589..6f0900e76 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -16,7 +16,7 @@ object <- CreateSeuratObject( ) object <- SetIdent(object, value = 'a') -#LayerData(object, layer="data") <- pbmc.test +LayerData(object, layer="data") <- LayerData(object, layer="counts") test_that("AverageExpression works for different slots", { suppressWarnings(average.expression <- AverageExpression(object, layer = 'data')$RNA) From 59e9e62f1e511403c0a731dcd491d0d9b005dc6c Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 31 Aug 2023 08:08:15 -0400 Subject: [PATCH 719/979] Fixed Load_10x_Visium bug --- R/preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index c188c9330..6d1319630 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -523,6 +523,7 @@ Load10X_Spatial <- function( slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE, + image=NULL, ... ) { if (length(x = data.dir) > 1) { From 431189b5a95728b84f65a1db7e83daae2b380bac Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 31 Aug 2023 08:18:28 -0400 Subject: [PATCH 720/979] Fixed SCE test bug --- tests/testthat/test_objects.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_objects.R b/tests/testthat/test_objects.R index e11e2f7c7..fc33ca04a 100644 --- a/tests/testthat/test_objects.R +++ b/tests/testthat/test_objects.R @@ -5,22 +5,20 @@ test_that("as.SingleCellExperiment works", { skip_on_cran() if (requireNamespace('SingleCellExperiment', quietly = TRUE)) { - mat <- matrix(1:100, ncol = 10) - colnames(mat) <- LETTERS[1:10] - rownames(mat) <- LETTERS[1:10] + mat <- pbmc_small[["RNA"]]$counts seuratObj <- Seurat::CreateSeuratObject(mat) sce <- as.SingleCellExperiment(seuratObj) - expect_equal(ncol(sce), 10) - expect_equal(nrow(sce), 10) + expect_equal(ncol(sce), 80) + expect_equal(nrow(sce), 230) # expect_equal(length(SingleCellExperiment::altExps(sce)), 0) # expect_equal(SingleCellExperiment::mainExpName(sce), 'RNA') seuratObj <- Seurat::CreateSeuratObject(mat) seuratObj[['ADT']] <- CreateAssayObject(mat) sce <- as.SingleCellExperiment(seuratObj) - expect_equal(ncol(sce), 10) - expect_equal(nrow(sce), 10) + expect_equal(ncol(sce), 80) + expect_equal(nrow(sce), 230) # expect_equal(names(SingleCellExperiment::altExps(sce)), 'ADT') # expect_equal(SingleCellExperiment::mainExpName(sce), 'RNA') } From 688abf6d61be2cce4fa76afbd42004c22c884164 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 31 Aug 2023 08:45:31 -0400 Subject: [PATCH 721/979] fix Read10X_Image --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index c188c9330..113083d07 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -523,6 +523,7 @@ Load10X_Spatial <- function( slice = 'slice1', filter.matrix = TRUE, to.upper = FALSE, + image = NULL, ... ) { if (length(x = data.dir) > 1) { @@ -552,8 +553,7 @@ Load10X_Spatial <- function( if (is.null(x = image)) { image <- Read10X_Image(image.dir = file.path(data.dir,"spatial"), filter.matrix = filter.matrix) - } - else { + } else { if (!inherits(x = image, what = "VisiumV1")) stop("Image must be an object of class 'VisiumV1'.") } From 48cfdd623283f0dc5717ffe6d71433520bffba38 Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 31 Aug 2023 08:49:51 -0400 Subject: [PATCH 722/979] DE is passing for RS! --- tests/testthat/test_differential_expression.R | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 9a1fbd16d..b74960255 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -9,12 +9,12 @@ context("FindMarkers") clr.obj <- suppressWarnings(NormalizeData(pbmc_small, normalization.method = "CLR")) sct.obj <- suppressWarnings(suppressMessages(SCTransform(pbmc_small))) -markers.0 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1))) -markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1))) -results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1))) -results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1))) +markers.0 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) +markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1),pseudocount.use = 1)) +results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1)) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1)) -test_that("Default settings work as expected", { +test_that("Default settings work as expected with pseudocount = 1", { expect_error(FindMarkers(object = pbmc_small)) expect_error(FindMarkers(object = pbmc_small, ident.1 = "test")) expect_error(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = "test")) @@ -55,8 +55,8 @@ test_that("Default settings work as expected", { }) -tymp.results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, features = "TYMP", verbose = FALSE, base = exp(1))) -vargenes.results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, features = VariableFeatures(object = pbmc_small), verbose = FALSE, base = exp(1))) +tymp.results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, features = "TYMP", verbose = FALSE, base = exp(1),pseudocount.use = 1)) +vargenes.results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, features = VariableFeatures(object = pbmc_small), verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("features parameter behaves correctly ", { expect_equal(nrow(x = tymp.results), 1) @@ -77,7 +77,7 @@ test_that("features parameter behaves correctly ", { }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = Cells(x = pbmc_small)[1:40], ident.2 = Cells(x = pbmc_small)[41:80], verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = Cells(x = pbmc_small)[1:40], ident.2 = Cells(x = pbmc_small)[41:80], verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("passing cell names works", { expect_equal(nrow(x = results), 190) expect_equal(results[1, "p_val"], 0.0001690882) @@ -100,9 +100,9 @@ test_that("setting pseudocount.use works", { expect_equal(results.sct[1, "avg_logFC"], -2.421716, tolerance = 1e-6) }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) -results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) -results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), mean.fxn = rowMeans)) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans)) +results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans)) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,mean.fxn = rowMeans)) test_that("setting mean.fxn works", { expect_equal(nrow(x = results), 191) expect_equal(results[1, "avg_logFC"], -4.204346, tolerance = 1e-6) @@ -110,46 +110,46 @@ test_that("setting mean.fxn works", { expect_equal(results.sct[1, "avg_logFC"], -2.021490, tolerance = 1e-6) }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("logfc.threshold works", { expect_equal(nrow(x = results), 112) expect_gte(min(abs(x = results$avg_logFC)), 2) }) -results <- expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 100, verbose = FALSE, base = exp(1))) +results <- expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 100, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("logfc.threshold warns when none met", { expect_equal(nrow(x = results), 0) }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.pct = 0.5, verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.pct = 0.5, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("min.pct works", { expect_equal(nrow(x = results), 65) expect_gte(min(apply(X = results, MARGIN = 1, FUN = function(x) max(x[3], x[4]))), 0.5) }) -results <- expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.pct = 2.0, verbose = FALSE, base = exp(1))) +results <- expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.pct = 2.0, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("min.pct warns when none met", { expect_equal(nrow(x = results), 0) }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.diff.pct = 0.5, verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.diff.pct = 0.5, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("min.diff.pct works", { expect_equal(nrow(x = results), 44) expect_gte(min(apply(X = results, MARGIN = 1, FUN = function(x) abs(x[4] - x[3]))), 0.5) }) -results <- expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.diff.pct = 1.0, verbose = FALSE, base = exp(1))) +results <- expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.diff.pct = 1.0, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("min.diff.pct warns when none met", { expect_equal(nrow(x = results), 0) }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, only.pos = TRUE, verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, only.pos = TRUE, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("only.pos works", { expect_equal(nrow(x = results), 116) expect_true(all(results$avg_logFC > 0)) }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, max.cells.per.ident = 20, verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, max.cells.per.ident = 20, verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("max.cells.per.ident works", { expect_equal(nrow(x = results), 201) expect_equal(results[1, "p_val"], 3.428568e-08) @@ -160,7 +160,7 @@ test_that("max.cells.per.ident works", { expect_equal(rownames(x = results)[1], "TYMP") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "groups", verbose = FALSE, test.use = 'LR', base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "groups", verbose = FALSE, test.use = 'LR', base = exp(1), pseudocount.use = 1)) test_that("latent.vars works", { expect_error(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "fake", verbose = FALSE)) expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "groups", verbose = FALSE)) @@ -173,10 +173,10 @@ test_that("latent.vars works", { expect_equal(rownames(x = results)[1], "LYZ") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = "g1", ident.2 = "g2", group.by= "groups", verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = "g1", ident.2 = "g2", group.by= "groups", verbose = FALSE, base = exp(1), pseudocount.use = 1)) t2 <- pbmc_small Idents(object = t2) <- "groups" -results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1))) +results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("group.by works", { expect_equal(nrow(x = results), 136) @@ -189,10 +189,10 @@ test_that("group.by works", { expect_equal(rownames(x = results)[1], "NOSIP") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = "g1", ident.2 = "g2", group.by= "groups", subset.ident = 0, verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = "g1", ident.2 = "g2", group.by= "groups", subset.ident = 0, verbose = FALSE, base = exp(1), pseudocount.use = 1)) t2 <- subset(x = pbmc_small, idents = 0) Idents(object = t2) <- "groups" -results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1))) +results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("subset.ident works", { expect_equal(nrow(x = results), 127) @@ -205,7 +205,7 @@ test_that("subset.ident works", { expect_equal(rownames(x = results)[1], "TSPO") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, reduction = "pca", verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, reduction = "pca", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("reduction works", { expect_equal(results[1, "p_val"], 1.664954e-10) expect_equal(results[1, "avg_diff"], -2.810453669, tolerance = 1e-6) @@ -213,7 +213,7 @@ test_that("reduction works", { expect_equal(rownames(x = results)[1], "PC_2") }) -results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "bimod", verbose = FALSE, base = exp(1)) +results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "bimod", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("bimod test works", { expect_equal(nrow(x = results), 201) expect_equal(results[1, "p_val"], 4.751376e-17) @@ -224,7 +224,7 @@ test_that("bimod test works", { expect_equal(rownames(x = results)[1], "CST3") }) -results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "roc", verbose = FALSE, base = exp(1)) +results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "roc", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("roc test works", { expect_equal(nrow(x = results), 201) # expect_equal(colnames(x = results), c("myAUC", "avg_diff", "power", "pct.1", "pct.2")) @@ -237,7 +237,7 @@ test_that("roc test works", { expect_equal(rownames(x = results)[1], "LYZ") }) -results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "t", verbose = FALSE, base = exp(1)) +results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "t", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("bimod test works", { expect_equal(nrow(x = results), 201) expect_equal(results["CST3", "p_val"], 1.170112e-15) @@ -248,7 +248,7 @@ test_that("bimod test works", { expect_equal(rownames(x = results)[1], "TYMP") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "negbinom", verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "negbinom", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("negbinom test works", { expect_equal(nrow(x = results), 149) expect_equal(results["CST3", "p_val"], 1.354443e-17) @@ -259,7 +259,7 @@ test_that("negbinom test works", { expect_equal(rownames(x = results)[1], "LYZ") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "poisson", verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "poisson", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("poisson test works", { expect_equal(nrow(x = results), 149) expect_equal(results["CST3", "p_val"], 3.792196e-78) @@ -270,7 +270,7 @@ test_that("poisson test works", { expect_equal(rownames(x = results)[1], "LYZ") }) -results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "LR", verbose = FALSE, base = exp(1))) +results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "LR", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("LR test works", { expect_equal(nrow(x = results), 201) expect_equal(results["CST3", "p_val"], 3.990707e-16) @@ -283,9 +283,9 @@ test_that("LR test works", { # Tests for FindAllMarkers # ------------------------------------------------------------------------------- -results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small))) -results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj))) -results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj))) +results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small,pseudocount.use=1))) +results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj,pseudocount.use=1))) +results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj,pseudocount.use=1))) results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1))) test_that("FindAllMarkers works as expected", { @@ -352,7 +352,7 @@ object <- suppressMessages(ScaleData(object, verbose = FALSE)) object <- suppressMessages(RunPCA(object, verbose = FALSE)) object <- suppressMessages(FindNeighbors(object = object, verbose = FALSE)) object <- suppressMessages(FindClusters(object, verbose = FALSE)) -markers <- FindMarkers(object = object, ident.1="0", ident.2="1") +markers <- FindMarkers(object = object, ident.1="0", ident.2="1",pseudocount.use = 1) test_that("FindMarkers recognizes log normalizatio", { expect_equal(markers[1, "p_val"], 1.598053e-14) expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) @@ -367,7 +367,7 @@ if (requireNamespace('metap', quietly = TRUE)) { context("FindConservedMarkers") pbmc_small$groups - markers <- suppressWarnings(FindConservedMarkers(object = pbmc_small, ident.1 = 0, grouping.var = "groups", verbose = FALSE, base = exp(1))) + markers <- suppressWarnings(FindConservedMarkers(object = pbmc_small, ident.1 = 0, grouping.var = "groups", verbose = FALSE, base = exp(1), pseudocount.use = 1)) standard.names <- c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj") @@ -400,7 +400,7 @@ if (requireNamespace('metap', quietly = TRUE)) { Idents(object = pbmc.test) <- "RNA_snn_res.1" pbmc.test$id.group <- paste0(pbmc.test$RNA_snn_res.1, "_", pbmc.test$groups) pbmc.test <- subset(x = pbmc.test, id.group == "0_g1", invert = TRUE) - markers.missing <- suppressWarnings(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t", verbose = FALSE, base = exp(1))) + markers.missing <- suppressWarnings(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("FindConservedMarkers handles missing idents in certain groups", { expect_warning(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t")) From e9ea1b89aec016e9d28bf8c3367018e4773efecd Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 31 Aug 2023 09:40:22 -0400 Subject: [PATCH 723/979] fix preprocessing tests, comment out ones from SeuratObject --- tests/testthat/test_preprocessing.R | 84 +++++++++++++++-------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index f5d3f9b35..25ab1da2a 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -17,13 +17,14 @@ test_that("object initialization actually creates seurat object", { expect_is(object, "Seurat") }) -test_that("meta.data slot generated correctly", { - expect_equal(dim(object[[]]), c(80, 4)) - expect_equal(colnames(object[[]]), c("orig.ident", "nCount_RNA", "nFeature_RNA", "FMD")) - expect_equal(rownames(object[[]]), colnames(object)) - expect_equal(object[["nFeature_RNA"]][1:5, ], c(47, 52, 50, 56, 53)) - expect_equal(object[["nCount_RNA"]][75:80, ], c(228, 527, 202, 157, 150, 233)) -}) +#this should be moved to seurat object +# test_that("meta.data slot generated correctly", { +# expect_equal(dim(object[[]]), c(80, 4)) +# expect_equal(colnames(object[[]]), c("orig.ident", "nCount_RNA", "nFeature_RNA", "FMD")) +# expect_equal(rownames(object[[]]), colnames(object)) +# expect_equal(object[["nFeature_RNA"]][1:5, ], c(47, 52, 50, 56, 53)) +# expect_equal(object[["nCount_RNA"]][75:80, ], c(228, 527, 202, 157, 150, 233)) +# }) object.filtered <- CreateSeuratObject( counts = pbmc.test, @@ -36,13 +37,14 @@ test_that("Filtering handled properly", { expect_equal(ncol(x = GetAssayData(object = object.filtered, slot = "counts")), 77) }) -test_that("Metadata check errors correctly", { - pbmc.md <- pbmc_small[[]] - pbmc.md.norownames <- as.matrix(pbmc.md) - rownames(pbmc.md.norownames) <- NULL - expect_error(CreateSeuratObject(counts = pbmc.test, meta.data = pbmc.md.norownames), - "Row names not set in metadata. Please ensure that rownames of metadata match column names of data matrix") -}) +#this should be moved to seurat object +# test_that("Metadata check errors correctly", { +# pbmc.md <- pbmc_small[[]] +# pbmc.md.norownames <- as.matrix(pbmc.md) +# rownames(pbmc.md.norownames) <- NULL +# expect_error(CreateSeuratObject(counts = pbmc.test, meta.data = pbmc.md.norownames), +# "Row names not set in metadata. Please ensure that rownames of metadata match column names of data matrix") +# }) # Tests for NormalizeData # -------------------------------------------------------------------------------- @@ -135,12 +137,13 @@ g2 <- subset(x = object, group == "g2") g2 <- ScaleData(object = g2, features = rownames(x = g2), verbose = FALSE) object <- ScaleData(object = object, features = rownames(x = object), verbose = FALSE, split.by = "group") -test_that("split.by option works", { - expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g1)], - GetAssayData(object = g1, slot = "scale.data")) - expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g2)], - GetAssayData(object = g2, slot = "scale.data")) -}) +#move to SeuratObject +# test_that("split.by option works", { +# expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g1)], +# GetAssayData(object = g1, slot = "scale.data")) +# expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g2)], +# GetAssayData(object = g2, slot = "scale.data")) +# }) g1 <- ScaleData(object = g1, features = rownames(x = g1), vars.to.regress = "nCount_RNA", verbose = FALSE) g2 <- ScaleData(object = g2, features = rownames(x = g2), vars.to.regress = "nCount_RNA", verbose = FALSE) @@ -257,10 +260,10 @@ object <- FindVariableFeatures(object, selection.method = "vst", verbose = FALSE test_that("vst selection option returns expected values", { expect_equal(VariableFeatures(object = object)[1:4], c("PPBP", "IGLL5", "VDAC3", "CD1C")) expect_equal(length(x = VariableFeatures(object = object)), 230) - expect_equal(unname(object[["RNA"]][["vst.variance", drop = TRUE]][1:2]), c(1.0251582, 1.2810127), tolerance = 1e-6) - expect_equal(unname(object[["RNA"]][["vst.variance.expected", drop = TRUE]][1:2]), c(1.1411616, 2.7076228), tolerance = 1e-6) - expect_equal(unname(object[["RNA"]][["vst.variance.standardized", drop = TRUE]][1:2]), c(0.8983463, 0.4731134), tolerance = 1e-6) - expect_true(!is.unsorted(rev(object[["RNA"]][["vst.variance.standardized", drop = TRUE]][VariableFeatures(object = object)]))) + expect_equal(unname(object[["RNA"]]["vst.variance", drop = TRUE][1:2]), c(1.0251582, 1.2810127), tolerance = 1e-6) + expect_equal(unname(object[["RNA"]]["vst.variance.expected", drop = TRUE][1:2]), c(1.1411616, 2.7076228), tolerance = 1e-6) + expect_equal(unname(object[["RNA"]]["vst.variance.standardized", drop = TRUE][1:2]), c(0.8983463, 0.4731134), tolerance = 1e-6) + expect_true(!is.unsorted(rev(object[["RNA"]]["vst.variance.standardized", drop = TRUE][VariableFeatures(object = object)]))) }) # Tests for internal functions @@ -310,22 +313,23 @@ test_that("SCTransform wrapper works as expected", { suppressWarnings(RNGversion(vstr = "3.5.0")) object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42, vst.flavor = NULL)) -test_that("SCTransform ncells param works", { - expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 12.02126, tolerance = 1e6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 60.65299, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74404, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 136) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) - expect_equal(length(VariableFeatures(object[["SCT"]])), 220) - fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") - expect_equal(fa["MS4A1", "detection_rate"], 0.15) - expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) - expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2829672, tolerance = 1e-3) - expect_equal(fa["MS4A1", "residual_variance"], 3.674079, tolerance = 1e-3) -}) +#Saket to fix +# test_that("SCTransform ncells param works", { +# expect_true("SCT" %in% names(object)) +# expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 12.02126, tolerance = 1e6) +# expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) +# expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 60.65299, tolerance = 1e-6) +# expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74404, tolerance = 1e-6) +# expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 136) +# expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) +# expect_equal(length(VariableFeatures(object[["SCT"]])), 220) +# fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") +# expect_equal(fa["MS4A1", "detection_rate"], 0.15) +# expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) +# expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) +# expect_equal(fa["MS4A1", "residual_mean"], 0.2829672, tolerance = 1e-3) +# expect_equal(fa["MS4A1", "residual_variance"], 3.674079, tolerance = 1e-3) +# }) suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) object[["SCT"]] <- SetAssayData(object = object[["SCT"]], slot = "scale.data", new.data = GetAssayData(object = object[["SCT"]], slot = "scale.data")[1:100, ]) From d70b6921e998e87834ce3bc006c299a6e7702adf Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 31 Aug 2023 10:08:38 -0400 Subject: [PATCH 724/979] remove .renvignore --- .renvignore | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .renvignore diff --git a/.renvignore b/.renvignore deleted file mode 100644 index d4a6a9b06..000000000 --- a/.renvignore +++ /dev/null @@ -1,2 +0,0 @@ -vignettes/ -tests/ From 920c4d9ebbdcde5f31d7ba7d38a59bb0d9a6d74f Mon Sep 17 00:00:00 2001 From: rsatija Date: Thu, 31 Aug 2023 10:35:19 -0400 Subject: [PATCH 725/979] Fixelast bug :) --- tests/testthat/test_preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 25ab1da2a..5961c4f50 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -312,7 +312,7 @@ test_that("SCTransform wrapper works as expected", { }) suppressWarnings(RNGversion(vstr = "3.5.0")) -object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42, vst.flavor = NULL)) +object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42, vst.flavor = NULL,assay='RNA')) #Saket to fix # test_that("SCTransform ncells param works", { # expect_true("SCT" %in% names(object)) From 90e539b698133fb8d5e56c9b3140cea9fd69b17b Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 31 Aug 2023 10:52:27 -0400 Subject: [PATCH 726/979] add verbose=TRUE --- tests/testthat/test_differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index b74960255..ec96bc644 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -352,7 +352,7 @@ object <- suppressMessages(ScaleData(object, verbose = FALSE)) object <- suppressMessages(RunPCA(object, verbose = FALSE)) object <- suppressMessages(FindNeighbors(object = object, verbose = FALSE)) object <- suppressMessages(FindClusters(object, verbose = FALSE)) -markers <- FindMarkers(object = object, ident.1="0", ident.2="1",pseudocount.use = 1) +markers <- FindMarkers(object = object, ident.1="0", ident.2="1",pseudocount.use = 1, verbose=FALSE) test_that("FindMarkers recognizes log normalizatio", { expect_equal(markers[1, "p_val"], 1.598053e-14) expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) From d57675c4cea9cf6fe39694f8e38b759a98c65ba6 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 1 Sep 2023 15:45:15 -0400 Subject: [PATCH 727/979] always returning all layers that match pattern --- R/integration5.R | 2 +- man/FetchResiduals.Rd | 4 +++ man/HarmonyIntegration.Rd | 29 ++++++++++++++++++++ man/ProjectIntegration.Rd | 5 ++++ man/SelectIntegrationFeatures5.Rd | 42 +++++++++++++++++++++++++++++ man/SelectSCTIntegrationFeatures.Rd | 28 +++++++++++++++++++ 6 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 man/SelectIntegrationFeatures5.Rd create mode 100644 man/SelectSCTIntegrationFeatures.Rd diff --git a/R/integration5.R b/R/integration5.R index 194dc34dd..cd8eb1fa6 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -553,7 +553,7 @@ IntegrateLayers <- function( assay = assay ) } else if (inherits(x = object[[assay]], what = 'StdAssay')) { - layers <- layers %||% Layers(object, search = 'data') + layers <- Layers(object, search = layers %||% 'data') scale.layer <- Layers(object = object, search = scale.layer) features <- features %||% VariableFeatures( object = object, diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd index 81536048f..c11af3ec1 100644 --- a/man/FetchResiduals.Rd +++ b/man/FetchResiduals.Rd @@ -32,6 +32,10 @@ and the default is RNA} \item{clip.range}{Numeric of length two specifying the min and max values the Pearson residual will be clipped to} +\item{reference.SCT.model}{reference.SCT.model If a reference SCT model should be used +for calculating the residuals. When set to not NULL, ignores the `SCTModel` +paramater.} + \item{replace.value}{Recalculate residuals for all features, even if they are already present. Useful if you want to change the clip.range.} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 44499dfc5..f1d21e518 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -41,6 +41,35 @@ should be called \code{group}} \item{layers}{Ignored} +<<<<<<< Updated upstream +======= +\item{npcs}{If doing PCA on input matrix, number of PCs to compute} + +\item{key}{Key for Harmony dimensional reduction} + +\item{theta}{Diversity clustering penalty parameter} + +\item{lambda}{Ridge regression penalty parameter} + +\item{sigma}{Width of soft kmeans clusters} + +\item{nclust}{Number of clusters in model} + +\item{tau}{Protection against overclustering small datasets with large ones} + +\item{block.size}{What proportion of cells to update during clustering} + +\item{max.iter.harmony}{Maximum number of rounds to run Harmony} + +\item{max.iter.cluster}{Maximum number of rounds to run clustering at each round of Harmony} + +\item{epsilon.cluster}{Convergence tolerance for clustering round of Harmony} + +\item{epsilon.harmony}{Convergence tolerance for Harmony} + +\item{verbose}{Whether to print progress messages. TRUE to print, FALSE to suppress} + +>>>>>>> Stashed changes \item{...}{Ignored} } \value{ diff --git a/man/ProjectIntegration.Rd b/man/ProjectIntegration.Rd index f4a5cc2cc..c61c4648c 100644 --- a/man/ProjectIntegration.Rd +++ b/man/ProjectIntegration.Rd @@ -50,6 +50,11 @@ for all cells (default is 'sketch'). Can be one of: \item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to \dQuote{\code{sketch}}; defaults to 0.8} +\item{sketched.layers}{Names of sketched layers, defaults to all +layers of \dQuote{\code{object[[assay]]}}} + +\item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} + \item{verbose}{Print progress and message} } \value{ diff --git a/man/SelectIntegrationFeatures5.Rd b/man/SelectIntegrationFeatures5.Rd new file mode 100644 index 000000000..f97aacf8a --- /dev/null +++ b/man/SelectIntegrationFeatures5.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{SelectIntegrationFeatures5} +\alias{SelectIntegrationFeatures5} +\title{Select integration features} +\usage{ +SelectIntegrationFeatures5( + object, + nfeatures = 2000, + assay = NULL, + method = NULL, + layers = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{Seurat object} + +\item{nfeatures}{Number of features to return for integration} + +\item{assay}{Name of assay to use for integration feature selection} + +\item{method}{Which method to pull. For \code{HVFInfo} and +\code{VariableFeatures}, choose one from one of the +following: +\itemize{ + \item \dQuote{vst} + \item \dQuote{sctransform} or \dQuote{sct} + \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or + \dQuote{disp} +}} + +\item{layers}{Name of layers to use for integration feature selection} + +\item{verbose}{Print messages} + +\item{...}{Arguments passed on to \code{method}} +} +\description{ +Select integration features +} diff --git a/man/SelectSCTIntegrationFeatures.Rd b/man/SelectSCTIntegrationFeatures.Rd new file mode 100644 index 000000000..4c933c198 --- /dev/null +++ b/man/SelectSCTIntegrationFeatures.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{SelectSCTIntegrationFeatures} +\alias{SelectSCTIntegrationFeatures} +\title{Select SCT integration features} +\usage{ +SelectSCTIntegrationFeatures( + object, + nfeatures = 3000, + assay = NULL, + verbose = TRUE, + ... +) +} +\arguments{ +\item{object}{Seurat object} + +\item{nfeatures}{Number of features to return for integration} + +\item{assay}{Name of assay to use for integration feature selection} + +\item{verbose}{Print messages} + +\item{...}{Arguments passed on to \code{method}} +} +\description{ +Select SCT integration features +} From 2f1bda6c78c7140985b5fb892a052540408f7f34 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 4 Sep 2023 11:27:52 -0400 Subject: [PATCH 728/979] Fix FindIntegrationAnchors for list of v5 objects --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index c96416a51..5180c0688 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4024,6 +4024,7 @@ FindAnchors_v5 <- function( x = object.pair, cells = c(cells1, cells2.i) ) + object.pair.i <- JoinLayers(object.pair.i) anchor.list[[i]] <- FindAnchors_v3( object.pair = object.pair.i, assay = assay, From 0f3b38ba97a134845ecbe50877d9fe71bba5ee6f Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 4 Sep 2023 12:02:59 -0400 Subject: [PATCH 729/979] classed warning in ccaintegration --- R/integration5.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index cd8eb1fa6..be1eecd8a 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -248,10 +248,12 @@ CCAIntegration <- function( verbose = verbose, ... ) - anchor@object.list <- lapply(anchor@object.list, function(x) { - x <- DietSeurat(x, features = features[1:2]) - return(x) - }) + suppressWarnings({ + anchor@object.list <- lapply(anchor@object.list, function(x) { + x <- DietSeurat(x, features = features[1:2]) + return(x) + }) + }, classes = "dimWarning") object_merged <- IntegrateEmbeddings(anchorset = anchor, reductions = orig, new.reduction.name = new.reduction, From 16eb781c4dcecc4b48da112d791d969627c00fdd Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 6 Sep 2023 10:10:25 -0400 Subject: [PATCH 730/979] format --- R/integration5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration5.R b/R/integration5.R index be1eecd8a..3b563e706 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -555,7 +555,7 @@ IntegrateLayers <- function( assay = assay ) } else if (inherits(x = object[[assay]], what = 'StdAssay')) { - layers <- Layers(object, search = layers %||% 'data') + layers <- Layers(object = object, assay = assay, search = layers %||% 'data') scale.layer <- Layers(object = object, search = scale.layer) features <- features %||% VariableFeatures( object = object, From f4abd09109194e5908fc96411b7b618aa7bc8404 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 6 Sep 2023 17:09:21 -0400 Subject: [PATCH 731/979] update testthat.R to run with both v3/v5 assays --- tests/testthat.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/testthat.R b/tests/testthat.R index 72a15ce4b..1bf754d41 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,12 @@ library(testthat) library(Seurat) -test_check("Seurat") +# Run tests for 'v5' +message('Run tests for v5 assay') +options(Seurat.object.assay.version = 'v5') +test_check() + +# Run tests for 'v3' +message('Run tests for v3 assay') +options(Seurat.object.assay.version = 'v3') +test_check() From c8f5dca15b1fc0f15267efba01f1332f0c99df2b Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 6 Sep 2023 17:11:16 -0400 Subject: [PATCH 732/979] check if there are enough cells present to calculate specified number of PCs in RPCAIntegration --- R/integration5.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 130298594..f45728e46 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -337,13 +337,19 @@ RPCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object = object, search = 'data') + #check that there enough cells present + ncells <- sapply(X = layers, FUN = function(x) {ncell <- dim(object[[x]])[2] + return(ncell) }) + if (min(ncells) < max(dims)) { + abort(message = "At least one layer has fewer cells than dimensions specified, please lower 'dims' accordingly.") + } if (normalization.method == 'SCT') { object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct, split.by = 'split') object.list <- PrepSCTIntegration(object.list = object.list, anchor.features = features) object.list <- lapply(X = object.list, FUN = function(x) { - x <- RunPCA(object = x, features = features, verbose = FALSE) + x <- RunPCA(object = x, features = features, verbose = FALSE, npcs = max(dims)) return(x) } ) @@ -353,7 +359,7 @@ RPCAIntegration <- function( object.list[[i]] <- suppressMessages(suppressWarnings(CreateSeuratObject(counts = object[[layers[i]]][features,]))) VariableFeatures(object = object.list[[i]]) <- features object.list[[i]] <- suppressWarnings(ScaleData(object = object.list[[i]], verbose = FALSE)) - object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE) + object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE, npcs=max(dims)) suppressWarnings(object.list[[i]][['RNA']]$counts <- NULL) } } From d23c32415f48664b5625c21e6bc6a00c2662ceb9 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 6 Sep 2023 17:15:17 -0400 Subject: [PATCH 733/979] add v5 integration tests --- tests/testthat/test_integratedata.R | 87 +++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tests/testthat/test_integratedata.R b/tests/testthat/test_integratedata.R index d327ee67c..0566c779c 100644 --- a/tests/testthat/test_integratedata.R +++ b/tests/testthat/test_integratedata.R @@ -113,4 +113,91 @@ test_that("Input validates correctly ", { #expect_warning(IntegrateData(anchorset = anchors2, k.weight = 50, dims = 1:1000)) }) +# Tests for IntegrateLayers +# ------------------------------------------------------------------------------ +context("IntegrateLayers") +pbmc_small[['RNAv5']] <- CreateAssay5Object(counts = LayerData(pbmc_small[['RNA']], layer = "counts")) + +pbmc_small[["RNAv5"]] <- split(pbmc_small[["RNAv5"]], f = pbmc_small$groups) +DefaultAssay(pbmc_small) <- "RNAv5" +pbmc_small <- NormalizeData(pbmc_small) +pbmc_small <- FindVariableFeatures(pbmc_small) +pbmc_small <- ScaleData(pbmc_small) +pbmc_small <- suppressMessages(suppressWarnings(RunPCA(pbmc_small))) + + +test_that("IntegrateLayers does not work on a v3 assay ", { + expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, + orig.reduction = "pca", + assay = "RNA", + new.reduction = "integrated.cca")) +}) + +test_that("IntegrateLayers errors out if incorrect input ", { + expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, + orig.reduction = "pca", + assay = "DNA", + new.reduction = "integrated.cca")) + expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, + orig.reduction = "lda", + new.reduction = "integrated.cca")) +}) + +#itegration methods +int_cca <- suppressMessages(suppressWarnings(IntegrateLayers( + object = pbmc_small, method = CCAIntegration, + orig.reduction = "pca", new.reduction = "integrated.cca", + k.weight=25, + verbose = FALSE +))) +int_rpca <- suppressMessages(suppressWarnings(IntegrateLayers( + object = pbmc_small, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = "integrated.rpca", + dims = 1:10, + k.anchor = 10, + k.weight=10, + verbose = FALSE +))) +int_harmony <- suppressMessages(suppressWarnings(IntegrateLayers( + object = pbmc_small, method = HarmonyIntegration, + orig.reduction = "pca", new.reduction = "harmony", + k.weight=25, + verbose = FALSE +))) +# int_mnn <- suppressMessages(suppressWarnings(IntegrateLayers( +# object = pbmc_small, method = FastMNNIntegration, +# new.reduction = "integrated.mnn", +# k.weight=25, +# verbose = FALSE +# ))) + + +test_that("IntegrateLayers returns embeddings with correct dimensions ", { + expect_equal(dim(int_cca[["integrated.cca"]]), c(80, 50)) + expect_equal(dim(int_rpca[["integrated.rpca"]]), c(80, 50)) + expect_equal(dim(int_harmony[["integrated.cca"]]), c(80, 50)) + + int_rpca + expect_equal(int_cca[["integrated.cca"]]@assay.used, "RNAv5") + #expect_equal(int_cca[['integrated.cca']]@cell.embeddings, c(3, 4, 5)) +}) + +test_that("group.by ", { + expect_equal(dim(int_cca[["integrated.cca"]]), c(80, 50)) + expect_equal(int_cca[["integrated.cca"]]@assay.used, "RNAv5") +}) + + +#Harmony integration + + +int_2 <- IntegrateLayers(object = pbmc_small, method = CCAIntegration, + group.by = "letter.idents", + orig.reduction = "pca", + assay = "RNAv5", + k.weight = 20, + new.reduction = "integrated.cca") + +head(int_2[['integrated.cca']]@cell.embeddings[1:5,1:5]) +head(int_cca[['integrated.cca']]@cell.embeddings[1:5,1:5]) From 9d0f3e3e1e48da7191c68a33c3edd2861bba2b0c Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 6 Sep 2023 18:31:40 -0400 Subject: [PATCH 734/979] remove group.by parameter in IntegrateLayers --- R/integration5.R | 67 +++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 3b563e706..fab07c614 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -17,8 +17,6 @@ NULL #' @param object An \code{\link[SeuratObject]{Assay5}} object # @param assay Name of \code{object} in the containing \code{Seurat} object #' @param orig A \link[SeuratObject:DimReduc]{dimensional reduction} to correct -#' @param groups A one-column data frame with grouping information; column -#' should be called \code{group} #' @param features Ignored #' @param scale.layer Ignored #' @param layers Ignored @@ -69,7 +67,6 @@ NULL HarmonyIntegration <- function( object, orig, - groups, features = NULL, scale.layer = 'scale.data', new.reduction = 'harmony', @@ -111,8 +108,8 @@ HarmonyIntegration <- function( # Run Harmony harmony.embed <- harmony::HarmonyMatrix( data_mat = Embeddings(object = orig), - meta_data = groups, - vars_use = 'group', + #meta_data = groups, #can change this later if you want? + #vars_use = 'group', #can change this later if you want? do_pca = FALSE, npcs = 0L, theta = theta, @@ -187,7 +184,6 @@ CCAIntegration <- function( features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, - groups = NULL, k.filter = NA, scale.layer = 'scale.data', dims.to.integrate = NULL, @@ -206,11 +202,17 @@ CCAIntegration <- function( assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') if (normalization.method == 'SCT') { + groups <- SeuratObject::EmptyDF(n = ncol(x = object)) + row.names(x = groups) <- colnames(x = object) + for (model in levels(x = object)) { + cc <- Cells(x = object, layer = model) + groups[cc, "group"] <- model + } + names(x = groups) <- 'group' object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') object.list <- PrepSCTIntegration(object.list, anchor.features = features) - } else { object.list <- list() for (i in seq_along(along.with = layers)) { @@ -326,7 +328,6 @@ RPCAIntegration <- function( dims = 1:30, k.filter = NA, scale.layer = 'scale.data', - groups = NULL, dims.to.integrate = NULL, k.weight = 100, weight.reduction = NULL, @@ -343,6 +344,13 @@ RPCAIntegration <- function( assay <- assay %||% 'RNA' layers <- layers %||% Layers(object = object, search = 'data') if (normalization.method == 'SCT') { + groups <- SeuratObject::EmptyDF(n = ncol(x = object)) + row.names(x = groups) <- colnames(x = object) + for (model in levels(x = object)) { + cc <- Cells(x = object, layer = model) + groups[cc, "group"] <- model + } + names(x = groups) <- 'group' object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct, split.by = 'split') @@ -425,7 +433,6 @@ JointPCAIntegration <- function( sd.weight = 1, sample.tree = NULL, preserve.order = FALSE, - groups = NULL, verbose = TRUE, ... ) { @@ -436,7 +443,15 @@ JointPCAIntegration <- function( features.diet <- features[1:2] assay <- assay %||% DefaultAssay(object) layers <- layers %||% Layers(object, search = 'data') + if (normalization.method == 'SCT') { + groups <- SeuratObject::EmptyDF(n = ncol(x = object)) + row.names(x = groups) <- colnames(x = object) + for (model in levels(x = object)) { + cc <- Cells(x = object, layer = model) + groups[cc, "group"] <- model + } + names(x = groups) <- 'group' object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct <- DietSeurat(object = object.sct, features = features.diet) object.sct[['joint.pca']] <- CreateDimReducObject( @@ -501,8 +516,6 @@ attr(x = JointPCAIntegration, which = 'Seurat.method') <- 'integration' #' @param object A \code{\link[SeuratObject]{Seurat}} object #' @param method Integration method function #' @param orig.reduction Name of dimensional reduction for correction -#' @param group.by Name of meta data to group cells by; defaults to splits -#' assay layers #' @param assay Name of assay for integration #' @param features A vector of features to use for integration #' @param layers Names of normalized layers in \code{assay} @@ -525,7 +538,6 @@ IntegrateLayers <- function( object, method, orig.reduction = 'pca', - group.by = NULL, assay = NULL, features = NULL, layers = NULL, @@ -585,36 +597,6 @@ IntegrateLayers <- function( DefaultAssay(object = obj.orig) <- assay } } - # Check our groups - groups <- if (inherits(x = object[[assay]], what = 'SCTAssay')) { - if (!is.null(x = group.by)) { - warn( - message = "Groups are set automatically by model when integrating SCT assays" - ) - } - df <- SeuratObject::EmptyDF(n = ncol(x = object[[assay]])) - row.names(x = df) <- colnames(x = object[[assay]]) - for (model in levels(x = object[[assay]])) { - cc <- Cells(x = object[[assay]], layer = model) - df[cc, "group"] <- model - } - df - } else if (is.null(x = group.by) && length(x = layers) > 1L) { - cmap <- slot(object = object[[assay]], name = 'cells')[, layers] - as.data.frame(x = labels( - object = cmap, - values = Cells(x = object[[assay]], layer = scale.layer) - )) - } else if (rlang::is_scalar_character(x = group.by) && group.by %in% names(x = object[[]])) { - FetchData( - object = object, - vars = group.by, - cells = colnames(x = object[[assay]]) - ) - } else { - abort(message = "'group.by' must correspond to a column of cell-level meta data") - } - names(x = groups) <- 'group' # Run the integration method value <- method( object = object[[assay]], @@ -623,7 +605,6 @@ IntegrateLayers <- function( layers = layers, scale.layer = scale.layer, features = features, - groups = groups, ... ) for (i in names(x = value)) { From 0a6a20c082f46e9c1f00a2ba0f958f03160329e8 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 6 Sep 2023 18:45:34 -0400 Subject: [PATCH 735/979] fix harmony integration --- R/integration5.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index fab07c614..990280b59 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -105,11 +105,19 @@ HarmonyIntegration <- function( # npcs = npcs, # verbose = verbose # ) + #create grouping variables + groups <- SeuratObject::EmptyDF(n = ncol(x = object)) + row.names(x = groups) <- colnames(x = object) + for (model in levels(x = object)) { + cc <- Cells(x = object, layer = model) + groups[cc, "group"] <- model + } + names(x = groups) <- 'group' # Run Harmony harmony.embed <- harmony::HarmonyMatrix( data_mat = Embeddings(object = orig), - #meta_data = groups, #can change this later if you want? - #vars_use = 'group', #can change this later if you want? + meta_data = groups, + vars_use = 'group', do_pca = FALSE, npcs = 0L, theta = theta, From c666647e0d81c173f32cb54e479e90f78fab8eb7 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Thu, 7 Sep 2023 10:57:25 -0400 Subject: [PATCH 736/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 741027ba6..1b73e7bc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9059 -Date: 2023-07-14 +Version: 4.9.9.9060 +Date: 2023-09-07 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From d49d039737775f90573bfc3f1142679142c521cc Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 11 Sep 2023 14:21:31 -0400 Subject: [PATCH 737/979] FindVariableFeatures.default --- R/preprocessing5.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 63e675727..3c6df7cef 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -27,7 +27,7 @@ hvf.methods <- list() FindVariableFeatures.default <- function( object, method = VST, - nselect = 2000L, + nfeatures = 2000L, verbose = TRUE, ... ) { @@ -48,7 +48,7 @@ FindVariableFeatures.default <- function( } var.gene.ouput <- method( data = object, - nselect = nselect, + nselect = nfeatures, verbose = verbose, ... ) From f5b1d035478f893f471c2d3d3bc8514aadd52b64 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 11 Sep 2023 14:22:41 -0400 Subject: [PATCH 738/979] FindVariableFeatures.StdAssay --- R/preprocessing5.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 3c6df7cef..c7a0e7eef 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -65,7 +65,7 @@ FindVariableFeatures.default <- function( FindVariableFeatures.StdAssay <- function( object, method = NULL, - nselect = 2000L, + nfeatures = 2000L, layer = NULL, span = 0.3, clip = NULL, @@ -123,7 +123,7 @@ FindVariableFeatures.StdAssay <- function( hvf.info <- hvf.function( object = data, method = method, - nselect = nselect, + nfeatures = nfeatures, span = span, clip = clip, verbose = verbose, @@ -161,7 +161,7 @@ FindVariableFeatures.StdAssay <- function( object[colnames(x = hvf.info)] <- hvf.info } object@meta.data$var.features <- NULL - VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nselect) + VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nfeatures) return(object) } From 7ca93f20282162c2033f9cc6cd8a390e563ccbf8 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 11 Sep 2023 14:25:09 -0400 Subject: [PATCH 739/979] remove nselect from FindVariableFeatures.Seurat --- R/preprocessing.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index b7f5f728c..09177a8d2 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3877,7 +3877,6 @@ FindVariableFeatures.Seurat <- function( num.bin = num.bin, binning.method = binning.method, nfeatures = nfeatures, - nselect = nfeatures, mean.cutoff = mean.cutoff, dispersion.cutoff = dispersion.cutoff, verbose = verbose, From ff32246b8b46b96c8fc04f4cab6c0f8962aeac48 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 15 Sep 2023 13:22:50 -0400 Subject: [PATCH 740/979] Add documentation --- man/SCTransform.IterableMatrix.Rd | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 man/SCTransform.IterableMatrix.Rd diff --git a/man/SCTransform.IterableMatrix.Rd b/man/SCTransform.IterableMatrix.Rd new file mode 100644 index 000000000..e8fbff165 --- /dev/null +++ b/man/SCTransform.IterableMatrix.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing5.R +\name{SCTransform.IterableMatrix} +\alias{SCTransform.IterableMatrix} +\title{LayerData LayerData<-} +\usage{ +\method{SCTransform}{IterableMatrix}( + object, + cell.attr = NULL, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object)/30), sqrt(x = ncol(x = object)/30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) +} +\description{ +LayerData LayerData<- +} From 1cd47385d88992ab7af711edcdb0dbe8d451aba8 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 15 Sep 2023 13:28:34 -0400 Subject: [PATCH 741/979] Handle v1/v2 inside SCTransform --- R/preprocessing.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 12cfff751..6d596a181 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3235,6 +3235,12 @@ SCTransform.default <- function( ) } + if (!is.null(x = vst.flavor) && vst.flavor!="v2"){ + stop("vst.flavor can be 'v1' or 'v2'. Default is 'v2'") + } + if (vst.flavor == "v1"){ + vst.flavor <- NULL + } vst.args[['vst.flavor']] <- vst.flavor vst.args[['umi']] <- umi vst.args[['cell_attr']] <- cell.attr From 2a565f08205894ffe36694bf793976bf5e83d66b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 15 Sep 2023 16:17:38 -0400 Subject: [PATCH 742/979] Set seed for SCTransform, default to RNA assay if re-running SCTransform on SCT assay --- R/preprocessing.R | 21 +++++++++++++++++---- R/preprocessing5.R | 3 +++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 6d596a181..777d083be 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3182,6 +3182,9 @@ SCTransform.default <- function( verbose = TRUE, ... ) { + if (!is.null(x = seed.use)) { + set.seed(seed = seed.use) + } vst.args <- list(...) umi <- object # check for batch_var in meta data @@ -3235,12 +3238,13 @@ SCTransform.default <- function( ) } - if (!is.null(x = vst.flavor) && vst.flavor!="v2"){ + if (!is.null(x = vst.flavor) && !vst.flavor %in% c("v1", "v2")){ stop("vst.flavor can be 'v1' or 'v2'. Default is 'v2'") } - if (vst.flavor == "v1"){ + if (!is.null(x = vst.flavor) && vst.flavor == "v1"){ vst.flavor <- NULL } + vst.args[['vst.flavor']] <- vst.flavor vst.args[['umi']] <- umi vst.args[['cell_attr']] <- cell.attr @@ -3456,6 +3460,7 @@ SCTransform.Assay <- function( do.correct.umi <- FALSE do.center <- FALSE } + umi <- GetAssayData(object = object, slot = 'counts') vst.out <- SCTransform(object = umi, cell.attr = cell.attr, @@ -3522,7 +3527,7 @@ SCTransform.Assay <- function( #' SCTransform.Seurat <- function( object, - assay = NULL, + assay = "RNA", new.assay.name = 'SCT', reference.SCT.model = NULL, do.correct.umi = TRUE, @@ -3541,12 +3546,20 @@ SCTransform.Seurat <- function( verbose = TRUE, ... ) { + if (!is.null(x = seed.use)) { + set.seed(seed = seed.use) + } assay <- assay %||% DefaultAssay(object = object) + if (assay == "SCT") { + # if re-running SCTransform, use the RNA assay + assay <- "RNA" + warning("Running SCTransform on the RNA assay while default assay is SCT.") + } + if (verbose){ message("Running SCTransform on assay: ", assay) } cell.attr <- slot(object = object, name = 'meta.data')[colnames(object[[assay]]),] - assay.data <- SCTransform(object = object[[assay]], cell.attr = cell.attr, reference.SCT.model = reference.SCT.model, diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c77c78ba4..1fdf3c4f7 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1499,6 +1499,9 @@ SCTransform.StdAssay <- function( seed.use = 1448145, verbose = TRUE, ...) { + if (!is.null(x = seed.use)) { + set.seed(seed = seed.use) + } if (!is.null(reference.SCT.model)){ do.correct.umi <- FALSE do.center <- FALSE From 9b1a441cf9c851803af5807975e959059702fcb9 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 15 Sep 2023 16:18:13 -0400 Subject: [PATCH 743/979] Fix tests for SCTransform --- tests/testthat/test_preprocessing.R | 35 +++++++++++++---------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 7cd04ec14..fb7a08676 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -286,45 +286,43 @@ test_that("CustomNormalize works as expected", { expect_error(CustomNormalize(data = pbmc.test, custom_function = norm.fxn, margin = 10)) }) -# Tests for SCTransform -# ------------------------------------------------------------------------------ context("SCTransform") -object <- suppressWarnings(SCTransform(object = object, vst.flavor = NULL, verbose = FALSE)) +object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v1", seed.use = 1448145)) test_that("SCTransform wrapper works as expected", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 24.5813, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.40288448) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 40.49135, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 10.96128, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 70) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 24) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 57.7295742, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74403719, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 129) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2763993, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_variance"], 3.023062, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_mean"], 0.2362887, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) }) suppressWarnings(RNGversion(vstr = "3.5.0")) -object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE, seed.use = 42)) +object <- suppressWarnings(SCTransform(object = object, vst.flavor = "v1", ncells = 40, verbose = FALSE, seed.use = 42)) test_that("SCTransform ncells param works", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 22.82011, tolerance = 1e6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 12.02126, tolerance = 1e6) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 56.02856, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 12.80183, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 125) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 35) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 60.65299, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74404, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 136) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2560799, tolerance = 1e-3) - expect_equal(fa["MS4A1", "residual_variance"], 2.909645, tolerance = 1e-3) + expect_equal(fa["MS4A1", "residual_mean"], 0.2829672, tolerance = 1e-3) + expect_equal(fa["MS4A1", "residual_variance"], 3.674079, tolerance = 1e-3) }) suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) @@ -338,4 +336,3 @@ test_that("GetResidual works", { ) expect_warning(GetResidual(object, features = "asd")) }) - From 8a66402757aa7b56c444b9383b9b09496ded59d3 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 15 Sep 2023 16:21:37 -0400 Subject: [PATCH 744/979] Remove delayed array --- R/preprocessing5.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 1fdf3c4f7..7391ad1cf 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -2038,20 +2038,13 @@ FetchResidualSCTModel <- function(object, layer = layer, cells = layer.cells ) - - # iterate over 2k cells at once - # cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = min(2000, length(x = layer.cells))) - # cells.grid <- DelayedArray::colAutoGrid(x = counts, ncol = length(x = layer.cells)) cells.vector <- 1:length(x = layer.cells) cells.grid <- split(x = cells.vector, f = ceiling(x = seq_along(along.with = cells.vector)/chunk_size)) - new_residuals <- list() for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - #block <- DelayedArray::read_block(x = counts, viewport = vp, as.sparse = TRUE) block <- counts[,vp, drop=FALSE] - ## TODO: Maybe read only interesting genes umi.all <- as.sparse(x = block) # calculate min_variance for get_residuals From ab244d9485c8a3c9e748e1912eeb4be144e025f0 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 18 Sep 2023 00:13:18 -0400 Subject: [PATCH 745/979] Add tests for v2 --- tests/testthat/test_preprocessing.R | 39 +++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index fb7a08676..b30d4abd4 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -306,6 +306,26 @@ test_that("SCTransform wrapper works as expected", { expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) }) +devtools::load_all("~/github/sctransform-public-main") +object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) + +test_that("SCTransform v2 works as expected", { + expect_true("SCT" %in% names(object)) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 24.5183, tolerance = 1e-2) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 58.65829, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 13.75449, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 141) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 40) + expect_equal(length(VariableFeatures(object[["SCT"]])), 220) + fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") + expect_equal(fa["MS4A1", "detection_rate"], 0.15) + expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) + expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_mean"], 0.2763993, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_variance"], 3.023062, tolerance = 1e-6) +}) + suppressWarnings(RNGversion(vstr = "3.5.0")) object <- suppressWarnings(SCTransform(object = object, vst.flavor = "v1", ncells = 40, verbose = FALSE, seed.use = 42)) test_that("SCTransform ncells param works", { @@ -336,3 +356,22 @@ test_that("GetResidual works", { ) expect_warning(GetResidual(object, features = "asd")) }) + +object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) + +test_that("SCTransform v2 works as expected", { + expect_true("SCT" %in% names(object)) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.40288448) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 57.7295742, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74403719, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 129) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) + expect_equal(length(VariableFeatures(object[["SCT"]])), 220) + fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") + expect_equal(fa["MS4A1", "detection_rate"], 0.15) + expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) + expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_mean"], 0.2362887, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) +}) From 6c46c85b4d80fda336419c6c47283e9c69d35781 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 18 Sep 2023 10:43:48 -0400 Subject: [PATCH 746/979] add v5 testing --- tests/testthat/test_differential_expression.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index ec96bc644..5b9414c59 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -330,13 +330,13 @@ test_that("FindAllMarkers works as expected", { # Tests for running FindMarkers post integration/transfer ref <- pbmc_small ref <- FindVariableFeatures(object = ref, verbose = FALSE, nfeatures = 100) -query <- CreateSeuratObject( +query <- CreateSeuratObject(CreateAssayObject( counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1)) -) +)) -query2 <- CreateSeuratObject( +query2 <- CreateSeuratObject(CreateAssayObject( counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1)) -) +)) From 0b91ddaab6077c2650ae56dfbbd8759824012def Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 18 Sep 2023 10:48:35 -0400 Subject: [PATCH 747/979] set default layer to counts if use.umi=TRUE with ScaleData --- R/preprocessing5.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index d88ddba79..12e0558f3 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -687,6 +687,7 @@ ScaleData.StdAssay <- function( olayer <- layer <- unique(x = layer) layer <- Layers(object = object, search = layer) if (isTRUE(x = use.umi)) { + layer <- "counts" inform( message = "'use.umi' is TRUE, please make sure 'layer' specifies raw counts" ) From c32dc947981102529fef6e4a9c43b3b2cce1fea0 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 18 Sep 2023 12:13:47 -0400 Subject: [PATCH 748/979] adding BP findmarkers --- R/differential_expression.R | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index a6aa0d74a..bcad9d662 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -572,18 +572,31 @@ FindMarkers.default <- function( latent.vars <- latent.vars[c(cells.1, cells.2), , drop = FALSE] } } - de.results <- PerformDE( - object = object, - cells.1 = cells.1, - cells.2 = cells.2, - features = features, - test.use = test.use, - verbose = verbose, - min.cells.feature = min.cells.feature, - latent.vars = latent.vars, - densify = densify, - ... - ) + if (inherits(x = object, what = "IterableMatrix")){ + data.use <- object[features, c(cells.1, cells.2), drop = FALSE] + groups <- c(rep("foreground", length(cells.1)), rep("background", length(cells.2))) + de.results <- suppressMessages(BPCells::marker_features(data.use, group = groups, method = "wilcoxon")) + de.results <- as.data.frame( + de.results %>% + filter(foreground == "foreground") %>% + select(feature, p_val = p_val_raw) + ) + rownames(de.results) <- de.results$feature + de.results$feature <- NULL + } else { + de.results <- PerformDE( + object = object, + cells.1 = cells.1, + cells.2 = cells.2, + features = features, + test.use = test.use, + verbose = verbose, + min.cells.feature = min.cells.feature, + latent.vars = latent.vars, + densify = densify, + ... + ) + } de.results <- cbind(de.results, fc.results[rownames(x = de.results), , drop = FALSE]) if (only.pos) { de.results <- de.results[de.results[, 2] > 0, , drop = FALSE] From 4dd3aec44ea9ec2bc9dd211404e19698515993a2 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 18 Sep 2023 14:17:02 -0400 Subject: [PATCH 749/979] clean up --- R/differential_expression.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index bcad9d662..a81762812 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -575,12 +575,12 @@ FindMarkers.default <- function( if (inherits(x = object, what = "IterableMatrix")){ data.use <- object[features, c(cells.1, cells.2), drop = FALSE] groups <- c(rep("foreground", length(cells.1)), rep("background", length(cells.2))) - de.results <- suppressMessages(BPCells::marker_features(data.use, group = groups, method = "wilcoxon")) - de.results <- as.data.frame( - de.results %>% - filter(foreground == "foreground") %>% - select(feature, p_val = p_val_raw) - ) + de.results <- suppressMessages(BPCells::marker_features(data.use, + group = groups, + method = "wilcoxon")) + de.results <- subset(de.results, foreground == "foreground") + de.results <- data.frame(feature = de.results$feature, + p_val = de.results$p_val_raw) rownames(de.results) <- de.results$feature de.results$feature <- NULL } else { From 3248375263fe6e1c51b0df821c95c91c7680c84c Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 18 Sep 2023 15:05:41 -0400 Subject: [PATCH 750/979] add v5 preprocessing tests --- tests/testthat/test_integratedata.R | 22 +++++++++---------- tests/testthat/test_preprocessing.R | 34 +++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test_integratedata.R b/tests/testthat/test_integratedata.R index 0566c779c..818ce2b16 100644 --- a/tests/testthat/test_integratedata.R +++ b/tests/testthat/test_integratedata.R @@ -175,7 +175,7 @@ int_harmony <- suppressMessages(suppressWarnings(IntegrateLayers( test_that("IntegrateLayers returns embeddings with correct dimensions ", { expect_equal(dim(int_cca[["integrated.cca"]]), c(80, 50)) expect_equal(dim(int_rpca[["integrated.rpca"]]), c(80, 50)) - expect_equal(dim(int_harmony[["integrated.cca"]]), c(80, 50)) + expect_equal(dim(int_harmony[["harmony"]]), c(80, 50)) int_rpca expect_equal(int_cca[["integrated.cca"]]@assay.used, "RNAv5") @@ -189,15 +189,13 @@ test_that("group.by ", { #Harmony integration - - -int_2 <- IntegrateLayers(object = pbmc_small, method = CCAIntegration, - group.by = "letter.idents", - orig.reduction = "pca", - assay = "RNAv5", - k.weight = 20, - new.reduction = "integrated.cca") - -head(int_2[['integrated.cca']]@cell.embeddings[1:5,1:5]) -head(int_cca[['integrated.cca']]@cell.embeddings[1:5,1:5]) +# int_2 <- IntegrateLayers(object = pbmc_small, method = CCAIntegration, +# group.by = "letter.idents", +# orig.reduction = "pca", +# assay = "RNAv5", +# k.weight = 20, +# new.reduction = "integrated.cca") +# +# head(int_2[['integrated.cca']]@cell.embeddings[1:5,1:5]) +# head(int_cca[['integrated.cca']]@cell.embeddings[1:5,1:5]) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 5961c4f50..8787e84d5 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -97,6 +97,40 @@ test_that("Relative count normalization returns expected values", { expect_equal(rc.counts[2, 1], 14285.71, tolerance = 1e-6) }) +# Tests for v5 NormalizeData +# -------------------------------------------------------------------------------- +context("v5 NormalizeData") + +if(class(object[['RNA']]) == "Assay5") { + fake.groups <- c(rep(1, floor(ncol(pbmc.test)/2)), + rep(2, ncol(pbmc.test) - (floor(ncol(pbmc.test)/2))) ) + object$groups <- fake.groups + object.split[["RNA"]] <- split(object[["RNA"]], f = object$groups) + object.split <- NormalizeData(object = object.split) + + group1 <- subset(object, groups==1) + group1 <- NormalizeData(group1) + + test_that("Normalization is performed for each layer", { + expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) + expect_equal(group1[['RNA']]$data, LayerData(object.split, layer="data.1")) + }) + + object.split <- NormalizeData(object = object.split, normalization.method = "CLR", verbose = FALSE) + group1 <- NormalizeData(object = group1, normalization.method = "CLR", verbose = FALSE) + test_that("CLR normalization works with multiple layers", { + expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) + expect_equal(group1[['RNA']]$data, LayerData(object.split, layer="data.1")) + }) + + object.split <- NormalizeData(object = object.split, normalization.method = "RC", verbose = FALSE) + group1 <- NormalizeData(object = group1, normalization.method = "RC", verbose = FALSE) + test_that("RC normalization works with multiple layers", { + expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) + expect_equal(group1[['RNA']]$data, LayerData(object.split, layer="data.1")) + }) +} + # Tests for ScaleData # -------------------------------------------------------------------------------- context("ScaleData") From ab200d8e3a2c54d552b1c68cd40f690a07aad446 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 18 Sep 2023 16:11:31 -0400 Subject: [PATCH 751/979] add helper function to make groups --- R/integration5.R | 54 +++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 990280b59..1b34047a7 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -106,13 +106,7 @@ HarmonyIntegration <- function( # verbose = verbose # ) #create grouping variables - groups <- SeuratObject::EmptyDF(n = ncol(x = object)) - row.names(x = groups) <- colnames(x = object) - for (model in levels(x = object)) { - cc <- Cells(x = object, layer = model) - groups[cc, "group"] <- model - } - names(x = groups) <- 'group' + groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) # Run Harmony harmony.embed <- harmony::HarmonyMatrix( data_mat = Embeddings(object = orig), @@ -210,13 +204,8 @@ CCAIntegration <- function( assay <- assay %||% 'RNA' layers <- layers %||% Layers(object, search = 'data') if (normalization.method == 'SCT') { - groups <- SeuratObject::EmptyDF(n = ncol(x = object)) - row.names(x = groups) <- colnames(x = object) - for (model in levels(x = object)) { - cc <- Cells(x = object, layer = model) - groups[cc, "group"] <- model - } - names(x = groups) <- 'group' + #create grouping variables + groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') @@ -352,13 +341,8 @@ RPCAIntegration <- function( assay <- assay %||% 'RNA' layers <- layers %||% Layers(object = object, search = 'data') if (normalization.method == 'SCT') { - groups <- SeuratObject::EmptyDF(n = ncol(x = object)) - row.names(x = groups) <- colnames(x = object) - for (model in levels(x = object)) { - cc <- Cells(x = object, layer = model) - groups[cc, "group"] <- model - } - names(x = groups) <- 'group' + #create grouping variables + groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct, split.by = 'split') @@ -453,13 +437,8 @@ JointPCAIntegration <- function( layers <- layers %||% Layers(object, search = 'data') if (normalization.method == 'SCT') { - groups <- SeuratObject::EmptyDF(n = ncol(x = object)) - row.names(x = groups) <- colnames(x = object) - for (model in levels(x = object)) { - cc <- Cells(x = object, layer = model) - groups[cc, "group"] <- model - } - names(x = groups) <- 'group' + #create grouping variables + groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct <- DietSeurat(object = object.sct, features = features.diet) object.sct[['joint.pca']] <- CreateDimReducObject( @@ -633,6 +612,25 @@ IntegrateLayers <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +CreateGroupVariable <- function(object, layers, scale.layer) { + groups <- if (inherits(x = object, what = 'SCTAssay')) { + df <- SeuratObject::EmptyDF(n = ncol(x = object)) + row.names(x = df) <- colnames(x = object) + for (model in levels(x = object)) { + cc <- Cells(x = object, layer = model) + df[cc, "group"] <- model + } + df + } else if (length(x = layers) > 1L) { + cmap <- slot(object = object, name = 'cells')[, layers] + as.data.frame(x = labels( + object = cmap, + values = Cells(x = object, layer = scale.layer) + )) + } + names(x = groups) <- 'group' + return(groups) +} #' Writing Integration Method Functions #' From f48557bfba968f9b1f05aa5183781033bd5dbd93 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 21 Sep 2023 09:53:29 -0400 Subject: [PATCH 752/979] Bump sctransform requirement --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 741027ba6..f20c0bf33 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -78,7 +78,7 @@ Imports: Rtsne, scales, scattermore (>= 0.7), - sctransform (>= 0.3.5), + sctransform (>= 0.4.0), shiny, spatstat.explore, spatstat.geom, From 2804ac55e7854510f4bd6bcbd12ca38117783322 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 22 Sep 2023 11:44:21 -0400 Subject: [PATCH 753/979] Fix bug for calluating residuals in StdAssay --- R/preprocessing5.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 7391ad1cf..b4aed3397 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1409,7 +1409,6 @@ SCTransform.IterableMatrix <- function( } sampled_cells <- sample.int(n = ncol(x = object), size = min(ncells, ncol(x = object))) umi <- as.sparse(x = object[, sampled_cells]) - message("umi", dim(umi)) cell.attr <- cell.attr[colnames(x = umi),,drop=FALSE] vst.out <- SCTransform(object = umi, cell.attr = cell.attr, @@ -1617,13 +1616,14 @@ SCTransform.StdAssay <- function( message("Getting residuals for block ", i, "(of ", length(cells.grid), ") for ", dataset.names[[dataset.index]], " dataset") } counts.vp <- as.sparse(x = layer.data[, vp]) - cell.attr.object <- cell.attr[colnames(x = counts.vp),, drop=FALSE] + cell.attr.object <- cell.attr.layer[colnames(x = counts.vp),, drop=FALSE] vst_out <- vst_out.reference # cell_attr <- data.frame( # umi = colSums(counts.vp), # log_umi = log10(x = colSums(counts.vp)) # ) # rownames(cell_attr) <- colnames(counts.vp) + #browser() vst_out$cell_attr <- cell.attr.object vst_out$gene_attr <- vst_out$gene_attr[variable.features,] if (return.only.var.genes){ From e25cb31d7a5227ad3436a0da010a118ae1ae5863 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 22 Sep 2023 11:44:42 -0400 Subject: [PATCH 754/979] Add preliminary SCT2 integration vignette --- vignettes/seurat5_sctransform_integration.Rmd | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 vignettes/seurat5_sctransform_integration.Rmd diff --git a/vignettes/seurat5_sctransform_integration.Rmd b/vignettes/seurat5_sctransform_integration.Rmd new file mode 100644 index 000000000..1cfb38050 --- /dev/null +++ b/vignettes/seurat5_sctransform_integration.Rmd @@ -0,0 +1,90 @@ +--- +title: 'Integration of SCTransform normalized datasets' +output: + html_document: + theme: united + pdf_document: default +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- + +```{r setup, include=TRUE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + fig.width = 10, + message = FALSE, + warning = FALSE, + time_it = TRUE, + error = TRUE +) +``` +## Setup the Seurat objects + + + +```{r data} +library(Seurat) +options(Seurat.object.assay.version = "v5") +library(SeuratData) +library(patchwork) +``` + + +```{r installdata, eval=FALSE} +# install dataset +InstallData('pbmcsca') +``` + +```{r init, results='hide', message=FALSE, fig.keep='none'} +# load dataset +pbmcsca <- LoadData("pbmcsca") +pbmcsca <- UpdateSeuratObject(object = pbmcsca) +pbmcsca[["RNA"]] <- as(pbmcsca[["RNA"]], Class = "Assay5") + +# split the dataset into layers +pbmcsca[["RNA"]] <- split(pbmcsca[["RNA"]], f = pbmcsca$Method) +``` + +## Run SCTransform + +```{r} +pbmcsca <- SCTransform(pbmcsca) +pbmcsca <- RunPCA(pbmcsca, npcs = 30, verbose = FALSE) +``` + +## Perform integration + +We then integrate all the layers using the `IntegrateLayers()` function. + +```{r} +pbmcsca <- IntegrateLayers(object = pbmcsca, + method = RPCAIntegration, + normalization.method="SCT", + verbose = F) +``` + + +```{r} +pbmcsca <- FindNeighbors(pbmcsca, dims = 1:30) +pbmcsca <- FindClusters(pbmcsca, resolution = 2) +pbmcsca <- RunUMAP(pbmcsca, dims = 1:30) +``` + +```{r viz, results='hide', message=FALSE} +# Visualization +p1 <- DimPlot(pbmcsca, reduction = "umap", group.by = "Method") +p2 <- DimPlot(pbmcsca, reduction = "umap", group.by = "CellType", label = TRUE, repel = TRUE) +p1 + p2 +``` From 3f907ea681893187bd623d704e6ceaf4870a891b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 22 Sep 2023 14:11:20 -0400 Subject: [PATCH 755/979] Bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f20c0bf33..395514951 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9059 -Date: 2023-07-14 +Version: 4.9.9.9060 +Date: 2023-09-22 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 07458e0429de8dd485c78613acfd01e046b1aa0e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 22 Sep 2023 14:33:34 -0400 Subject: [PATCH 756/979] slot -> layer --- R/preprocessing5.R | 7 +--- tests/testthat/test_preprocessing.R | 53 ++++++++++++++--------------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index b4aed3397..5e878fa15 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1618,12 +1618,7 @@ SCTransform.StdAssay <- function( counts.vp <- as.sparse(x = layer.data[, vp]) cell.attr.object <- cell.attr.layer[colnames(x = counts.vp),, drop=FALSE] vst_out <- vst_out.reference - # cell_attr <- data.frame( - # umi = colSums(counts.vp), - # log_umi = log10(x = colSums(counts.vp)) - # ) - # rownames(cell_attr) <- colnames(counts.vp) - #browser() + vst_out$cell_attr <- cell.attr.object vst_out$gene_attr <- vst_out$gene_attr[variable.features,] if (return.only.var.genes){ diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index b30d4abd4..2fc3d9bfb 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -291,12 +291,12 @@ object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.fla test_that("SCTransform wrapper works as expected", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.40288448) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 57.7295742, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74403719, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 129) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 11.40288448) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "data"))[1]), 57.7295742, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "data"))[5]), 11.74403719, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[1]), 129) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[5]), 28) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) @@ -306,17 +306,15 @@ test_that("SCTransform wrapper works as expected", { expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) }) -devtools::load_all("~/github/sctransform-public-main") object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) - test_that("SCTransform v2 works as expected", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 24.5183, tolerance = 1e-2) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 58.65829, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 13.75449, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 141) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 40) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 24.5183, tolerance = 1e-2) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "data"))[1]), 58.65829, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "data"))[5]), 13.75449, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[1]), 141) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[5]), 40) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) @@ -330,12 +328,12 @@ suppressWarnings(RNGversion(vstr = "3.5.0")) object <- suppressWarnings(SCTransform(object = object, vst.flavor = "v1", ncells = 40, verbose = FALSE, seed.use = 42)) test_that("SCTransform ncells param works", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 12.02126, tolerance = 1e6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 60.65299, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74404, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 136) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 12.02126, tolerance = 1e6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "data"))[1]), 60.65299, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "data"))[5]), 11.74404, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[1]), 136) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[5]), 28) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) @@ -361,17 +359,18 @@ object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.fla test_that("SCTransform v2 works as expected", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.40288448) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 24.5813, tolerance = 1e-4) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 57.7295742, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74403719, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 129) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 58.65829, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 13.75449, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 141) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 40) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2362887, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_mean"], 0.2763993, tolerance = 1e-6) + expect_equal(fa["MS4A1", "residual_variance"], 3.023062, tolerance = 1e-6) + expect_equal(fa["FCER2", "theta"], Inf) }) From cd418a3c2872c6bc1831e9928698b68d2025cb03 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 22 Sep 2023 16:08:41 -0400 Subject: [PATCH 757/979] format --- R/differential_expression.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index a81762812..a38c3b45e 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -575,9 +575,9 @@ FindMarkers.default <- function( if (inherits(x = object, what = "IterableMatrix")){ data.use <- object[features, c(cells.1, cells.2), drop = FALSE] groups <- c(rep("foreground", length(cells.1)), rep("background", length(cells.2))) - de.results <- suppressMessages(BPCells::marker_features(data.use, - group = groups, - method = "wilcoxon")) + de.results <- suppressMessages( + BPCells::marker_features(data.use, group = groups, method = "wilcoxon") + ) de.results <- subset(de.results, foreground == "foreground") de.results <- data.frame(feature = de.results$feature, p_val = de.results$p_val_raw) From 0fb66b5d20563d99c0a2b5f6d795dbbeadf55f8c Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:10:32 -0400 Subject: [PATCH 758/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 741027ba6..a0ed7e0de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9059 -Date: 2023-07-14 +Version: 4.9.9.9061 +Date: 2023-09-22 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 04d3c232616133c047fa6ec45f53f9d31431b8b1 Mon Sep 17 00:00:00 2001 From: mhkowalski <52510573+mhkowalski@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:19:01 -0400 Subject: [PATCH 759/979] Update preprocessing.R remove nselect option from FindVariableFeatures --- R/preprocessing.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 113083d07..7daa40744 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3758,7 +3758,6 @@ FindVariableFeatures.Assay <- function( num.bin = 20, binning.method = "equal_width", nfeatures = 2000, - nselect = 2000, mean.cutoff = c(0.1, 8), dispersion.cutoff = c(1, Inf), verbose = TRUE, From f64a2a4ba92b279f5cafc32f2d7f2cf14e908ceb Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 22 Sep 2023 17:53:23 -0400 Subject: [PATCH 760/979] Fix PrepSCTFindMarkers to return same feature set across all layers --- R/differential_expression.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index a6aa0d74a..90c2adee9 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2141,6 +2141,7 @@ PerformDE <- function( #' @param assay Assay name where for SCT objects are stored; Default is 'SCT' #' @param verbose Print messages and progress #' @importFrom Matrix Matrix +#' @importFrom SeuratObject SparseEmptyMatrix #' @importFrom pbapply pblapply #' @importFrom future.apply future_lapply #' @importFrom future nbrOfWorkers @@ -2250,7 +2251,7 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { set_median_umi <- rep(min_median_umi, length(levels(x = object[[assay]]))) names(set_median_umi) <- levels(x = object[[assay]]) set_median_umi <- as.list(set_median_umi) - + all_genes <- rownames(x = object[[assay]]) # correct counts my.correct_counts <- function(model_name){ model_genes <- rownames(x = model_pars_fit[[model_name]]) @@ -2261,7 +2262,7 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { cell_attr = cell_attr[[model_name]] ) cells <- rownames(x = cell_attr[[model_name]]) - umi <- raw_umi[model_genes, cells] + umi <- raw_umi[all_genes, cells] umi_corrected <- correct_counts( x = x, @@ -2269,14 +2270,21 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { verbosity = 0, scale_factor = min_median_umi ) + missing_features <- setdiff(x = all_genes, y = rownames(x = umi_corrected)) + empty <- SparseEmptyMatrix(nrow = length(x = missing_features), ncol = ncol(x = umi_corrected)) + rownames(x = empty) <- missing_features + colnames(x = umi_corrected) <- colnames(x = umi_corrected) + + umi_corrected <- rbind(umi_corrected, empty)[all_genes,] + return(umi_corrected) } corrected_counts.list <- my.lapply(X = levels(x = object[[assay]]), FUN = my.correct_counts) names(x = corrected_counts.list) <- levels(x = object[[assay]]) - corrected_counts <- do.call(what = MergeSparseMatrices, args = corrected_counts.list) - corrected_counts.list <- NULL + corrected_counts <- do.call(what = MergeSparseMatrices, args = corrected_counts.list) + corrected_counts <- as.sparse(x = corrected_counts) corrected_data <- log1p(x = corrected_counts) suppressWarnings({object <- SetAssayData(object = object, assay = assay, From 73d953f6d67b42b01ad379ae700baf126d52acbb Mon Sep 17 00:00:00 2001 From: Gesmira Date: Sat, 23 Sep 2023 17:42:27 -0400 Subject: [PATCH 761/979] adding method to VariableFeatures() --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 63e675727..4acdfb8fa 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -161,7 +161,7 @@ FindVariableFeatures.StdAssay <- function( object[colnames(x = hvf.info)] <- hvf.info } object@meta.data$var.features <- NULL - VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nselect) + VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nselect, method = key) return(object) } From 9e2abd14a7f7cb25417a2fe5d6c7ff7d040537d5 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 23 Sep 2023 23:30:01 -0400 Subject: [PATCH 762/979] Bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f2eb48d64..4b96bd2a9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9061 -Date: 2023-09-22 +Version: 4.9.9.9062 +Date: 2023-09-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 0ead2dcb8f953d14e618ab59ec995f438ec9cc60 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 23 Sep 2023 23:31:54 -0400 Subject: [PATCH 763/979] Free up memory --- R/differential_expression.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 108b64e3f..985f1b4cf 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -579,7 +579,7 @@ FindMarkers.default <- function( BPCells::marker_features(data.use, group = groups, method = "wilcoxon") ) de.results <- subset(de.results, foreground == "foreground") - de.results <- data.frame(feature = de.results$feature, + de.results <- data.frame(feature = de.results$feature, p_val = de.results$p_val_raw) rownames(de.results) <- de.results$feature de.results$feature <- NULL @@ -2284,6 +2284,8 @@ PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) { scale_factor = min_median_umi ) missing_features <- setdiff(x = all_genes, y = rownames(x = umi_corrected)) + corrected_counts.list <- NULL + gc(verbose = FALSE) empty <- SparseEmptyMatrix(nrow = length(x = missing_features), ncol = ncol(x = umi_corrected)) rownames(x = empty) <- missing_features colnames(x = umi_corrected) <- colnames(x = umi_corrected) From 2e49a8ef64888c175c113fd3b44af61ff648a603 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 24 Sep 2023 21:34:08 -0400 Subject: [PATCH 764/979] Documentation fixes --- NAMESPACE | 3 +- R/integration.R | 4 -- R/integration5.R | 78 ++++++++++++++--------------- R/preprocessing5.R | 14 +----- man/AverageExpression.Rd | 3 +- man/CCAIntegration.Rd | 28 +++++++++++ man/FetchResiduals.Rd | 4 -- man/HarmonyIntegration.Rd | 54 ++++++++++++-------- man/Load10X_Spatial.Rd | 1 + man/ProjectIntegration.Rd | 5 -- man/RPCAIntegration.Rd | 22 ++++---- man/SCTransform.IterableMatrix.Rd | 29 ----------- man/SCTransform.Rd | 27 +++++++++- man/SelectIntegrationFeatures5.Rd | 42 ---------------- man/SelectSCTIntegrationFeatures.Rd | 28 ----------- man/reexports.Rd | 4 +- src/Makevars | 2 +- 17 files changed, 145 insertions(+), 203 deletions(-) delete mode 100644 man/SCTransform.IterableMatrix.Rd delete mode 100644 man/SelectIntegrationFeatures5.Rd delete mode 100644 man/SelectSCTIntegrationFeatures.Rd diff --git a/NAMESPACE b/NAMESPACE index c6486e7c9..3fd5691f8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -404,8 +404,6 @@ export(ScaleData) export(ScaleFactors) export(ScoreJackStraw) export(SelectIntegrationFeatures) -export(SelectIntegrationFeatures5) -export(SelectSCTIntegrationFeatures) export(SetAssayData) export(SetIdent) export(SetIntegrationData) @@ -588,6 +586,7 @@ importFrom(SeuratObject,RowMergeSparseMatrices) importFrom(SeuratObject,SVFInfo) importFrom(SeuratObject,SetAssayData) importFrom(SeuratObject,SetIdent) +importFrom(SeuratObject,SparseEmptyMatrix) importFrom(SeuratObject,SpatiallyVariableFeatures) importFrom(SeuratObject,StashIdent) importFrom(SeuratObject,Stdev) diff --git a/R/integration.R b/R/integration.R index a2d00df4f..330a0ec66 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3020,8 +3020,6 @@ SelectIntegrationFeatures <- function( return(franks) } -#' @export -#' SelectIntegrationFeatures5 <- function( object, nfeatures = 2000, @@ -3044,8 +3042,6 @@ SelectIntegrationFeatures5 <- function( return(var.features) } -#' @export -#' SelectSCTIntegrationFeatures <- function( object, nfeatures = 3000, diff --git a/R/integration5.R b/R/integration5.R index 999bdcf7b..23dc95c1f 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -33,7 +33,7 @@ NULL # @templateVar pkg harmony # @template note-reqdpkg #' -#' @examples +#' @examples #' \dontrun{ #' # Preprocessing #' obj <- SeuratData::LoadData("pbmcsca") @@ -42,24 +42,24 @@ NULL #' obj <- FindVariableFeatures(obj) #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) -#' +#' #' # After preprocessing, we integrate layers with added parameters specific to Harmony: #' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", #' new.reduction = 'harmony', verbose = FALSE) -#' +#' #' # Modifying Parameters -#' # We can also add arguments specific to Harmony such as theta, to give more diverse clusters +#' # We can also add arguments specific to Harmony such as theta, to give more diverse clusters #' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", #' new.reduction = 'harmony', verbose = FALSE, theta = 3) #' } -#' +#' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) -#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, -#' orig.reduction = "pca", new.reduction = 'harmony', +#' obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, +#' orig.reduction = "pca", new.reduction = 'harmony', #' assay = "SCT", verbose = FALSE) -#' -#' +#' +#' #' @export #' #' @concept integration @@ -149,7 +149,7 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' @inheritParams IntegrateEmbeddings #' @param ... Arguments passed on to \code{FindIntegrationAnchors} #' @export -#' +#' #' @examples #' \dontrun{ #' # Preprocessing @@ -159,24 +159,24 @@ attr(x = HarmonyIntegration, which = 'Seurat.method') <- 'integration' #' obj <- FindVariableFeatures(obj) #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) -#' -#' # After preprocessing, we integrate layers. -#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' +#' # After preprocessing, we integrate layers. +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.cca', #' verbose = FALSE) -#' +#' #' # Modifying parameters -#' # We can also specify parameters such as `k.anchor` to increase the strength of integration -#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' # We can also specify parameters such as `k.anchor` to increase the strength of integration +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.cca', #' k.anchor = 20, verbose = FALSE) #' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) -#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.cca', +#' obj <- IntegrateLayers(object = obj, method = CCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.cca', #' assay = "SCT", verbose = FALSE) -#' +#'} CCAIntegration <- function( object = NULL, assay = NULL, @@ -215,9 +215,9 @@ CCAIntegration <- function( object.list <- list() for (i in seq_along(along.with = layers)) { if (inherits(x = object[[layers[i]]], what = "IterableMatrix")) { - warning("Converting BPCells matrix to dgCMatrix for integration ", + warning("Converting BPCells matrix to dgCMatrix for integration ", "as on-disk CCA Integration is not currently supported", call. = FALSE, immediate. = TRUE) - counts <- as(object = object[[layers[i]]][features, ], + counts <- as(object = object[[layers[i]]][features, ], Class = "dgCMatrix") } else { @@ -225,12 +225,12 @@ CCAIntegration <- function( } object.list[[i]] <- CreateSeuratObject(counts = counts) if (inherits(x = object[[scale.layer]], what = "IterableMatrix")) { - scale.data.layer <- as.matrix(object[[scale.layer]][features, + scale.data.layer <- as.matrix(object[[scale.layer]][features, Cells(object.list[[i]])]) object.list[[i]][["RNA"]]$scale.data <- scale.data.layer } else { - object.list[[i]][["RNA"]]$scale.data <- object[[scale.layer]][features, + object.list[[i]][["RNA"]]$scale.data <- object[[scale.layer]][features, Cells(object.list[[i]])] } object.list[[i]][['RNA']]$counts <- NULL @@ -274,7 +274,7 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' Seurat-RPCA Integration #' -#' @examples +#' @examples #' \dontrun{ #' # Preprocessing #' obj <- SeuratData::LoadData("pbmcsca") @@ -283,32 +283,32 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' obj <- FindVariableFeatures(obj) #' obj <- ScaleData(obj) #' obj <- RunPCA(obj) -#' +#' #' # After preprocessing, we run integration -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' verbose = FALSE) -#' +#' #' # Reference-based Integration #' # Here, we use the first layer as a reference for integraion #' # Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' reference = 1, verbose = FALSE) #' #' # Modifying parameters -#' # We can also specify parameters such as `k.anchor` to increase the strength of integration -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' # We can also specify parameters such as `k.anchor` to increase the strength of integration +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' k.anchor = 20, verbose = FALSE) #' #' # Integrating SCTransformed data #' obj <- SCTransform(object = obj) -#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, -#' orig.reduction = "pca", new.reduction = 'integrated.rpca', +#' obj <- IntegrateLayers(object = obj, method = RPCAIntegration, +#' orig.reduction = "pca", new.reduction = 'integrated.rpca', #' assay = "SCT", verbose = FALSE) #' } -#' +#' #' @inheritParams FindIntegrationAnchors #' @inheritParams IntegrateEmbeddings #' @param ... Arguments passed on to \code{FindIntegrationAnchors} @@ -342,7 +342,7 @@ RPCAIntegration <- function( features <- features %||% SelectIntegrationFeatures5(object = object) assay <- assay %||% 'RNA' layers <- layers %||% Layers(object = object, search = 'data') - #check that there enough cells present + #check that there enough cells present ncells <- sapply(X = layers, FUN = function(x) {ncell <- dim(object[[x]])[2] return(ncell) }) if (min(ncells) < max(dims)) { diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 24a3710e2..6c41cc616 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1374,12 +1374,10 @@ DISP <- function( #' @importFrom SeuratObject Cells as.sparse #' -#' @method SCTransform StdAssay +#' @method SCTransform IterableMatrix #' @rdname SCTransform #' @concept preprocessing #' @export -#' @method SCTransform Assay -#' SCTransform.IterableMatrix <- function( object, cell.attr, @@ -1436,19 +1434,11 @@ SCTransform.IterableMatrix <- function( #' @importFrom SeuratObject CreateAssayObject SetAssayData GetAssayData -#' Create SCT assay from vst.out output CreateSCTAssay <- function(vst.out, do.correct.umi, residual.type, clip.range){ residual.type <- vst.out[['residual_type']] %||% 'pearson' sct.method <- vst.out[['sct.method']] assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) - # create output assay and put (corrected) umi counts in count slot - # if (do.correct.umi & residual.type == 'pearson') { - # assay.out <- CreateAssayObject(counts = vst.out$umi_corrected) - # vst.out$umi_corrected <- NULL - # } else { - # assay.out <- CreateAssayObject(counts = counts.chunk) - # } # set the variable genes VariableFeatures(object = assay.out) <- vst.out$variable_features # put log1p transformed counts in data @@ -2152,7 +2142,7 @@ FetchResidualSCTModel <- function(object, return(new_residual) } -#'@importFrom sctransform get_residual +#' @importFrom sctransform get_residuals GetResidualsChunked <- function(vst_out, layer.counts, residual_type, min_variance, res_clip_range, verbose, chunk_size=5000) { if (inherits(x = layer.counts, what = 'V3Matrix')) { residuals <- get_residuals( diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index f4b2f8ff9..6779cb331 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -11,7 +11,8 @@ AverageExpression( return.seurat = FALSE, group.by = "ident", add.ident = NULL, - slot = "counts", + layer = "data", + slot = deprecated(), method = "average", verbose = TRUE, ... diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 1272a2605..3f5af22a4 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -94,3 +94,31 @@ integration.} \description{ Seurat-CCA Integration } +\examples{ +\dontrun{ +# Preprocessing +obj <- SeuratData::LoadData("pbmcsca") +obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method) +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj) + +# After preprocessing, we integrate layers. +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.cca', + verbose = FALSE) + +# Modifying parameters +# We can also specify parameters such as `k.anchor` to increase the strength of integration +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.cca', + k.anchor = 20, verbose = FALSE) + +# Integrating SCTransformed data +obj <- SCTransform(object = obj) +obj <- IntegrateLayers(object = obj, method = CCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.cca', + assay = "SCT", verbose = FALSE) +} +} diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd index c11af3ec1..81536048f 100644 --- a/man/FetchResiduals.Rd +++ b/man/FetchResiduals.Rd @@ -32,10 +32,6 @@ and the default is RNA} \item{clip.range}{Numeric of length two specifying the min and max values the Pearson residual will be clipped to} -\item{reference.SCT.model}{reference.SCT.model If a reference SCT model should be used -for calculating the residuals. When set to not NULL, ignores the `SCTModel` -paramater.} - \item{replace.value}{Recalculate residuals for all features, even if they are already present. Useful if you want to change the clip.range.} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index f1d21e518..435ea509a 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -10,6 +10,7 @@ HarmonyIntegration( groups, features = NULL, scale.layer = "scale.data", + new.reduction = "harmony", layers = NULL, npcs = 50L, key = "harmony_", @@ -41,35 +42,46 @@ should be called \code{group}} \item{layers}{Ignored} -<<<<<<< Updated upstream -======= -\item{npcs}{If doing PCA on input matrix, number of PCs to compute} +\item{npcs}{If doing PCA on input matrix, number of PCs to compute.} -\item{key}{Key for Harmony dimensional reduction} +\item{theta}{Diversity clustering penalty parameter. Specify for each +variable in vars_use Default theta=2. theta=0 does not encourage any +diversity. Larger values of theta result in more diverse clusters.} -\item{theta}{Diversity clustering penalty parameter} +\item{lambda}{Ridge regression penalty parameter. Specify for each variable + in vars_use. +Default lambda=1. Lambda must be strictly positive. Smaller values result +in more aggressive correction.} -\item{lambda}{Ridge regression penalty parameter} +\item{sigma}{Width of soft kmeans clusters. Default sigma=0.1. Sigma scales +the distance from a cell to cluster centroids. Larger values of sigma +result in cells assigned to more clusters. Smaller values of sigma make +soft kmeans cluster approach hard clustering.} -\item{sigma}{Width of soft kmeans clusters} +\item{nclust}{Number of clusters in model. nclust=1 equivalent to simple +linear regression.} -\item{nclust}{Number of clusters in model} +\item{tau}{Protection against overclustering small datasets with large ones. +tau is the expected number of cells per cluster.} -\item{tau}{Protection against overclustering small datasets with large ones} +\item{block.size}{What proportion of cells to update during clustering. +Between 0 to 1, default 0.05. Larger values may be faster but less accurate} -\item{block.size}{What proportion of cells to update during clustering} +\item{max.iter.harmony}{Maximum number of rounds to run Harmony. One round +of Harmony involves one clustering and one correction step.} -\item{max.iter.harmony}{Maximum number of rounds to run Harmony} +\item{max.iter.cluster}{Maximum number of rounds to run clustering at each +round of Harmony.} -\item{max.iter.cluster}{Maximum number of rounds to run clustering at each round of Harmony} +\item{epsilon.cluster}{Convergence tolerance for clustering round of +Harmony. Set to -Inf to never stop early.} -\item{epsilon.cluster}{Convergence tolerance for clustering round of Harmony} +\item{epsilon.harmony}{Convergence tolerance for Harmony. Set to -Inf to +never stop early.} -\item{epsilon.harmony}{Convergence tolerance for Harmony} +\item{verbose}{Whether to print progress messages. TRUE to print, +FALSE to suppress.} -\item{verbose}{Whether to print progress messages. TRUE to print, FALSE to suppress} - ->>>>>>> Stashed changes \item{...}{Ignored} } \value{ @@ -98,17 +110,17 @@ obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction new.reduction = 'harmony', verbose = FALSE) # Modifying Parameters -# We can also add arguments specific to Harmony such as theta, to give more diverse clusters +# We can also add arguments specific to Harmony such as theta, to give more diverse clusters obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', verbose = FALSE, theta = 3) } # Integrating SCTransformed data obj <- SCTransform(object = obj) -obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, - orig.reduction = "pca", new.reduction = 'harmony', +obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, + orig.reduction = "pca", new.reduction = 'harmony', assay = "SCT", verbose = FALSE) - + } \seealso{ diff --git a/man/Load10X_Spatial.Rd b/man/Load10X_Spatial.Rd index 84c8c0ec8..763a1d0ef 100644 --- a/man/Load10X_Spatial.Rd +++ b/man/Load10X_Spatial.Rd @@ -11,6 +11,7 @@ Load10X_Spatial( slice = "slice1", filter.matrix = TRUE, to.upper = FALSE, + image = NULL, ... ) } diff --git a/man/ProjectIntegration.Rd b/man/ProjectIntegration.Rd index c61c4648c..f4a5cc2cc 100644 --- a/man/ProjectIntegration.Rd +++ b/man/ProjectIntegration.Rd @@ -50,11 +50,6 @@ for all cells (default is 'sketch'). Can be one of: \item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to \dQuote{\code{sketch}}; defaults to 0.8} -\item{sketched.layers}{Names of sketched layers, defaults to all -layers of \dQuote{\code{object[[assay]]}}} - -\item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} - \item{verbose}{Print progress and message} } \value{ diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index 86774c8a3..81ce69b3d 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -105,28 +105,28 @@ obj <- ScaleData(obj) obj <- RunPCA(obj) # After preprocessing, we run integration -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', verbose = FALSE) - + # Reference-based Integration # Here, we use the first layer as a reference for integraion # Thus, we only identify anchors between the reference and the rest of the datasets, saving computational resources -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', reference = 1, verbose = FALSE) # Modifying parameters -# We can also specify parameters such as `k.anchor` to increase the strength of integration -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +# We can also specify parameters such as `k.anchor` to increase the strength of integration +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', k.anchor = 20, verbose = FALSE) # Integrating SCTransformed data obj <- SCTransform(object = obj) -obj <- IntegrateLayers(object = obj, method = RPCAIntegration, - orig.reduction = "pca", new.reduction = 'integrated.rpca', +obj <- IntegrateLayers(object = obj, method = RPCAIntegration, + orig.reduction = "pca", new.reduction = 'integrated.rpca', assay = "SCT", verbose = FALSE) } - + } diff --git a/man/SCTransform.IterableMatrix.Rd b/man/SCTransform.IterableMatrix.Rd deleted file mode 100644 index e8fbff165..000000000 --- a/man/SCTransform.IterableMatrix.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/preprocessing5.R -\name{SCTransform.IterableMatrix} -\alias{SCTransform.IterableMatrix} -\title{LayerData LayerData<-} -\usage{ -\method{SCTransform}{IterableMatrix}( - object, - cell.attr = NULL, - reference.SCT.model = NULL, - do.correct.umi = TRUE, - ncells = 5000, - residual.features = NULL, - variable.features.n = 3000, - variable.features.rv.th = 1.3, - vars.to.regress = NULL, - do.scale = FALSE, - do.center = TRUE, - clip.range = c(-sqrt(x = ncol(x = object)/30), sqrt(x = ncol(x = object)/30)), - conserve.memory = FALSE, - return.only.var.genes = TRUE, - seed.use = 1448145, - verbose = TRUE, - ... -) -} -\description{ -LayerData LayerData<- -} diff --git a/man/SCTransform.Rd b/man/SCTransform.Rd index c4cd0a40b..724bc13f1 100644 --- a/man/SCTransform.Rd +++ b/man/SCTransform.Rd @@ -1,10 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generics.R, R/preprocessing.R +% Please edit documentation in R/generics.R, R/preprocessing.R, +% R/preprocessing5.R \name{SCTransform} \alias{SCTransform} \alias{SCTransform.default} \alias{SCTransform.Assay} \alias{SCTransform.Seurat} +\alias{SCTransform.IterableMatrix} \title{Perform sctransform-based normalization} \usage{ SCTransform(object, ...) @@ -53,7 +55,7 @@ SCTransform(object, ...) \method{SCTransform}{Seurat}( object, - assay = NULL, + assay = "RNA", new.assay.name = "SCT", reference.SCT.model = NULL, do.correct.umi = TRUE, @@ -73,6 +75,27 @@ SCTransform(object, ...) verbose = TRUE, ... ) + +\method{SCTransform}{IterableMatrix}( + object, + cell.attr, + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object)/30), sqrt(x = ncol(x = object)/30)), + vst.flavor = "v2", + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... +) } \arguments{ \item{object}{UMI counts matrix} diff --git a/man/SelectIntegrationFeatures5.Rd b/man/SelectIntegrationFeatures5.Rd deleted file mode 100644 index f97aacf8a..000000000 --- a/man/SelectIntegrationFeatures5.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/integration.R -\name{SelectIntegrationFeatures5} -\alias{SelectIntegrationFeatures5} -\title{Select integration features} -\usage{ -SelectIntegrationFeatures5( - object, - nfeatures = 2000, - assay = NULL, - method = NULL, - layers = NULL, - verbose = TRUE, - ... -) -} -\arguments{ -\item{object}{Seurat object} - -\item{nfeatures}{Number of features to return for integration} - -\item{assay}{Name of assay to use for integration feature selection} - -\item{method}{Which method to pull. For \code{HVFInfo} and -\code{VariableFeatures}, choose one from one of the -following: -\itemize{ - \item \dQuote{vst} - \item \dQuote{sctransform} or \dQuote{sct} - \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or - \dQuote{disp} -}} - -\item{layers}{Name of layers to use for integration feature selection} - -\item{verbose}{Print messages} - -\item{...}{Arguments passed on to \code{method}} -} -\description{ -Select integration features -} diff --git a/man/SelectSCTIntegrationFeatures.Rd b/man/SelectSCTIntegrationFeatures.Rd deleted file mode 100644 index 4c933c198..000000000 --- a/man/SelectSCTIntegrationFeatures.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/integration.R -\name{SelectSCTIntegrationFeatures} -\alias{SelectSCTIntegrationFeatures} -\title{Select SCT integration features} -\usage{ -SelectSCTIntegrationFeatures( - object, - nfeatures = 3000, - assay = NULL, - verbose = TRUE, - ... -) -} -\arguments{ -\item{object}{Seurat object} - -\item{nfeatures}{Number of features to return for integration} - -\item{assay}{Name of assay to use for integration feature selection} - -\item{verbose}{Print messages} - -\item{...}{Arguments passed on to \code{method}} -} -\description{ -Select SCT integration features -} diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} diff --git a/src/Makevars b/src/Makevars index a7f35101d..e9d976546 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1 +1 @@ -CXX_STD = CXX11 +CXX_STD = CXX17 From 7ba5b9876e105a3c7fcc31abad148c593bc75326 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 24 Sep 2023 21:34:39 -0400 Subject: [PATCH 765/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4b96bd2a9..d833f460a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9062 -Date: 2023-09-23 +Version: 4.9.9.9063 +Date: 2023-09-24 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From ec81f4aff07c6ff4b8b191dd7cded5784bee2b4f Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 25 Sep 2023 14:13:05 -0400 Subject: [PATCH 766/979] fix variablefeatureplot --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 61c2cf303..220338e92 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -2091,7 +2091,7 @@ VariableFeaturePlot <- function( hvf.info <- HVFInfo( object = object, assay = assay, - selection.method = selection.method, + method = selection.method, status = TRUE ) status.col <- colnames(hvf.info)[grepl("variable", colnames(hvf.info))][[1]] From 83643e45ad990ce63ecbfb40ac48a7b27fe2ebc9 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 25 Sep 2023 15:32:13 -0400 Subject: [PATCH 767/979] adding rank variable --- R/preprocessing5.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 4acdfb8fa..0a32bb6c0 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -161,6 +161,7 @@ FindVariableFeatures.StdAssay <- function( object[colnames(x = hvf.info)] <- hvf.info } object@meta.data$var.features <- NULL + object@meta.data$var.features.rank <- NULL VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nselect, method = key) return(object) } From d82855845c0495f782024385ab0d0cff7c2fb6fe Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 25 Sep 2023 16:28:48 -0400 Subject: [PATCH 768/979] change default layer to slot with AggregateExpression, use NormalizeData --- R/utilities.R | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 658044a59..7e05cf954 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -364,7 +364,6 @@ AggregateExpression <- function( return.seurat = FALSE, group.by = 'ident', add.ident = NULL, - slot = 'data', verbose = TRUE, ... ) { @@ -377,7 +376,7 @@ AggregateExpression <- function( return.seurat = return.seurat, group.by = group.by, add.ident = add.ident, - slot = slot, + slot = "counts", verbose = verbose, ... ) @@ -543,7 +542,7 @@ AverageExpression <- function( ) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[1]) <- log1p(x = as.matrix(x = data.return[[1]])) + assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]])) } #for multimodal data if (length(x = data.return) > 1) { @@ -564,15 +563,8 @@ AverageExpression <- function( toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[i]) <- log1p(x = as.matrix(x = data.return[[i]])) - toRet <- SetAssayData( - object = toRet, - assay = names(x = data.return)[i], - layer = "data", - new.data = log1p(x = as.matrix(x = data.return[[i]])) - ) + assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]])) } - } } if (DefaultAssay(object = object) %in% names(x = data.return)) { From 7e54d340ffb648a8f1fa482a0d7da60fca950f7b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 25 Sep 2023 18:01:50 -0400 Subject: [PATCH 769/979] Document --- NAMESPACE | 2 ++ man/CCAIntegration.Rd | 4 +-- man/FetchResiduals.Rd | 4 +++ man/FetchResiduals_reference.Rd | 21 ++------------ man/HarmonyIntegration.Rd | 28 ++++++++++++++++++- man/JointPCAIntegration.Rd | 43 +++++++++++++++++++++++++++-- man/LeverageScore.Rd | 13 --------- man/ProjectIntegration.Rd | 5 ++++ man/RPCAIntegration.Rd | 42 ++++++++++++++++++++++++++-- man/SelectIntegrationFeatures5.Rd | 2 ++ man/SelectSCTIntegrationFeatures.Rd | 2 ++ man/reexports.Rd | 4 +-- 12 files changed, 129 insertions(+), 41 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index dd6aef2d8..56e5d89a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -396,6 +396,8 @@ export(ScaleData) export(ScaleFactors) export(ScoreJackStraw) export(SelectIntegrationFeatures) +export(SelectIntegrationFeatures5) +export(SelectSCTIntegrationFeatures) export(SetAssayData) export(SetIdent) export(SetIntegrationData) diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index cecc28f65..65af0f2e9 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -53,7 +53,6 @@ or SCT} \item{scale.layer}{Name of scaled layer in \code{Assay}} - \item{dims.to.integrate}{Number of dimensions to return integrated values for} \item{k.weight}{Number of neighbors to consider when weighting anchors} @@ -93,7 +92,7 @@ If NULL, the sample tree will be computed automatically.} \item{preserve.order}{Do not reorder objects based on size for each pairwise integration.} -\item{verbose}{Print progress bars and output} +\item{verbose}{Print progress} \item{...}{Arguments passed on to \code{FindIntegrationAnchors}} } @@ -127,4 +126,5 @@ obj <- IntegrateLayers(object = obj, method = CCAIntegration, orig.reduction = "pca", new.reduction = "integrated.cca", assay = "SCT", verbose = FALSE) } + } diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd index 81536048f..c11af3ec1 100644 --- a/man/FetchResiduals.Rd +++ b/man/FetchResiduals.Rd @@ -32,6 +32,10 @@ and the default is RNA} \item{clip.range}{Numeric of length two specifying the min and max values the Pearson residual will be clipped to} +\item{reference.SCT.model}{reference.SCT.model If a reference SCT model should be used +for calculating the residuals. When set to not NULL, ignores the `SCTModel` +paramater.} + \item{replace.value}{Recalculate residuals for all features, even if they are already present. Useful if you want to change the clip.range.} diff --git a/man/FetchResiduals_reference.Rd b/man/FetchResiduals_reference.Rd index fc7a42144..54e9ca09f 100644 --- a/man/FetchResiduals_reference.Rd +++ b/man/FetchResiduals_reference.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/preprocessing5.R \name{FetchResiduals_reference} \alias{FetchResiduals_reference} -\title{Calculate pearson residuals of features not in the scale.data} +\title{temporal function to get residuals from reference} \usage{ FetchResiduals_reference( object, @@ -12,23 +12,6 @@ FetchResiduals_reference( verbose = FALSE ) } -\arguments{ -\item{object}{A seurat object} - -\item{reference.SCT.model}{SCTModel object to use for calculating residuals} - -\item{features}{Name of features to add into the scale.data} - -\item{nCount_UMI}{UMI Counts per cell provided to vst} - -\item{verbose}{Whether to print messages and progress bars} -} -\value{ -Returns a Seurat object containing Pearson residuals of added -features in its scale.data - -temporal function to get residuals from reference -} \description{ -This function calls sctransform::get_residuals. +temporal function to get residuals from reference } diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index e10d17f41..215eed868 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -42,8 +42,32 @@ should be called \code{group}} \item{layers}{Ignored} +\item{npcs}{If doing PCA on input matrix, number of PCs to compute} + \item{key}{Key for Harmony dimensional reduction} +\item{theta}{Diversity clustering penalty parameter} + +\item{lambda}{Ridge regression penalty parameter} + +\item{sigma}{Width of soft kmeans clusters} + +\item{nclust}{Number of clusters in model} + +\item{tau}{Protection against overclustering small datasets with large ones} + +\item{block.size}{What proportion of cells to update during clustering} + +\item{max.iter.harmony}{Maximum number of rounds to run Harmony} + +\item{max.iter.cluster}{Maximum number of rounds to run clustering at each round of Harmony} + +\item{epsilon.cluster}{Convergence tolerance for clustering round of Harmony} + +\item{epsilon.harmony}{Convergence tolerance for Harmony} + +\item{verbose}{Whether to print progress messages. TRUE to print, FALSE to suppress} + \item{...}{Ignored} } \value{ @@ -75,13 +99,15 @@ obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction # We can also add arguments specific to Harmony such as theta, to give more diverse clusters obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', verbose = FALSE, theta = 3) - # Integrating SCTransformed data obj <- SCTransform(object = obj) obj <- IntegrateLayers(object = obj, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = 'harmony', assay = "SCT", verbose = FALSE) } + + +} \seealso{ \code{\link[harmony:HarmonyMatrix]{harmony::HarmonyMatrix}()} } diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd index 0d3803729..56546d092 100644 --- a/man/JointPCAIntegration.Rd +++ b/man/JointPCAIntegration.Rd @@ -51,11 +51,50 @@ or SCT} \item{scale.layer}{Name of scaled layer in \code{Assay}} -\item{verbose}{Print progress} +\item{dims.to.integrate}{Number of dimensions to return integrated values for} + +\item{k.weight}{Number of neighbors to consider when weighting anchors} + +\item{weight.reduction}{Dimension reduction to use when calculating anchor +weights. This can be one of: +\itemize{ + \item{A string, specifying the name of a dimension reduction present in + all objects to be integrated} + \item{A vector of strings, specifying the name of a dimension reduction to + use for each object to be integrated} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to + use for each object in the integration} + \item{NULL, in which case the full corrected space is used for computing + anchor weights.} +}} + +\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} + +\item{sample.tree}{Specify the order of integration. Order of integration +should be encoded in a matrix, where each row represents one of the pairwise +integration steps. Negative numbers specify a dataset, positive numbers +specify the integration results from a given row (the format of the merge +matrix included in the \code{\link{hclust}} function output). For example: +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: + +\if{html}{\out{
    }}\preformatted{ [,1] [,2] + [1,] -2 -3 + [2,] 1 -1 +}\if{html}{\out{
    }} + +Which would cause dataset 2 and 3 to be integrated first, then the resulting +object integrated with dataset 1. + +If NULL, the sample tree will be computed automatically.} + +\item{preserve.order}{Do not reorder objects based on size for each pairwise +integration.} \item{groups}{A one-column data frame with grouping information} -\item{...}{Additional arguments passed to \code{FindIntegrationAnchors}} +\item{verbose}{Print progress} + +\item{...}{Arguments passed on to \code{FindIntegrationAnchors}} } \description{ Seurat-Joint PCA Integration diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index 98aefd5c0..c00ad991f 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -3,7 +3,6 @@ \name{LeverageScore} \alias{LeverageScore} \alias{LeverageScore.default} -\alias{LeverageScore.DelayedMatrix} \alias{LeverageScore.StdAssay} \alias{LeverageScore.Assay} \alias{LeverageScore.Seurat} @@ -22,18 +21,6 @@ LeverageScore(object, ...) ... ) -\method{LeverageScore}{DelayedMatrix}( - object, - nsketch = 5000L, - ndims = NULL, - method = CountSketch, - eps = 0.5, - seed = 123L, - block.size = 1e+08, - verbose = TRUE, - ... -) - \method{LeverageScore}{StdAssay}( object, nsketch = 5000L, diff --git a/man/ProjectIntegration.Rd b/man/ProjectIntegration.Rd index f4a5cc2cc..c61c4648c 100644 --- a/man/ProjectIntegration.Rd +++ b/man/ProjectIntegration.Rd @@ -50,6 +50,11 @@ for all cells (default is 'sketch'). Can be one of: \item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to \dQuote{\code{sketch}}; defaults to 0.8} +\item{sketched.layers}{Names of sketched layers, defaults to all +layers of \dQuote{\code{object[[assay]]}}} + +\item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} + \item{verbose}{Print progress and message} } \value{ diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index 8b551fdcd..a8390a863 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -53,10 +53,48 @@ or SCT} \item{groups}{A one-column data frame with grouping information} -\item{verbose}{Print progress} +\item{dims.to.integrate}{Number of dimensions to return integrated values for} + +\item{k.weight}{Number of neighbors to consider when weighting anchors} + +\item{weight.reduction}{Dimension reduction to use when calculating anchor +weights. This can be one of: +\itemize{ + \item{A string, specifying the name of a dimension reduction present in + all objects to be integrated} + \item{A vector of strings, specifying the name of a dimension reduction to + use for each object to be integrated} + \item{A vector of \code{\link{DimReduc}} objects, specifying the object to + use for each object in the integration} + \item{NULL, in which case the full corrected space is used for computing + anchor weights.} +}} + +\item{sd.weight}{Controls the bandwidth of the Gaussian kernel for weighting} + +\item{sample.tree}{Specify the order of integration. Order of integration +should be encoded in a matrix, where each row represents one of the pairwise +integration steps. Negative numbers specify a dataset, positive numbers +specify the integration results from a given row (the format of the merge +matrix included in the \code{\link{hclust}} function output). For example: +\code{matrix(c(-2, 1, -3, -1), ncol = 2)} gives: -\item{...}{Additional arguments passed to \code{FindIntegrationAnchors}} +\if{html}{\out{
    }}\preformatted{ [,1] [,2] + [1,] -2 -3 + [2,] 1 -1 +}\if{html}{\out{
    }} + +Which would cause dataset 2 and 3 to be integrated first, then the resulting +object integrated with dataset 1. + +If NULL, the sample tree will be computed automatically.} + +\item{preserve.order}{Do not reorder objects based on size for each pairwise +integration.} + +\item{verbose}{Print progress} +\item{...}{Arguments passed on to \code{FindIntegrationAnchors}} } \description{ Seurat-RPCA Integration diff --git a/man/SelectIntegrationFeatures5.Rd b/man/SelectIntegrationFeatures5.Rd index 334d7c9c7..f97aacf8a 100644 --- a/man/SelectIntegrationFeatures5.Rd +++ b/man/SelectIntegrationFeatures5.Rd @@ -34,6 +34,8 @@ following: \item{layers}{Name of layers to use for integration feature selection} \item{verbose}{Print messages} + +\item{...}{Arguments passed on to \code{method}} } \description{ Select integration features diff --git a/man/SelectSCTIntegrationFeatures.Rd b/man/SelectSCTIntegrationFeatures.Rd index fc6da196b..4c933c198 100644 --- a/man/SelectSCTIntegrationFeatures.Rd +++ b/man/SelectSCTIntegrationFeatures.Rd @@ -20,6 +20,8 @@ SelectSCTIntegrationFeatures( \item{assay}{Name of assay to use for integration feature selection} \item{verbose}{Print messages} + +\item{...}{Arguments passed on to \code{method}} } \description{ Select SCT integration features diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} From c1a2b7f1f212c3a35f1961c5454562e602c86276 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 25 Sep 2023 18:18:45 -0400 Subject: [PATCH 770/979] Change dimreduc to use SetAssayData --- R/dimensional_reduction.R | 4 ++-- man/reexports.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index fa5eb8665..bab2f49cd 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -649,8 +649,8 @@ RunCCA.Seurat <- function( warning("Some cells removed after object merge due to minimum feature count cutoff") } combined.scale <- cbind(data1,data2) - # combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") - combined.object@assays$ToIntegrate@scale.data <- combined.scale + combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") + ## combined.object@assays$ToIntegrate@scale.data <- combined.scale if (renormalize) { combined.object <- NormalizeData( object = combined.object, diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 7aca7bb43a6d10def9fbc0b868709da8446e7fab Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Mon, 25 Sep 2023 19:02:35 -0400 Subject: [PATCH 771/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 84082a5e7..0e30f24eb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9063 -Date: 2023-09-24 +Version: 4.9.9.9064 +Date: 2023-09-25 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 649e6734c678386132ba3811689e17d3203b9808 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 26 Sep 2023 14:30:55 -0400 Subject: [PATCH 772/979] minor fix in new v5 test --- tests/testthat/test_preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 0f46d9e6a..8289f769a 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -105,7 +105,7 @@ if(class(object[['RNA']]) == "Assay5") { fake.groups <- c(rep(1, floor(ncol(pbmc.test)/2)), rep(2, ncol(pbmc.test) - (floor(ncol(pbmc.test)/2))) ) object$groups <- fake.groups - object.split[["RNA"]] <- split(object[["RNA"]], f = object$groups) + object.split <- CreateSeuratObject(split(object[["RNA"]], f = object$groups)) object.split <- NormalizeData(object = object.split) group1 <- subset(object, groups==1) From 9a37c2bcb45d507b8b5f87e614198efa6396e316 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 26 Sep 2023 16:02:52 -0400 Subject: [PATCH 773/979] add ... to NormalizeData --- R/utilities.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 7e05cf954..6f52dfc0a 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -433,7 +433,7 @@ AverageExpression <- function( verbose = TRUE, ... ) { - CheckDots(..., fxns = 'CreateSeuratObject') + #CheckDots(..., fxns = 'CreateSeuratObject') if (!is.null(x = add.ident)) { .Deprecated(msg = "'add.ident' is a deprecated argument, please use the 'group.by' argument instead") group.by <- c('ident', add.ident) @@ -456,6 +456,11 @@ AverageExpression <- function( ) layer <- slot } + + if (method =="average") { + message("Starting with Seurat v5, recommend users swtich to AggregateExpression rather than AverageExpression") + } + object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) assays <- assays %||% object.assays if (!all(assays %in% object.assays)) { @@ -542,7 +547,7 @@ AverageExpression <- function( ) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]])) + assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]]), ...) } #for multimodal data if (length(x = data.return) > 1) { @@ -563,7 +568,7 @@ AverageExpression <- function( toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]])) + assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]]), ...) } } } From 616b0cfcd5129abfad5eada10d9eab3909cd0e49 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Tue, 26 Sep 2023 16:14:19 -0400 Subject: [PATCH 774/979] fixed integratedata with v5 assays bug --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index 83e3ebc91..702124662 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4223,6 +4223,7 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 + object <- JoinLayers(object) anchors <- GetIntegrationData( object = object, integration.name = integration.name, From 605c4791cac00728b8469eb5a3813220befc4857 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 26 Sep 2023 16:44:24 -0400 Subject: [PATCH 775/979] small fix for PseudobulkExpression --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 64a583dce..951afd0d7 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1387,7 +1387,7 @@ PseudobulkExpression.Assay <- function( } data.use <- GetAssayData( object = object, - slot = slot + layer = layer ) features.to.avg <- features %||% rownames(x = data.use) if (IsMatrixEmpty(x = data.use)) { From e6338bcd157fac375e2e71041b8d78b6f1bc32fa Mon Sep 17 00:00:00 2001 From: zskylarli Date: Tue, 26 Sep 2023 16:50:06 -0400 Subject: [PATCH 776/979] added warning & skip anchor filtering for v5 assay --- R/integration.R | 98 +++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/R/integration.R b/R/integration.R index 702124662..96d26081c 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3865,57 +3865,61 @@ FilterAnchors <- function( eps = 0, verbose = TRUE ) { - if (verbose) { - message("Filtering anchors") - } - assay <- assay %||% DefaultAssay(object = object) - features <- features %||% VariableFeatures(object = object) - if (length(x = features) == 0) { - stop("No features provided and no VariableFeatures computed.") - } - features <- unique(x = features) - neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') - nn.cells1 <- neighbors$cells1 - nn.cells2 <- neighbors$cells2 - if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { - warning("Number of anchor cells is less than k.filter. Retaining all anchors.") - k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + if (inherits(x = object[[assay[1]]], what = 'Assay5')) { + message("Anchor filtering is currently not supported with v5 assays.") } else { - cn.data1 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[1]]], - slot = slot)[features, nn.cells1])), - MARGIN = 1) - cn.data2 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[2]]], - slot = slot)[features, nn.cells2])), - MARGIN = 1) - nn <- NNHelper( - data = cn.data2[nn.cells2, ], - query = cn.data1[nn.cells1, ], - k = k.filter, - method = nn.method, - n.trees = n.trees, - eps = eps - ) - - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") - position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { - which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] - }) - anchors <- anchors[!is.na(x = position), ] if (verbose) { - message("\tRetained ", nrow(x = anchors), " anchors") + message("Filtering anchors") + } + assay <- assay %||% DefaultAssay(object = object) + features <- features %||% VariableFeatures(object = object) + if (length(x = features) == 0) { + stop("No features provided and no VariableFeatures computed.") + } + features <- unique(x = features) + neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') + nn.cells1 <- neighbors$cells1 + nn.cells2 <- neighbors$cells2 + if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { + warning("Number of anchor cells is less than k.filter. Retaining all anchors.") + k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + } else { + cn.data1 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[1]]], + slot = slot)[features, nn.cells1])), + MARGIN = 1) + cn.data2 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[2]]], + slot = slot)[features, nn.cells2])), + MARGIN = 1) + nn <- NNHelper( + data = cn.data2[nn.cells2, ], + query = cn.data1[nn.cells1, ], + k = k.filter, + method = nn.method, + n.trees = n.trees, + eps = eps + ) + + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { + which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] + }) + anchors <- anchors[!is.na(x = position), ] + if (verbose) { + message("\tRetained ", nrow(x = anchors), " anchors") + } } + object <- SetIntegrationData( + object = object, + integration.name = integration.name, + slot = "anchors", + new.data = anchors + ) } - object <- SetIntegrationData( - object = object, - integration.name = integration.name, - slot = "anchors", - new.data = anchors - ) return(object) } From 803f0a1b16d1d458c54fc46851ece7989b137941 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 26 Sep 2023 17:18:24 -0400 Subject: [PATCH 777/979] Expose NormalizeData parameters --- R/utilities.R | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 6f52dfc0a..4ebec971c 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -430,6 +430,10 @@ AverageExpression <- function( layer = 'data', slot = deprecated(), method = 'average', + normalization.method = "LogNormalize", + scale.factor = 10000, + margin = 1, + block.size = NULL, verbose = TRUE, ... ) { @@ -547,7 +551,12 @@ AverageExpression <- function( ) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]]), ...) + assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[i]]), + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, + block.size = block.size, + verbose = verbose) } #for multimodal data if (length(x = data.return) > 1) { @@ -568,7 +577,12 @@ AverageExpression <- function( toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]]), ...) + assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]]), + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, + block.size = block.size, + verbose = verbose) } } } From ec41c2c027f760430be86635b5f6f99362bae254 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 27 Sep 2023 12:38:45 -0400 Subject: [PATCH 778/979] Fix projectcellembeddings.iterablematrix for multiple layers --- R/integration.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 83e3ebc91..2b3633a17 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5285,7 +5285,13 @@ ProjectCellEmbeddings.IterableMatrix <- function( )) } else { query <- query[features,] - reference.data <- LayerData(object = reference[[reference.assay]], layer = 'data')[features, ] + reference.data.list <- c() + for (i in Layers(object = reference[[reference.assay]], + layer = "data")) { + reference.data.list[[i]] <- LayerData(object = reference[[reference.assay]], + layer = i)[features, ] + } + reference.data <- do.call(cbind, reference.data.list) if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) From ea01d00ed215652edbbb4e10765cbbf8ca511c71 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 27 Sep 2023 14:40:58 -0400 Subject: [PATCH 779/979] fix for ProjectEmbeddings.default --- R/integration.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index 2b3633a17..7bd64c7f9 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5196,10 +5196,13 @@ if (normalization.method == 'SCT') { nCount_UMI = nCount_UMI) } else { query <- query[features,] - reference.data <- GetAssayData( - object = reference, - assay = reference.assay, - slot = "data")[features, ] + reference.data.list <- c() + for (i in Layers(object = reference[[reference.assay]], + layer = "data")) { + reference.data.list[[i]] <- LayerData(object = reference[[reference.assay]], + layer = i)[features, ] + } + reference.data <- do.call(cbind, reference.data.list) if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) From 71fa5e678b09926912d69ba54e7becded269122a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 27 Sep 2023 15:26:04 -0400 Subject: [PATCH 780/979] Coerce matrix to be sparse before running SCTransform --- R/preprocessing.R | 2 ++ R/preprocessing5.R | 7 ++++--- man/reexports.Rd | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 25af049ed..edd4d2c6d 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3156,6 +3156,7 @@ SampleUMI <- function( #' #' @importFrom stats setNames #' @importFrom Matrix colSums +#' @importFrom SeuratObject as.sparse #' @importFrom sctransform vst get_residual_var get_residuals correct_counts #' #' @seealso \code{\link[sctransform]{correct_counts}} \code{\link[sctransform]{get_residuals}} @@ -3188,6 +3189,7 @@ SCTransform.default <- function( set.seed(seed = seed.use) } vst.args <- list(...) + object <- as.sparse(x = object) umi <- object # check for batch_var in meta data if ('batch_var' %in% names(x = vst.args)) { diff --git a/R/preprocessing5.R b/R/preprocessing5.R index ff08985b0..5915d6773 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1150,7 +1150,7 @@ CreateSCTAssay <- function(vst.out, do.correct.umi, residual.type, clip.range){ } #' @importFrom SeuratObject Cells DefaultLayer DefaultLayer<- Features -#' LayerData LayerData<- +#' LayerData LayerData<- as.sparse #' #' @method SCTransform StdAssay #' @export @@ -1385,6 +1385,7 @@ SCTransform.StdAssay <- function( layer = paste0(layer, ".", layer.name), cells = all_cells ) + layer.counts.tmp <- as.sparse(x = layer.counts.tmp) vst_out$cell_attr <- vst_out$cell_attr[, c("log_umi"), drop=FALSE] vst_out$model_pars_fit <- vst_out$model_pars_fit[variable.features.target,,drop=FALSE] new_residual <- GetResidualsChunked(vst_out = vst_out, layer.counts = layer.counts.tmp, @@ -1620,8 +1621,8 @@ FetchResidualSCTModel <- function( clip.range = NULL, replace.value = FALSE, verbose = FALSE -) { - +) { + model.cells <- character() model.features <- Features(x = object, assay = assay) if (is.null(x = reference.SCT.model)){ diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} From 046cec0f1afc30bea4729282ba0f7ae2ef61761a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 27 Sep 2023 15:29:36 -0400 Subject: [PATCH 781/979] Update version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e30f24eb..601182dc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9064 -Date: 2023-09-25 +Version: 4.9.9.9065 +Date: 2023-09-27 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From e604455260e629db41e5f1acff19dd165ea1d726 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 27 Sep 2023 16:14:50 -0400 Subject: [PATCH 782/979] minor fix --- R/utilities.R | 4 +--- tests/testthat/test_utilities.R | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 4ebec971c..120933054 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -344,8 +344,6 @@ AddModuleScore <- function( #' @param group.by Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default #' @param add.ident (Deprecated) Place an additional label on each cell prior to pseudobulking #' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) -#' @param slot Slot(s) to use; if multiple slots are given, assumed to follow -#' the order of 'assays' (if specified) or object's assays #' @param verbose Print messages and show progress bar #' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}#' #' @return Returns a matrix with genes as rows, identity classes as columns. @@ -551,7 +549,7 @@ AverageExpression <- function( ) LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[i]]), + assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]]), normalization.method = normalization.method, scale.factor = scale.factor, margin = margin, diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 6f0900e76..f796e1241 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -95,9 +95,9 @@ test_that("AverageExpression with return.seurat", { ) # data - avg.data <- AverageExpression(object, slot = "data", return.seurat = TRUE, verbose = FALSE) + avg.data <- AverageExpression(object, layer = "data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.data, "Seurat") - avg.data.mat <- AverageExpression(object, slot = 'data')$RNA + avg.data.mat <- AverageExpression(object, layer = 'data')$RNA expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "counts"))), unname(as.matrix(avg.data.mat))) expect_equal(unname(as.matrix(LayerData(avg.data[["RNA"]], layer = "data"))), From be9abd106ec5b29c481cecc7c235aa7c565cfada Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 27 Sep 2023 17:49:45 -0400 Subject: [PATCH 783/979] scale data fix --- R/preprocessing5.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 5915d6773..3cff85445 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -494,7 +494,7 @@ ScaleData.StdAssay <- function( } else { LayerData(object = object, layer = layer, features = features) } - LayerData(object = object, layer = save, features = features) <- ScaleData( + ldata <- ScaleData( object = ldata, features = features, vars.to.regress = vars.to.regress, @@ -510,6 +510,7 @@ ScaleData.StdAssay <- function( verbose = verbose, ... ) + LayerData(object = object, layer = save, features = rownames(ldata)) <- ldata } return(object) } From a4fdb2f0b0c07a2a6d31470e79364c1e903be8c5 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Wed, 27 Sep 2023 18:34:26 -0400 Subject: [PATCH 784/979] update FindMarkers to use Presto --- R/differential_expression.R | 80 +++++++++++++++++++++++++------------ R/zzz.R | 2 +- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 3b94bf3b0..60e71617a 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -422,6 +422,8 @@ FindConservedMarkers <- function( #' \itemize{ #' \item{"wilcox"} : Identifies differentially expressed genes between two #' groups of cells using a Wilcoxon Rank Sum test (default) +#' \item{"wilcox_limma"} : Identifies differentially expressed genes between two +#' groups of cells using the limma implementation of the Wilcoxon Rank Sum test #' \item{"bimod"} : Likelihood-ratio test for single cell gene expression, #' (McDavid et al., Bioinformatics, 2013) #' \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. @@ -1440,7 +1442,7 @@ DEmethods_latent <- function() { # returns tests that require CheckDots DEmethods_checkdots <- function() { - c('wilcox', 'MAST', 'DESeq2') + c('wilcox', 'wilcox_limma', 'MAST', 'DESeq2') } # returns tests that do not use Bonferroni correction on the DE results @@ -2079,6 +2081,14 @@ PerformDE <- function( verbose = verbose, ... ), + 'wilcox_limma' = WilcoxDETest( + data.use = data.use, + cells.1 = cells.1, + cells.2 = cells.2, + verbose = verbose, + limma = TRUE, + ... + ), 'bimod' = DiffExpTest( data.use = data.use, cells.1 = cells.1, @@ -2432,14 +2442,18 @@ ValidateCellGroups <- function( # Differential expression using Wilcoxon Rank Sum # # Identifies differentially expressed genes between two groups of cells using -# a Wilcoxon Rank Sum test. Makes use of limma::rankSumTestWithCorrelation for a +# a Wilcoxon Rank Sum test. Makes use of presto::wilcoxauc for a more efficient +# implementation of the wilcoxon test. If presto is not installed, or if limma +# is requested, makes use of limma::rankSumTestWithCorrelation for a # more efficient implementation of the wilcoxon test. Thanks to Yunshun Chen and -# Gordon Smyth for suggesting the limma implementation. +# Gordon Smyth for suggesting the limma implementation. If limma is also not installed, +# uses wilcox.test. # # @param data.use Data matrix to test # @param cells.1 Group 1 cells # @param cells.2 Group 2 cells # @param verbose Print a progress bar +# @param limma If limma should be used for testing; default is FALSE # @param ... Extra parameters passed to wilcox.test # # @return Returns a p-value ranked matrix of putative differentially expressed @@ -2463,6 +2477,7 @@ WilcoxDETest <- function( cells.1, cells.2, verbose = TRUE, + limma = FALSE, ... ) { data.use <- data.use[, c(cells.1, cells.2), drop = FALSE] @@ -2483,40 +2498,53 @@ WilcoxDETest <- function( group.info[cells.1, "group"] <- "Group1" group.info[cells.2, "group"] <- "Group2" group.info[, "group"] <- factor(x = group.info[, "group"]) - if (FALSE) { - data.use <- data.use[, names(x = group.info), drop = FALSE] - res <- presto::wilcoxauc(X = data.use, y = group.info) + if (presto.check[1] && overflow.check && (!limma)) { + data.use <- data.use[, rownames(group.info), drop = FALSE] + res <- presto::wilcoxauc(X = data.use, y = group.info[, "group"]) res <- res[1:(nrow(x = res)/2),] p_val <- res$pval - } else if (limma.check[1] && overflow.check) { - p_val <- my.sapply( - X = 1:nrow(x = data.use), - FUN = function(x) { - return(min(2 * min(limma::rankSumTestWithCorrelation(index = j, statistics = data.use[x, ])), 1)) - } - ) - } else { - if (getOption('Seurat.limma.wilcox.msg', TRUE) && overflow.check) { + } else if (overflow.check) { + if (getOption('Seurat.presto.wilcox.msg', TRUE) && (!limma)) { # if you didnt request limma, output message message( "For a more efficient implementation of the Wilcoxon Rank Sum Test,", - "\n(default method for FindMarkers) please install the limma package", + "\n(default method for FindMarkers) please install the presto package", "\n--------------------------------------------", - "\ninstall.packages('BiocManager')", - "\nBiocManager::install('limma')", + "\ninstall.packages('devtools')", + "\ndevtools::install_github('immunogenomics/presto')", "\n--------------------------------------------", - "\nAfter installation of limma, Seurat will automatically use the more ", + "\nAfter installation of presto, Seurat will automatically use the more ", "\nefficient implementation (no further action necessary).", "\nThis message will be shown once per session" ) - options(Seurat.limma.wilcox.msg = FALSE) + options(Seurat.presto.wilcox.msg = FALSE) } - data.use <- data.use[, rownames(x = group.info), drop = FALSE] - p_val <- my.sapply( - X = 1:nrow(x = data.use), - FUN = function(x) { - return(wilcox.test(data.use[x, ] ~ group.info[, "group"], ...)$p.value) + if (limma.check[1]) { + p_val <- my.sapply( + X = 1:nrow(x = data.use), + FUN = function(x) { + return(min(2 * min(limma::rankSumTestWithCorrelation(index = j, statistics = data.use[x, ])), 1)) + } + ) + } else { + if (limma) { + stop( + "To use the limma implementation of the Wilcoxon Rank Sum Test, + please install the limma package: + -------------------------------------------- + install.packages('BiocManager') + BiocManager::install('limma') + --------------------------------------------" + ) + } else { + data.use <- data.use[, rownames(x = group.info), drop = FALSE] + p_val <- my.sapply( + X = 1:nrow(x = data.use), + FUN = function(x) { + return(wilcox.test(data.use[x, ] ~ group.info[, "group"], ...)$p.value) + } + ) } - ) + } } return(data.frame(p_val, row.names = rownames(x = data.use))) } diff --git a/R/zzz.R b/R/zzz.R index 5793b6760..c9717d5ca 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -57,7 +57,7 @@ seurat_default_options <- list( Seurat.memsafe = FALSE, Seurat.warn.umap.uwot = TRUE, Seurat.checkdots = "warn", - Seurat.limma.wilcox.msg = TRUE, + Seurat.presto.wilcox.msg = TRUE, #CHANGE Seurat.Rfast2.msg = TRUE, Seurat.warn.vlnplot.split = TRUE ) From 2d5607c6b3d9c3d0edfa4128c814ea47786fd437 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 28 Sep 2023 10:32:01 -0400 Subject: [PATCH 785/979] fixing findtransferanchors test fail with v3 assays --- R/integration.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/R/integration.R b/R/integration.R index 7bd64c7f9..8571dbb2a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5196,13 +5196,21 @@ if (normalization.method == 'SCT') { nCount_UMI = nCount_UMI) } else { query <- query[features,] - reference.data.list <- c() - for (i in Layers(object = reference[[reference.assay]], - layer = "data")) { - reference.data.list[[i]] <- LayerData(object = reference[[reference.assay]], - layer = i)[features, ] + if(inherits(x = reference[[reference.assay]], what = "Assay5")){ + reference.data.list <- c() + for (i in Layers(object = reference[[reference.assay]], layer = "data")) { + reference.data.list[[i]] <- LayerData( + object = reference[[reference.assay]], + layer = i + )[features, ] + } + reference.data <- do.call(cbind, reference.data.list) + } else { + reference.data <- GetAssayData( + object = reference, + assay = reference.assay, + slot = "data")[features, ] } - reference.data <- do.call(cbind, reference.data.list) if (is.null(x = feature.mean)) { if (inherits(x = reference.data, what = 'dgCMatrix')) { feature.mean <- RowMeanSparse(mat = reference.data) From 9216625b31ffe66e98a2161d6b67037a46073eca Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 28 Sep 2023 10:36:33 -0400 Subject: [PATCH 786/979] Add citatation for Seuratv5 --- inst/CITATION | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/inst/CITATION b/inst/CITATION index bcc1f5593..76d447bbd 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,5 +1,27 @@ citHeader("To cite Seurat in publications, please use:") +bibentry(bibtype = "article", + author = c( + as.person("Yuhan Hao"), + as.person("Tim Stuart"), + as.person("Madeline H Kowalski"), + as.person("Saket Choudhary"), + as.person("Paul Hoffman"), + as.person("Austin Hartman"), + as.person("Avi Srivastava"), + as.person("Gesmira Molla"), + as.person("Shaista Madad"), + as.person("Carlos Fernandez-Granda"), + as.person("Rahul Satija") + ), + title = "Dictionary learning for integrative, multimodal and scalable single-cell analysis", + journal = "Nature Biotechnology", + year = "2023", + doi = "10.1038/s41587-023-01767-y", + url = "https://doi.org/10.1038/s41587-023-01767-y", + textVersion = "Hao et al. Dictionary learning for integrative, multimodal and scalable single-cell analysis. Nature Biotechnology (2023) [Seurat V5]" +) + bibentry(bibtype = "article", author = c( as.person("Yuhan Hao"), From 7eea7fa371262ba34b08f9a8c2d0372e5fdeef61 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 28 Sep 2023 10:43:58 -0400 Subject: [PATCH 787/979] Update news; bump version --- DESCRIPTION | 4 ++-- NEWS.md | 7 +++++++ man/reexports.Rd | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2c8993315..537cff96a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9065 -Date: 2023-09-27 +Version: 4.9.9.9066 +Date: 2023-09-28 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 93bd03abd..e555a7370 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# Unreleased + +## Added + +## Changes + + # Seurat 4.4.0 (2023-09-27) ## Added diff --git a/man/reexports.Rd b/man/reexports.Rd index b3d725fcb..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -72,6 +72,7 @@ below to see their documentation. \describe{ \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} + \item{generics}{\code{\link[generics]{components}}} }} From 02cf8d30347457d87ca3f53fdf7296a761104fa0 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 28 Sep 2023 14:25:12 -0400 Subject: [PATCH 788/979] vlnplot to work for counts layer if data doesnt exist --- R/visualization.R | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 677a17e1c..a7c62eeb6 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -621,7 +621,7 @@ VlnPlot <- function( log = FALSE, ncol = NULL, slot = deprecated(), - layer = 'data', + layer = NULL, split.plot = FALSE, stack = FALSE, combine = TRUE, @@ -638,6 +638,26 @@ VlnPlot <- function( ) layer <- slot %||% layer } + layer.set <- Layers( + object = object, + search = layer %||% 'data' + ) + if (is.null(layer) && length(layer.set) == 1 && layer.set == 'scale.data'){ + warning('Default search for "data" layer yielded no results; utilizing "scale.data" layer instead.') + } + if (is.null(layer.set) & is.null(layer) ) { + warning('Default search for "data" layer yielded no results; utilizing "counts" layer instead.', + call. = FALSE, immediate. = TRUE) + layer.set <- Layers( + object = object, + search = 'counts' + ) + } + if (is.null(layer.set)) { + stop('layer "', layer,'" is not found in the object') + } else { + layer <- layer.set + } if ( !is.null(x = split.by) & getOption(x = 'Seurat.warn.vlnplot.split', default = TRUE) @@ -6770,7 +6790,8 @@ ExIPlot <- function( if (length(x = obj) == 1) { if (inherits(x = object[[obj]], what = 'DimReduc')) { plots[[i]] <- plots[[i]] + label.fxn(label = 'Embeddings Value') - } else if (inherits(x = object[[obj]], what = 'Assay')) { + } else if (inherits(x = object[[obj]], what = 'Assay') || + inherits(x = object[[obj]], what = 'Assay5')) { next } else { warning("Unknown object type ", class(x = object), immediate. = TRUE, call. = FALSE) From 408b544651fef86638405abf2b9af4b1443a66ee Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 28 Sep 2023 14:45:48 -0400 Subject: [PATCH 789/979] FindVariableFeatures fix --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 5915d6773..8ad85c7c5 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -549,7 +549,7 @@ VST.IterableMatrix <- function( matrix = data, row_stats = 'variance')$row_stats # Calculate feature means - hvf.info$mean <- hvf.stats['mean' ] + hvf.info$mean <- hvf.stats['mean', ] # Calculate feature variance hvf.info$variance <- hvf.stats['variance', ] hvf.info$variance.expected <- 0L From 623d3b36efa0f423338408ff24b5208557e743b9 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 28 Sep 2023 15:02:31 -0400 Subject: [PATCH 790/979] more specific error message --- R/visualization.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index a7c62eeb6..4969f06d4 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -654,7 +654,8 @@ VlnPlot <- function( ) } if (is.null(layer.set)) { - stop('layer "', layer,'" is not found in the object') + assay.name <- DefaultAssay(object) + stop('layer "', layer,'" is not found in assay: "', assay.name, '"') } else { layer <- layer.set } From b28f8f8e2404a2102c9e2c14aec741ab05b6301c Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 28 Sep 2023 15:22:29 -0400 Subject: [PATCH 791/979] more specific warnings --- R/visualization.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 4969f06d4..ec4e17677 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -638,15 +638,18 @@ VlnPlot <- function( ) layer <- slot %||% layer } - layer.set <- Layers( - object = object, - search = layer %||% 'data' + layer.set <- suppressWarnings( + Layers( + object = object, + search = layer %||% 'data' + ) ) if (is.null(layer) && length(layer.set) == 1 && layer.set == 'scale.data'){ warning('Default search for "data" layer yielded no results; utilizing "scale.data" layer instead.') } + assay.name <- DefaultAssay(object) if (is.null(layer.set) & is.null(layer) ) { - warning('Default search for "data" layer yielded no results; utilizing "counts" layer instead.', + warning('Default search for "data" layer in "', assay.name, '" assay yielded no results; utilizing "counts" layer instead.', call. = FALSE, immediate. = TRUE) layer.set <- Layers( object = object, @@ -654,7 +657,6 @@ VlnPlot <- function( ) } if (is.null(layer.set)) { - assay.name <- DefaultAssay(object) stop('layer "', layer,'" is not found in assay: "', assay.name, '"') } else { layer <- layer.set From b61e8889921f9645ca60af2fd015c0f38803d6b2 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 28 Sep 2023 16:33:47 -0400 Subject: [PATCH 792/979] update warning messages --- R/preprocessing5.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 8ad85c7c5..fea7c026b 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -333,7 +333,7 @@ NormalizeData.default <- function( } if (!inherits(x = object, what = 'dgCMatrix') && !inherits(x = object, what = 'matrix')) { - stop('CLR normalization only supports for dense and dgCMatrix') + stop('CLR normalization is only supported for dense and dgCMatrix') } CustomNormalize( data = object, @@ -347,7 +347,7 @@ NormalizeData.default <- function( 'RC' = { if (!inherits(x = object, what = 'dgCMatrix') && !inherits(x = object, what = 'matrix')) { - stop('RC normalization only supports for dense and dgCMatrix') + stop('RC normalization is only supported for dense and dgCMatrix') } RelativeCounts(data = object, scale.factor = scale.factor, From e24536ad64b55f7f8b152f9bf5f79658d490fb98 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 28 Sep 2023 16:51:53 -0400 Subject: [PATCH 793/979] Layers search --- R/integration.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index 8571dbb2a..08075be3b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5198,7 +5198,7 @@ if (normalization.method == 'SCT') { query <- query[features,] if(inherits(x = reference[[reference.assay]], what = "Assay5")){ reference.data.list <- c() - for (i in Layers(object = reference[[reference.assay]], layer = "data")) { + for (i in Layers(object = reference[[reference.assay]], search = "data")) { reference.data.list[[i]] <- LayerData( object = reference[[reference.assay]], layer = i @@ -5298,7 +5298,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( query <- query[features,] reference.data.list <- c() for (i in Layers(object = reference[[reference.assay]], - layer = "data")) { + search = "data")) { reference.data.list[[i]] <- LayerData(object = reference[[reference.assay]], layer = i)[features, ] } From 1a92bd34b19602631ba6040e1e238b0cd5e33cc9 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 28 Sep 2023 17:06:01 -0400 Subject: [PATCH 794/979] add tests for BPCells with NormalizeData --- tests/testthat/test_preprocessing.R | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 0f46d9e6a..860f7b7a2 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -131,6 +131,35 @@ if(class(object[['RNA']]) == "Assay5") { }) } +# Tests for BPCells NormalizeData +# -------------------------------------------------------------------------------- +#make Iterable matrix +mat_bpcells <- object[['RNA']]$counts %>% + t() %>% + as("IterableMatrix") %>% + t() +object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + +object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNAbp") +object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNA") + +test_that("NormalizeData scales properly for BPcells", { + expect_equal(as.matrix(object[['RNAbp']]$data), as.matrix(object[['RNA']]$data), tolerance = 1e-6) + expect_equal(Command(object = object, command = "NormalizeData.RNAbp", value = "scale.factor"), 1e6) + expect_equal(Command(object = object, command = "NormalizeData.RNAbp", value = "normalization.method"), "LogNormalize") +}) + +normalized.data.bp <- LogNormalize(data = GetAssayData(object = object[["RNAbp"]], layer = "counts"), verbose = FALSE) +normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE) + +test_that("LogNormalize normalizes properly for BPCells", { + expect_equal( + as.matrix(normalized.data.bp), + as.matrix(normalized.data), + tolerance = 1e-6 + ) +}) + # Tests for ScaleData # -------------------------------------------------------------------------------- context("ScaleData") From bdd31755018b5c1d4e1cb26fa36ab457a403b4f3 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 28 Sep 2023 17:53:04 -0400 Subject: [PATCH 795/979] Fix DotPlot --- R/visualization.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index ec4e17677..1d5d31da5 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -4387,8 +4387,7 @@ DotPlot <- function( features <- unlist(x = features) names(x = feature.groups) <- features } - cells <- unlist(x = CellsByIdentities(object = object, idents = idents)) - + cells <- unlist(x = CellsByIdentities(object = object, cells = colnames(object[[assay]]), idents = idents)) data.features <- FetchData(object = object, vars = features, cells = cells) data.features$id <- if (is.null(x = group.by)) { Idents(object = object)[cells, drop = TRUE] From 1833db7c8a566718b047e21220a519eb938acf70 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Thu, 28 Sep 2023 18:05:28 -0400 Subject: [PATCH 796/979] add SCT BPCells test --- tests/testthat/test_preprocessing.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 860f7b7a2..8bfe21629 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -329,6 +329,9 @@ test_that("vst selection option returns expected values", { expect_true(!is.unsorted(rev(object[["RNA"]]["vst.variance.standardized", drop = TRUE][VariableFeatures(object = object)]))) }) +#object <- FindVariableFeatures(object, assay = "RNAbp") +#this breaks currently + # Tests for internal functions # ------------------------------------------------------------------------------ norm.fxn <- function(x) {x / mean(x)} @@ -352,6 +355,8 @@ test_that("CustomNormalize works as expected", { expect_error(CustomNormalize(data = pbmc.test, custom_function = norm.fxn, margin = 10)) }) +# Tests for SCTransform +# -------------------------------------------------------------------------------- context("SCTransform") object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v1", seed.use = 1448145)) @@ -440,3 +445,11 @@ test_that("SCTransform v2 works as expected", { expect_equal(fa["MS4A1", "residual_variance"], 3.023062, tolerance = 1e-6) expect_equal(fa["FCER2", "theta"], Inf) }) + +object <- suppressWarnings(SCTransform(object = object, assay = "RNAbp", new.assay.name = "SCTbp", + verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) +test_that("SCTransform is equivalent for BPcells ", { + expect_equal(as.matrix(LayerData(object = object[["SCT"]], layer = "data")), + as.matrix(LayerData(object = object[["SCTbp"]], layer = "data")), + tolerance = 1e-6) +}) From d016b2ecf17263700c839dbfa41f59a6f7c42567 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 28 Sep 2023 21:48:45 -0400 Subject: [PATCH 797/979] Use corrected counts by default in SCTransform.StdAssay --- R/preprocessing5.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 8ad85c7c5..7a2b4ba4e 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1231,7 +1231,7 @@ SCTransform.StdAssay <- function( # Step 1: Learn model vst.out <- sct.function(object = layer.data, - do.correct.umi = FALSE, + do.correct.umi = TRUE, cell.attr = cell.attr.layer, reference.SCT.model = reference.SCT.model, ncells = ncells, @@ -1246,7 +1246,7 @@ SCTransform.StdAssay <- function( conserve.memory = conserve.memory, return.only.var.genes = return.only.var.genes, seed.use = seed.use, - verbose = FALSE) + verbose = verbose) min_var <- vst.out$arguments$min_variance assay.out <- CreateSCTAssay(vst.out = vst.out, do.correct.umi = do.correct.umi, residual.type = residual.type, clip.range = clip.range) From 42359e198dfeff6da8e7fdc295c028a7420ade31 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 28 Sep 2023 21:49:59 -0400 Subject: [PATCH 798/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 537cff96a..ae56c90fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9066 +Version: 4.9.9.9067 Date: 2023-09-28 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From d26a2acda01de934a25906c4c1c65f7749b4ea31 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Fri, 29 Sep 2023 11:11:02 -0400 Subject: [PATCH 799/979] integration fixes --- R/integration.R | 87 ++++++++++++++++++++++++--------------------- src/RcppExports.cpp | 2 +- 2 files changed, 47 insertions(+), 42 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5180c0688..421912ca3 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3831,49 +3831,53 @@ FilterAnchors <- function( eps = 0, verbose = TRUE ) { - if (verbose) { - message("Filtering anchors") - } - assay <- assay %||% DefaultAssay(object = object) - features <- features %||% VariableFeatures(object = object) - if (length(x = features) == 0) { - stop("No features provided and no VariableFeatures computed.") - } - features <- unique(x = features) - neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') - nn.cells1 <- neighbors$cells1 - nn.cells2 <- neighbors$cells2 - if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { - warning("Number of anchor cells is less than k.filter. Retaining all anchors.") - k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + if (inherits(x = object[[assay[1]]], what = 'Assay5')) { + message("Anchor filtering is currently not supported with v5 assays.") } else { - cn.data1 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[1]]], - slot = slot)[features, nn.cells1])), - MARGIN = 1) - cn.data2 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[2]]], - slot = slot)[features, nn.cells2])), - MARGIN = 1) - nn <- NNHelper( - data = cn.data2[nn.cells2, ], - query = cn.data1[nn.cells1, ], - k = k.filter, - method = nn.method, - n.trees = n.trees, - eps = eps - ) + if (verbose) { + message("Filtering anchors") + } + assay <- assay %||% DefaultAssay(object = object) + features <- features %||% VariableFeatures(object = object) + if (length(x = features) == 0) { + stop("No features provided and no VariableFeatures computed.") + } + features <- unique(x = features) + neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') + nn.cells1 <- neighbors$cells1 + nn.cells2 <- neighbors$cells2 + if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { + warning("Number of anchor cells is less than k.filter. Retaining all anchors.") + k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + } else { + cn.data1 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[1]]], + slot = slot)[features, nn.cells1])), + MARGIN = 1) + cn.data2 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[2]]], + slot = slot)[features, nn.cells2])), + MARGIN = 1) + nn <- NNHelper( + data = cn.data2[nn.cells2, ], + query = cn.data1[nn.cells1, ], + k = k.filter, + method = nn.method, + n.trees = n.trees, + eps = eps + ) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") - position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { - which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] - }) - anchors <- anchors[!is.na(x = position), ] - if (verbose) { - message("\tRetained ", nrow(x = anchors), " anchors") + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { + which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] + }) + anchors <- anchors[!is.na(x = position), ] + if (verbose) { + message("\tRetained ", nrow(x = anchors), " anchors") + } } } object <- SetIntegrationData( @@ -4189,6 +4193,7 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 + object <- JoinLayers(object) anchors <- GetIntegrationData( object = object, integration.name = integration.name, diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 63fa9f78eefcd8f6244806dfb3ebdca83d4c9c40 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Fri, 29 Sep 2023 12:06:22 -0400 Subject: [PATCH 800/979] fixed assay5 anchor issue --- R/integration.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/integration.R b/R/integration.R index b8b4874be..361141741 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3865,10 +3865,12 @@ FilterAnchors <- function( eps = 0, verbose = TRUE ) { + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") if (inherits(x = object[[assay[1]]], what = 'Assay5')) { message("Anchor filtering is currently not supported with v5 assays.") - } else { - if (verbose) { + } + else { + if (verbose) { message("Filtering anchors") } assay <- assay %||% DefaultAssay(object = object) @@ -3903,7 +3905,6 @@ FilterAnchors <- function( n.trees = n.trees, eps = eps ) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] @@ -4227,7 +4228,9 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 - object <- JoinLayers(object) + if (inherits(x = object[[assay[1]]], what = 'Assay5')) { + object <- JoinLayers(object) + } anchors <- GetIntegrationData( object = object, integration.name = integration.name, From a0a8f986b7f5d2edf2a8fa3b7ba2159197a0c2b3 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Fri, 29 Sep 2023 12:21:52 -0400 Subject: [PATCH 801/979] fix accidental push --- R/integration.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index b8b4874be..5a449ad8a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4227,7 +4227,6 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 - object <- JoinLayers(object) anchors <- GetIntegrationData( object = object, integration.name = integration.name, From 29bc03f5287f993c238c781563d8cf8ba76d7ea7 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Fri, 29 Sep 2023 12:27:52 -0400 Subject: [PATCH 802/979] fix accidental changes --- R/integration.R | 89 ++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/R/integration.R b/R/integration.R index 5a449ad8a..31021599a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3865,53 +3865,49 @@ FilterAnchors <- function( eps = 0, verbose = TRUE ) { - if (inherits(x = object[[assay[1]]], what = 'Assay5')) { - message("Anchor filtering is currently not supported with v5 assays.") - } else { if (verbose) { - message("Filtering anchors") - } - assay <- assay %||% DefaultAssay(object = object) - features <- features %||% VariableFeatures(object = object) - if (length(x = features) == 0) { - stop("No features provided and no VariableFeatures computed.") - } - features <- unique(x = features) - neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') - nn.cells1 <- neighbors$cells1 - nn.cells2 <- neighbors$cells2 - if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { - warning("Number of anchor cells is less than k.filter. Retaining all anchors.") - k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") - } else { - cn.data1 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[1]]], - slot = slot)[features, nn.cells1])), - MARGIN = 1) - cn.data2 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[2]]], - slot = slot)[features, nn.cells2])), - MARGIN = 1) - nn <- NNHelper( - data = cn.data2[nn.cells2, ], - query = cn.data1[nn.cells1, ], - k = k.filter, - method = nn.method, - n.trees = n.trees, - eps = eps - ) + message("Filtering anchors") + } + assay <- assay %||% DefaultAssay(object = object) + features <- features %||% VariableFeatures(object = object) + if (length(x = features) == 0) { + stop("No features provided and no VariableFeatures computed.") + } + features <- unique(x = features) + neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') + nn.cells1 <- neighbors$cells1 + nn.cells2 <- neighbors$cells2 + if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { + warning("Number of anchor cells is less than k.filter. Retaining all anchors.") + k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + } else { + cn.data1 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[1]]], + slot = slot)[features, nn.cells1])), + MARGIN = 1) + cn.data2 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[2]]], + slot = slot)[features, nn.cells2])), + MARGIN = 1) + nn <- NNHelper( + data = cn.data2[nn.cells2, ], + query = cn.data1[nn.cells1, ], + k = k.filter, + method = nn.method, + n.trees = n.trees, + eps = eps + ) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") - position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { - which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] - }) - anchors <- anchors[!is.na(x = position), ] - if (verbose) { - message("\tRetained ", nrow(x = anchors), " anchors") - } + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { + which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] + }) + anchors <- anchors[!is.na(x = position), ] + if (verbose) { + message("\tRetained ", nrow(x = anchors), " anchors") } } object <- SetIntegrationData( @@ -4227,6 +4223,9 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 + if (inherits(x = object[[assay[1]]], what = 'Assay5')) { + object <- JoinLayers(object) + } anchors <- GetIntegrationData( object = object, integration.name = integration.name, From 3b465de46de4d490dc333b12a32d784d6610cae2 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Fri, 29 Sep 2023 12:29:09 -0400 Subject: [PATCH 803/979] fix more accidental changes --- R/integration.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/integration.R b/R/integration.R index 31021599a..83e3ebc91 100644 --- a/R/integration.R +++ b/R/integration.R @@ -4223,9 +4223,6 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 - if (inherits(x = object[[assay[1]]], what = 'Assay5')) { - object <- JoinLayers(object) - } anchors <- GetIntegrationData( object = object, integration.name = integration.name, From d07f7ac47a2185ae72da3c083c8d32c75e40eba0 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Fri, 29 Sep 2023 14:23:43 -0400 Subject: [PATCH 804/979] add warning message to RunPCA if data is not scaled for v5 assay --- R/dimensional_reduction.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index bab2f49cd..0ba984f1a 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2418,6 +2418,9 @@ PrepDR <- function( PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRUE) { layer <- layer[1L] + if (!(layer %in% Layers(object = object))) { + abort(paste0("Layer '", layer, "' not found. Please run ScaleData and retry")) + } layer <- match.arg(arg = layer, choices = Layers(object = object)) features <- features %||% VariableFeatures(object = object) if (!length(x = features)) { From 98968bc37b134348ce41a23e534fdce323585aad Mon Sep 17 00:00:00 2001 From: Gesmira Date: Sat, 30 Sep 2023 14:18:27 -0400 Subject: [PATCH 805/979] DoHeatmap pull cells from DefaultAssay() --- R/visualization.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 1d5d31da5..51c4d6f68 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -251,12 +251,12 @@ DoHeatmap <- function( group.bar.height = 0.02, combine = TRUE ) { - cells <- cells %||% colnames(x = object) + assay <- assay %||% DefaultAssay(object = object) + DefaultAssay(object = object) <- assay + cells <- cells %||% colnames(x = object[[assay]]) if (is.numeric(x = cells)) { cells <- colnames(x = object)[cells] } - assay <- assay %||% DefaultAssay(object = object) - DefaultAssay(object = object) <- assay features <- features %||% VariableFeatures(object = object) features <- rev(x = unique(x = features)) disp.max <- disp.max %||% ifelse( From 6fdd69f8a9cd90fb7a1bb68f89a663b833a6863f Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 2 Oct 2023 10:03:00 -0400 Subject: [PATCH 806/979] add BPCells tests --- tests/testthat/test_dimensional_reduction.R | 67 ++++++++++++--------- tests/testthat/test_preprocessing.R | 5 +- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index ab87e5a36..fbad42aa9 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -1,16 +1,17 @@ context("test-dimensional_reduction") -test_that("different ways of passing distance matrix", { - # Generate dummy data exp matrix - set.seed(1) - dummyexpMat <- matrix(data = sample(x = c(1:50), size = 1e4, replace = TRUE), - ncol = 100, nrow = 100) - colnames(dummyexpMat) <- paste0("cell", seq(ncol(dummyexpMat))) - row.names(dummyexpMat) <- paste0("gene", seq(nrow(dummyexpMat))) +set.seed(seed = 1) +dummyexpMat <- matrix( + data = sample(x = c(1:50), size = 1e4, replace = TRUE), + ncol = 100, nrow = 100 +) +colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat))) +row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) - # Create Seurat object for testing - obj <- CreateSeuratObject(counts = dummyexpMat) +# Create Seurat object for testing +obj <- CreateSeuratObject(counts = as.sparse(dummyexpMat)) +test_that("different ways of passing distance matrix", { # Manually make a distance object to test distMat <- dist(t(dummyexpMat)) @@ -28,30 +29,19 @@ test_that("different ways of passing distance matrix", { ) }) -test_that("pca returns total variance (see #982)", { - # Generate dummy data exp matrix - set.seed(seed = 1) - dummyexpMat <- matrix( - data = sample(x = c(1:50), size = 1e4, replace = TRUE), - ncol = 100, nrow = 100 - ) - colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat))) - row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) - - # Create Seurat object for testing - obj <- CreateSeuratObject(counts = as.sparse(dummyexpMat)) +# Normalize, scale, and compute PCA, using RunPCA +obj <- NormalizeData(object = obj, verbose = FALSE) +obj <- ScaleData(object = obj, verbose = FALSE) - # Normalize, scale, and compute PCA, using RunPCA - # obj <- NormalizeData(object = obj, verbose = FALSE) - obj <- ScaleData(object = obj, verbose = FALSE) - pca_result <- suppressWarnings(expr = RunPCA( - object = obj, - features = rownames(x = obj), - verbose = FALSE - )) +pca_result <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(x = obj), + verbose = FALSE +)) +test_that("pca returns total variance (see #982)", { # Using stats::prcomp - scaled_data <- Seurat::GetAssayData(object = obj, slot = "scale.data") + scaled_data <- LayerData(object = obj, layer = "scale.data") prcomp_result <- stats::prcomp(scaled_data, center = FALSE, scale. = FALSE) # Compare @@ -59,3 +49,20 @@ test_that("pca returns total variance (see #982)", { sum(prcomp_result$sdev^2)) }) + +mat_bpcells <- t(as(t(obj[['RNA']]$counts ), "IterableMatrix")) +obj[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) +DefaultAssay(obj) <- "RNAbp" +obj <- NormalizeData(object = obj, verbose = FALSE) +obj <- ScaleData(object = obj, verbose=FALSE) +pca_result_bp <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(obj[['RNAbp']]$counts), + assay = "RNAbp", + layer = "counts" +)) + +test_that("pca is equivalent for BPCells") { + RunPCA(obj, assay = "RNAbp") + +}) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 8bfe21629..5e3bb9ebe 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -134,10 +134,7 @@ if(class(object[['RNA']]) == "Assay5") { # Tests for BPCells NormalizeData # -------------------------------------------------------------------------------- #make Iterable matrix -mat_bpcells <- object[['RNA']]$counts %>% - t() %>% - as("IterableMatrix") %>% - t() +mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNAbp") From 154c3c565be992e9f8e202115ff1502422467dec Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 2 Oct 2023 10:03:32 -0400 Subject: [PATCH 807/979] add warning message in NormalizeData if layer is not found --- R/preprocessing5.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index fea7c026b..c591a203f 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -432,6 +432,9 @@ ScaleData.StdAssay <- function( ) { use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) olayer <- layer <- unique(x = layer) + if (!(layer %in% Layers(object = object))) { + abort(paste0("Layer '", layer, "' not found. Please run NormalizeData and retry")) + } layer <- Layers(object = object, search = layer) if (isTRUE(x = use.umi)) { layer <- "counts" From af791a941a635c7022dd9035efc24591d6543ecc Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 2 Oct 2023 11:57:40 -0400 Subject: [PATCH 808/979] adding documentation for some undocumented args --- R/integration5.R | 1 + R/utilities.R | 1 + R/visualization.R | 1 + 3 files changed, 3 insertions(+) diff --git a/R/integration5.R b/R/integration5.R index d90ceeafc..6b46785ba 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -20,6 +20,7 @@ NULL #' should be called \code{group} #' @param features Ignored #' @param scale.layer Ignored +#' @param new.reduction Name of new integrated dimensional reduction #' @param layers Ignored #' @param key Key for Harmony dimensional reduction #' @param npcs If doing PCA on input matrix, number of PCs to compute diff --git a/R/utilities.R b/R/utilities.R index 951afd0d7..5eef89c46 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -409,6 +409,7 @@ AggregateExpression <- function( #' @param group.by Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default #' @param add.ident (Deprecated) Place an additional label on each cell prior to pseudobulking #' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) +#' @param layer Name of the layer in assay to use #' @param slot Slot(s) to use; if multiple slots are given, assumed to follow #' the order of 'assays' (if specified) or object's assays #' @param method Method of collapsing expression values. Either 'average' or 'aggregate' diff --git a/R/visualization.R b/R/visualization.R index cd8de4748..96b43d42b 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1942,6 +1942,7 @@ CellScatter <- function( #' @param slot Slot to pull data from, should be one of 'counts', 'data', or 'scale.data' #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} #' @param plot.cor Display correlation in plot title +#' @param ncol Number of columns if plotting multiple plots #' @param raster Convert points to raster format, default is \code{NULL} #' which will automatically use raster if the number of points plotted is greater than #' 100,000 From a9295201ca5e22712159bf248ab17a4fdad1fb53 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 2 Oct 2023 14:37:56 -0400 Subject: [PATCH 809/979] more documentation and importing seuratobject functions --- NAMESPACE | 2 ++ R/preprocessing.R | 1 + R/preprocessing5.R | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index e9b22c7ba..d61886996 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -524,6 +524,7 @@ importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.CalcN) importFrom(SeuratObject,.FilterObjects) +importFrom(SeuratObject,.IsFutureSeurat) importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.PropagateList) importFrom(SeuratObject,.SparseSlots) @@ -536,6 +537,7 @@ importFrom(SeuratObject,Cells) importFrom(SeuratObject,CellsByIdentities) importFrom(SeuratObject,Command) importFrom(SeuratObject,CreateAssayObject) +importFrom(SeuratObject,CreateAssay5Object) importFrom(SeuratObject,CreateCentroids) importFrom(SeuratObject,CreateDimReducObject) importFrom(SeuratObject,CreateFOV) diff --git a/R/preprocessing.R b/R/preprocessing.R index 680fee41a..37410d805 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -501,6 +501,7 @@ GetResidual <- function( #' @param to.upper Converts all feature names to upper case. Can be useful when #' analyses require comparisons between human and mouse gene names for example. #' @param ... Arguments passed to \code{\link{Read10X_h5}} +#' @param image Name of image to pull the coordinates from #' #' @return A \code{Seurat} object #' diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 7a2b4ba4e..4f2e570e0 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1879,6 +1879,13 @@ GetResidualsChunked <- function(vst_out, layer.counts, residual_type, min_varian } #' temporal function to get residuals from reference +#' @param object A seurat object +#' @param reference.SCT.model a reference SCT model that should be used +#' for calculating the residuals +#' @param features Names of features to compute +#' @param nCount_UMI UMI counts. If not specified, defaults to +#' column sums of object +#' @param verbose Whether to print messages and progress bars #' @importFrom sctransform get_residuals #' @importFrom Matrix colSums #' From 3aa01cd400c0c800d3d30451611788b0ddfb12e9 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 2 Oct 2023 14:40:41 -0400 Subject: [PATCH 810/979] update documentation --- R/utilities.R | 65 ++++++++++++++++++++------------------ man/AggregateExpression.Rd | 28 ++++++++-------- man/AverageExpression.Rd | 39 +++++++++++++++-------- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 120933054..5b53414ca 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -323,19 +323,12 @@ AddModuleScore <- function( #' Aggregated feature expression by identity class #' -#' Returns aggregated (summed) expression values for each identity class -#' -#' If slot is set to 'data', this function assumes that the data has been log -#' normalized and therefore feature values are exponentiated prior to aggregating -#' so that sum is done in non-log space. Otherwise, if slot is set to -#' either 'counts' or 'scale.data', no exponentiation is performed prior to -#' aggregating -#' If \code{return.seurat = TRUE} and slot is not 'scale.data', aggregated values -#' are placed in the 'counts' slot of the returned object and the log of aggregated values -#' are placed in the 'data' slot. For the \code{\link{ScaleData}} is then run on the default assay +#' Returns summed counts ("pseudobulk") for each identity class. +#' +#' If \code{return.seurat = TRUE}, aggregated values are placed in the 'counts' +#' slot of the returned object. The data is then normalized by running \code{\link{NormalizeData}} +#' on the aggregated counts. \code{\link{ScaleData}} is then run on the default assay #' before returning the object. -#' If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' slot is left empty, -#' the 'data' slot is filled with NA, and 'scale.data' is set to the aggregated values. #' #' @param object Seurat object #' @param assays Which assays to use. Default is all assays @@ -344,8 +337,12 @@ AddModuleScore <- function( #' @param group.by Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default #' @param add.ident (Deprecated) Place an additional label on each cell prior to pseudobulking #' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) +#' @param normalization.method Method for normalization, see \code{\link{NormalizeData}} +#' @param scale.factor Scale factor for normalization, see \code{\link{NormalizeData}} +#' @param margin Margin to perform CLR normalization, see \code{\link{NormalizeData}} #' @param verbose Print messages and show progress bar -#' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}#' +#' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}} +#' #' @return Returns a matrix with genes as rows, identity classes as columns. #' If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. #' @export @@ -362,6 +359,9 @@ AggregateExpression <- function( return.seurat = FALSE, group.by = 'ident', add.ident = NULL, + normalization.method = "LogNormalize", + scale.factor = 10000, + margin = 1, verbose = TRUE, ... ) { @@ -374,7 +374,10 @@ AggregateExpression <- function( return.seurat = return.seurat, group.by = group.by, add.ident = add.ident, - slot = "counts", + layer = "counts", + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, verbose = verbose, ... ) @@ -383,32 +386,35 @@ AggregateExpression <- function( #' Averaged feature expression by identity class #' -#' Returns averaged expression values for each identity class +#' Returns averaged expression values for each identity class. #' -#' If slot is set to 'data', this function assumes that the data has been log +#' If layer is set to 'data', this function assumes that the data has been log #' normalized and therefore feature values are exponentiated prior to averaging -#' so that averaging is done in non-log space. Otherwise, if slot is set to -#' either 'counts' or 'scale.data', no exponentiation is performed prior to -#' averaging -#' If \code{return.seurat = TRUE} and slot is not 'scale.data', averaged values -#' are placed in the 'counts' slot of the returned object and the log of averaged values -#' are placed in the 'data' slot. \code{\link{ScaleData}} is then run on the default assay -#' before returning the object. -#' If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' slot is left empty, -#' the 'data' slot is filled with NA, and 'scale.data' is set to the aggregated values. +#' so that averaging is done in non-log space. Otherwise, if layer is set to +#' either 'counts' or 'scale.data', no exponentiation is performed prior to averaging. +#' If \code{return.seurat = TRUE} and layer is not 'scale.data', averaged values +#' are placed in the 'counts' layer of the returned object and \code{\link{NormalizeData}} +#' is run on the averaged counts and placed in the 'data' layer \code{\link{ScaleData}} +#' is then run on the default assay before returning the object. +#' If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' layer contains +#' average counts and 'scale.data' is set to the averaged values. #' #' @param object Seurat object #' @param assays Which assays to use. Default is all assays #' @param features Features to analyze. Default is all features in the assay #' @param return.seurat Whether to return the data as a Seurat object. Default is FALSE #' @param group.by Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default -#' @param add.ident (Deprecated) Place an additional label on each cell prior to pseudobulking +#' @param add.ident (Deprecated). Place an additional label on each cell prior to pseudobulking #' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) -#' @param slot Slot(s) to use; if multiple slots are given, assumed to follow +#' @param layer Layer(s) to use; if multiple layers are given, assumed to follow #' the order of 'assays' (if specified) or object's assays +#' @param slot (Deprecated). Slots(s) to use +#' @param normalization.method Method for normalization, see \code{\link{NormalizeData}} +#' @param scale.factor Scale factor for normalization, see \code{\link{NormalizeData}} +#' @param margin Margin to perform CLR normalization, see \code{\link{NormalizeData}} #' @param verbose Print messages and show progress bar #' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}} -#' +#' #' @return Returns a matrix with genes as rows, identity classes as columns. #' If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. #' @export @@ -431,7 +437,6 @@ AverageExpression <- function( normalization.method = "LogNormalize", scale.factor = 10000, margin = 1, - block.size = NULL, verbose = TRUE, ... ) { @@ -460,7 +465,7 @@ AverageExpression <- function( } if (method =="average") { - message("Starting with Seurat v5, recommend users swtich to AggregateExpression rather than AverageExpression") + message("As of Seurat v5, it is recommended to use AggregateExpression rather than AverageExpression.") } object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) diff --git a/man/AggregateExpression.Rd b/man/AggregateExpression.Rd index d54854e8e..fdec2ddf0 100644 --- a/man/AggregateExpression.Rd +++ b/man/AggregateExpression.Rd @@ -11,7 +11,9 @@ AggregateExpression( return.seurat = FALSE, group.by = "ident", add.ident = NULL, - slot = "data", + normalization.method = "LogNormalize", + scale.factor = 10000, + margin = 1, verbose = TRUE, ... ) @@ -30,32 +32,28 @@ AggregateExpression( \item{add.ident}{(Deprecated) Place an additional label on each cell prior to pseudobulking (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)} -\item{slot}{Slot(s) to use; if multiple slots are given, assumed to follow -the order of 'assays' (if specified) or object's assays} +\item{normalization.method}{Method for normalization, see \code{\link{NormalizeData}}} + +\item{scale.factor}{Scale factor for normalization, see \code{\link{NormalizeData}}} + +\item{margin}{Margin to perform CLR normalization, see \code{\link{NormalizeData}}} \item{verbose}{Print messages and show progress bar} -\item{...}{Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}#'} +\item{...}{Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}} } \value{ Returns a matrix with genes as rows, identity classes as columns. If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. } \description{ -Returns aggregated (summed) expression values for each identity class +Returns summed counts ("pseudobulk") for each identity class. } \details{ -If slot is set to 'data', this function assumes that the data has been log -normalized and therefore feature values are exponentiated prior to aggregating -so that sum is done in non-log space. Otherwise, if slot is set to -either 'counts' or 'scale.data', no exponentiation is performed prior to -aggregating -If \code{return.seurat = TRUE} and slot is not 'scale.data', aggregated values -are placed in the 'counts' slot of the returned object and the log of aggregated values -are placed in the 'data' slot. For the \code{\link{ScaleData}} is then run on the default assay +If \code{return.seurat = TRUE}, aggregated values are placed in the 'counts' +slot of the returned object. The data is then normalized by running \code{\link{NormalizeData}} +on the aggregated counts. \code{\link{ScaleData}} is then run on the default assay before returning the object. -If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' slot is left empty, -the 'data' slot is filled with NA, and 'scale.data' is set to the aggregated values. } \examples{ data("pbmc_small") diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index f4b2f8ff9..1bad9d0fa 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -11,8 +11,12 @@ AverageExpression( return.seurat = FALSE, group.by = "ident", add.ident = NULL, - slot = "counts", + layer = "data", + slot = deprecated(), method = "average", + normalization.method = "LogNormalize", + scale.factor = 10000, + margin = 1, verbose = TRUE, ... ) @@ -28,12 +32,20 @@ AverageExpression( \item{group.by}{Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default} -\item{add.ident}{(Deprecated) Place an additional label on each cell prior to pseudobulking +\item{add.ident}{(Deprecated). Place an additional label on each cell prior to pseudobulking (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)} -\item{slot}{Slot(s) to use; if multiple slots are given, assumed to follow +\item{layer}{Layer(s) to use; if multiple layers are given, assumed to follow the order of 'assays' (if specified) or object's assays} +\item{slot}{(Deprecated). Slots(s) to use} + +\item{normalization.method}{Method for normalization, see \code{\link{NormalizeData}}} + +\item{scale.factor}{Scale factor for normalization, see \code{\link{NormalizeData}}} + +\item{margin}{Margin to perform CLR normalization, see \code{\link{NormalizeData}}} + \item{verbose}{Print messages and show progress bar} \item{...}{Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}} @@ -43,20 +55,19 @@ Returns a matrix with genes as rows, identity classes as columns. If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. } \description{ -Returns averaged expression values for each identity class +Returns averaged expression values for each identity class. } \details{ -If slot is set to 'data', this function assumes that the data has been log +If layer is set to 'data', this function assumes that the data has been log normalized and therefore feature values are exponentiated prior to averaging -so that averaging is done in non-log space. Otherwise, if slot is set to -either 'counts' or 'scale.data', no exponentiation is performed prior to -averaging -If \code{return.seurat = TRUE} and slot is not 'scale.data', averaged values -are placed in the 'counts' slot of the returned object and the log of averaged values -are placed in the 'data' slot. \code{\link{ScaleData}} is then run on the default assay -before returning the object. -If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' slot is left empty, -the 'data' slot is filled with NA, and 'scale.data' is set to the aggregated values. +so that averaging is done in non-log space. Otherwise, if layer is set to +either 'counts' or 'scale.data', no exponentiation is performed prior to averaging. +If \code{return.seurat = TRUE} and layer is not 'scale.data', averaged values +are placed in the 'counts' layer of the returned object and \code{\link{NormalizeData}} +is run on the averaged counts and placed in the 'data' layer \code{\link{ScaleData}} +is then run on the default assay before returning the object. +If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' layer contains +average counts and 'scale.data' is set to the averaged values. } \examples{ data("pbmc_small") From 6bd4e53d5c476b105719d073a0d2a09f44980f29 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 2 Oct 2023 15:00:53 -0400 Subject: [PATCH 811/979] update COVID_SCTMapping vignette to use AggregateExpression --- vignettes/COVID_SCTMapping.Rmd | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vignettes/COVID_SCTMapping.Rmd b/vignettes/COVID_SCTMapping.Rmd index 7bc9f2313..b7fc59575 100755 --- a/vignettes/COVID_SCTMapping.Rmd +++ b/vignettes/COVID_SCTMapping.Rmd @@ -142,10 +142,8 @@ p1 + p2 + plot_layout(ncol = 2) In addition to composition analysis, we use an aggregation-based (pseudobulk) workflow to explore differential genes between healthy individuals and COVID-19 donors. We aggregate all cells within the same cell type and donor using the `AggregateExpression` function. This returns a Seurat object where each ‘cell’ represents the pseudobulk profile of one cell type in one individual. ```{r} -bulk <- AverageExpression(object, - method = 'aggregate', +bulk <- AggregateExpression(object, return.seurat = TRUE, - slot = 'counts', assays = 'RNA', group.by = c("predicted.celltype.l2", "donor_id", "disease") ) From c585e09bdc57103b843fd9b6709b521cc87b9c4d Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 2 Oct 2023 15:07:11 -0400 Subject: [PATCH 812/979] importing seuratobject functions --- NAMESPACE | 2 -- 1 file changed, 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d61886996..e9b22c7ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -524,7 +524,6 @@ importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.CalcN) importFrom(SeuratObject,.FilterObjects) -importFrom(SeuratObject,.IsFutureSeurat) importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.PropagateList) importFrom(SeuratObject,.SparseSlots) @@ -537,7 +536,6 @@ importFrom(SeuratObject,Cells) importFrom(SeuratObject,CellsByIdentities) importFrom(SeuratObject,Command) importFrom(SeuratObject,CreateAssayObject) -importFrom(SeuratObject,CreateAssay5Object) importFrom(SeuratObject,CreateCentroids) importFrom(SeuratObject,CreateDimReducObject) importFrom(SeuratObject,CreateFOV) From 4a55d5c99e2539c0ffbfee08988f1a7e75a6f3c3 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 2 Oct 2023 15:20:55 -0400 Subject: [PATCH 813/979] imports, man --- NAMESPACE | 2 ++ man/AverageExpression.Rd | 2 ++ man/FeatureScatter.Rd | 2 ++ man/FetchResiduals_reference.Rd | 13 +++++++++++++ man/HarmonyIntegration.Rd | 2 ++ man/Load10X_Spatial.Rd | 2 ++ man/reexports.Rd | 4 ++-- 7 files changed, 25 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e9b22c7ba..d61886996 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -524,6 +524,7 @@ importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.CalcN) importFrom(SeuratObject,.FilterObjects) +importFrom(SeuratObject,.IsFutureSeurat) importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.PropagateList) importFrom(SeuratObject,.SparseSlots) @@ -536,6 +537,7 @@ importFrom(SeuratObject,Cells) importFrom(SeuratObject,CellsByIdentities) importFrom(SeuratObject,Command) importFrom(SeuratObject,CreateAssayObject) +importFrom(SeuratObject,CreateAssay5Object) importFrom(SeuratObject,CreateCentroids) importFrom(SeuratObject,CreateDimReducObject) importFrom(SeuratObject,CreateFOV) diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index 6fc0cf54d..df7dd80e3 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -32,6 +32,8 @@ AverageExpression( \item{add.ident}{(Deprecated) Place an additional label on each cell prior to pseudobulking (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)} +\item{layer}{Name of the layer in assay to use} + \item{slot}{Slot(s) to use; if multiple slots are given, assumed to follow the order of 'assays' (if specified) or object's assays} diff --git a/man/FeatureScatter.Rd b/man/FeatureScatter.Rd index ffded2917..06e6625d7 100644 --- a/man/FeatureScatter.Rd +++ b/man/FeatureScatter.Rd @@ -65,6 +65,8 @@ to split by cell identity'} \item{plot.cor}{Display correlation in plot title} +\item{ncol}{Number of columns if plotting multiple plots} + \item{raster}{Convert points to raster format, default is \code{NULL} which will automatically use raster if the number of points plotted is greater than 100,000} diff --git a/man/FetchResiduals_reference.Rd b/man/FetchResiduals_reference.Rd index 54e9ca09f..4330a3e31 100644 --- a/man/FetchResiduals_reference.Rd +++ b/man/FetchResiduals_reference.Rd @@ -12,6 +12,19 @@ FetchResiduals_reference( verbose = FALSE ) } +\arguments{ +\item{object}{A seurat object} + +\item{reference.SCT.model}{a reference SCT model that should be used +for calculating the residuals} + +\item{features}{Names of features to compute} + +\item{nCount_UMI}{UMI counts. If not specified, defaults to +column sums of object} + +\item{verbose}{Whether to print messages and progress bars} +} \description{ temporal function to get residuals from reference } diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 215eed868..74b2981d7 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -40,6 +40,8 @@ should be called \code{group}} \item{scale.layer}{Ignored} +\item{new.reduction}{Name of new integrated dimensional reduction} + \item{layers}{Ignored} \item{npcs}{If doing PCA on input matrix, number of PCs to compute} diff --git a/man/Load10X_Spatial.Rd b/man/Load10X_Spatial.Rd index 763a1d0ef..1d8cfe73b 100644 --- a/man/Load10X_Spatial.Rd +++ b/man/Load10X_Spatial.Rd @@ -31,6 +31,8 @@ tissue} \item{to.upper}{Converts all feature names to upper case. Can be useful when analyses require comparisons between human and mouse gene names for example.} +\item{image}{Name of image to pull the coordinates from} + \item{...}{Arguments passed to \code{\link{Read10X_h5}}} } \value{ diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 85fb43cecbc3e04817a0dcf4fcd19fb1c3caa4bc Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 3 Oct 2023 10:41:31 -0400 Subject: [PATCH 814/979] fix global variable issue for bpcells findmarkers --- R/differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 3b94bf3b0..9f8b19bb2 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -578,7 +578,7 @@ FindMarkers.default <- function( de.results <- suppressMessages( BPCells::marker_features(data.use, group = groups, method = "wilcoxon") ) - de.results <- subset(de.results, foreground == "foreground") + de.results <- subset(de.results, de.results$foreground == "foreground") de.results <- data.frame(feature = de.results$feature, p_val = de.results$p_val_raw) rownames(de.results) <- de.results$feature From a8d3fce9edd91ca89af2ec5e08707f9903ccaa90 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 3 Oct 2023 11:27:05 -0400 Subject: [PATCH 815/979] adding BPCells FindMarkers tests and error message --- R/differential_expression.R | 4 +++ tests/testthat/test_differential_expression.R | 31 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/R/differential_expression.R b/R/differential_expression.R index 3b94bf3b0..cfe4d9eb6 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -573,6 +573,10 @@ FindMarkers.default <- function( } } if (inherits(x = object, what = "IterableMatrix")){ + if(test.use != "wilcox"){ + stop("Differential expression with BPCells currently only supports the 'wilcox' method.", + " Please rerun with test.use = 'wilcox'") + } data.use <- object[features, c(cells.1, cells.2), drop = FALSE] groups <- c(rep("foreground", length(cells.1)), rep("background", length(cells.2))) de.results <- suppressMessages( diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 5b9414c59..354c8eef6 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -281,6 +281,23 @@ test_that("LR test works", { expect_equal(rownames(x = results)[1], "LYZ") }) +mat_bpcells <- t(as(t(pbmc_small[['RNA']]$counts ), "IterableMatrix")) +pbmc_small[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) +pbmc_small <- NormalizeData(pbmc_small, assay = "RNAbp") + +markers.bp <- suppressWarnings(FindMarkers(object = pbmc_small, assay = "RNAbp", ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) + +test_that("BPCells FindMarkers gives same results", { + expect_equal(colnames(x = markers.bp), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) + expect_equal(markers.bp[1, "p_val"], 9.572778e-13) + expect_equal(markers.bp[1, "avg_logFC"], -4.034691, tolerance = 1e-6) + expect_equal(markers.bp[1, "pct.1"], 0.083) + expect_equal(markers.bp[1, "pct.2"], 0.909) + expect_equal(markers.bp[1, "p_val_adj"], 2.201739e-10) + expect_equal(nrow(x = markers.bp), 204) + expect_equal(rownames(markers.bp)[1], "HLA-DPB1") +}) + # Tests for FindAllMarkers # ------------------------------------------------------------------------------- results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small,pseudocount.use=1))) @@ -358,6 +375,19 @@ test_that("FindMarkers recognizes log normalizatio", { expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) }) +results.bp <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, assay = "RNAbp", pseudocount.use=1))) + +test_that("BPCells FindAllMarkers gives same results", { + expect_equal(colnames(x = results.bp), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) + expect_equal(results.bp[1, "p_val"], 9.572778e-13) + expect_equal(results.bp[1, "avg_log2FC"], -5.820829, tolerance = 1e-6) + expect_equal(results.bp[1, "pct.1"], 0.083) + expect_equal(results.bp[1, "pct.2"], 0.909) + expect_equal(results.bp[1, "p_val_adj"], 2.201739e-10) + expect_equal(nrow(x = results.bp), 222) + expect_equal(rownames(results.bp)[1], "HLA-DPB1") +}) + # Tests for FindConservedMarkers @@ -414,3 +444,4 @@ if (requireNamespace('metap', quietly = TRUE)) { expect_equal(rownames(markers.missing)[1], "HLA-DPB1") }) } + From f4c030051dfe165b69bb75f4262ca1fbf8066562 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 3 Oct 2023 11:42:20 -0400 Subject: [PATCH 816/979] fixing argument package msising issue --- tests/testthat.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat.R b/tests/testthat.R index 1bf754d41..e6f5ed2e5 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -4,9 +4,9 @@ library(Seurat) # Run tests for 'v5' message('Run tests for v5 assay') options(Seurat.object.assay.version = 'v5') -test_check() +test_check("Seurat") # Run tests for 'v3' message('Run tests for v3 assay') options(Seurat.object.assay.version = 'v3') -test_check() +test_check("Seurat") From 9bca336254b0e2c3cec08ee9b34be11a7645f2e4 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 3 Oct 2023 12:28:04 -0400 Subject: [PATCH 817/979] LeverageScore documentation --- R/sketching.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/sketching.R b/R/sketching.R index e44752216..12134f372 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -467,6 +467,18 @@ LeverageScore.default <- function( return(Z.score) } +#' @param nsketch A positive integer. The number of sketches to be used in the approximation. +#' Default is 5000. +#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number +#' of dimensions will default to the number of columns in the object. +#' @param method The sketching method to use, defaults to CountSketch. +#' @param vf.method VariableFeatures method +#' @param layer layer to use +#' @param eps A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +#' defaults to 0.5. +#' @param seed A positive integer. The seed for the random number generator, defaults to 123. +#' @param verbose Print progress and diagnostic messages +#' #' @rdname LeverageScore #' @method LeverageScore StdAssay #' @@ -526,6 +538,17 @@ LeverageScore.StdAssay <- function( #' LeverageScore.Assay <- LeverageScore.StdAssay + +#' @param assay assay to use +#' @param nsketch A positive integer. The number of sketches to be used in the approximation. +#' Default is 5000. +#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number +#' of dimensions will default to the number of columns in the object. +#' @param method The sketching method to use, defaults to CountSketch. +#' @param var.name name of slot to store leverage scores +#' @param over.write whether to overwrite slot that currently stores leverage scores. Defaults +#' to FALSE, in which case the 'var.name' is modified if it already exists in the object +#' #' @rdname LeverageScore #' @method LeverageScore Seurat #' @export From 1a65832bb27a6f14a3fe227fb424e3b32526e5b8 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 13:03:40 -0400 Subject: [PATCH 818/979] update tests for AverageExpression --- R/dimensional_reduction.R | 4 +- R/utilities.R | 2 +- tests/testthat/test_utilities.R | 101 ++++++++++++++++---------------- 3 files changed, 53 insertions(+), 54 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 69a886fc7..89ee619d0 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -649,8 +649,8 @@ RunCCA.Seurat <- function( warning("Some cells removed after object merge due to minimum feature count cutoff") } combined.scale <- cbind(data1,data2) - # combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") - combined.object@assays$ToIntegrate@scale.data <- combined.scale + combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") + ## combined.object@assays$ToIntegrate@scale.data <- combined.scale if (renormalize) { combined.object <- NormalizeData( object = combined.object, diff --git a/R/utilities.R b/R/utilities.R index 5b53414ca..dd90177af 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -465,7 +465,7 @@ AverageExpression <- function( } if (method =="average") { - message("As of Seurat v5, it is recommended to use AggregateExpression rather than AverageExpression.") + message("As of Seurat v5, As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.") } object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index f796e1241..47fcb8411 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -14,83 +14,81 @@ object <- CreateSeuratObject( min.features = 30, meta.data = meta.data ) +object <- NormalizeData(object) object <- SetIdent(object, value = 'a') -LayerData(object, layer="data") <- LayerData(object, layer="counts") +group.by = "a" +data <- FetchData(object = object, vars = rev(x = group.by)) +data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] +category.matrix.avg <- CreateCategoryMatrix(labels = data, method = 'average') +category.matrix.sum <- CreateCategoryMatrix(labels = data, method = 'aggregate') -test_that("AverageExpression works for different slots", { + +test_that("CreateCategoryMatrix works for average and aggregate", { + expect_equal(unname(colSums(category.matrix.avg)), c(1, 1, 1)) + expect_equal(unname(colSums(category.matrix.sum)), c(27, 26, 24)) +}) + +test_that("AverageExpression works for different layers", { + #average expression on data layer is equal to log of average exponentiated data suppressWarnings(average.expression <- AverageExpression(object, layer = 'data')$RNA) + counts.from.data.avg <- expm1(object[['RNA']]$data) %*% category.matrix.avg expect_equivalent( - average.expression['KHDRBS1', 1:3], - c(a = 7.278237e-01, b = 1.658166e+14, c = 1.431902e-01), + log1p(counts.from.data.avg), + average.expression, tolerance = 1e-6 ) + #average expression on counts layer is equal to average of counts + suppressWarnings(average.counts <- AverageExpression(object, layer = 'counts')$RNA) + avg.counts <- object[['RNA']]$data %*% category.matrix.avg expect_equivalent( - average.expression['DNAJB1', 1:3], - c(a = 1.374079e+00, b = 5.100840e-01, c = 5.011655e-01), - tolerance = 1e-6 - ) - suppressWarnings(avg.counts <- AverageExpression(object, slot = 'counts')$RNA) - expect_equal( - unname(avg.counts['MS4A1', ]), - unname(c(a = 0.37037037, b = 0.3461538, c = 0.3333333)), - tolerance = 1e-6 - ) - expect_equal( - unname(avg.counts['SPON2', ]), - unname(c(a = 0.5185185, b = 0.6153846, c = 0.08333333)), - tolerance = 1e-6 - ) - expect_warning(AverageExpression(object, slot = 'scale.data')) - object <- ScaleData(object = object, verbose = FALSE) - avg.scale <- AverageExpression(object, slot = "scale.data")$RNA - expect_equal( - unname(avg.scale['MS4A1', ]), - unname(c(a = 0.02092088, b = -0.004769018, c = -0.018369549)), + avg.counts, + average.counts, tolerance = 1e-6 ) - expect_equal( - unname(avg.scale['SPON2', ]), - unname(c(a = 0.1052434, b = 0.2042827, c = -0.3397051)), + #average expression on scale.data layer is equal to average of scale.data + object <- ScaleData(object, features = rownames(object[['RNA']]$data)) + suppressWarnings(average.scale.data <- AverageExpression(object, layer = 'scale.data')$RNA) + avg.scale <- object[['RNA']]$scale.data %*% category.matrix.avg + expect_equivalent( + average.scale.data, + avg.scale, tolerance = 1e-6 ) -}) + }) test_that("AverageExpression handles features properly", { features <- rownames(x = object)[1:10] - average.expression <- AverageExpression(object, slot = 'data', features = features)$RNA + average.expression <- AverageExpression(object, layer = 'data', features = features)$RNA expect_equal(rownames(x = average.expression), features) - expect_warning(AverageExpression(object, slot = 'data', features = "BAD")) - expect_warning(AverageExpression(object, slot = "data", features = c(features, "BAD"))) + expect_warning(AverageExpression(object, layer = 'data', features = "BAD")) + expect_warning(AverageExpression(object, layer = "data", features = c(features, "BAD"))) }) test_that("AverageExpression with return.seurat", { # counts - avg.counts <- AverageExpression(object, slot = "counts", return.seurat = TRUE, verbose = FALSE) + avg.counts <- AverageExpression(object, layer = "counts", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.counts, "Seurat") - avg.counts.mat <- AverageExpression(object, slot = 'counts')$RNA + avg.counts.mat <- AverageExpression(object, layer = 'counts')$RNA expect_equal(unname(as.matrix(LayerData(avg.counts[["RNA"]], layer = "counts"))), unname(as.matrix(avg.counts.mat))) avg.data <- LayerData(avg.counts[["RNA"]], layer = "data") - expect_equal( - unname(avg.data['MS4A1', ]), - unname(c(a = 0.31508105, b = 0.2972515, c = 0.2876821)), - tolerance = 1e-6 - ) - expect_equal( - unname(avg.data['SPON2', ]), - unname(c(a = 0.4177352, b = 0.4795731, c = 0.08004271)), + + expect_equivalent( + as.matrix(NormalizeData(avg.counts.mat)), + as.matrix(avg.data), tolerance = 1e-6 ) + avg.scale <- LayerData(avg.counts[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], - c(a = 1.0841908, b = -0.1980056, c = -0.8861852), + c(a = -0.8141426, b = 1.1162108, c = -0.3020683), tolerance = 1e-6 ) expect_equal( avg.scale['SPON2', ], - c(a = 0.4275778, b = 0.7151260, c = -1.1427038), + c(a = 0.3387626, b = 0.7866155, c = -1.1253781), tolerance = 1e-6 ) @@ -105,12 +103,12 @@ test_that("AverageExpression with return.seurat", { avg.scale <- LayerData(avg.data[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], - c(a = 0.721145238, b = -1.1415734, c = 0.4204281), + c(a = -0.07823997, b = 1.0368218, c = -0.9585818), tolerance = 1e-6 ) expect_equal( avg.scale['SPON2', ], - c(a = 0.08226771, b = 0.9563249, c = -1.0385926), + c(a = 0.1213127, b = 0.9338096, c = -1.0551222), tolerance = 1e-6 ) @@ -118,7 +116,7 @@ test_that("AverageExpression with return.seurat", { object <- ScaleData(object = object, verbose = FALSE) avg.scale <- AverageExpression(object, layer = "scale.data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.scale, "Seurat") - avg.scale.mat <- AverageExpression(object, slot = 'scale.data')$RNA + avg.scale.mat <- AverageExpression(object, layer = 'scale.data')$RNA expect_equal(unname(as.matrix(LayerData(avg.scale[["RNA"]], layer = "scale.data"))), unname(as.matrix(avg.scale.mat))) }) @@ -127,20 +125,21 @@ rownames(x = test.dat) <- paste0("test-", rownames(x = test.dat)) object[["TEST"]] <- CreateAssayObject(data = test.dat) test_that("AverageExpression with multiple assays", { - avg.test <- AverageExpression(object = object, assays = "TEST", slot = "data") + avg.test <- AverageExpression(object = object, assays = "TEST", layer = "data") expect_equal(names(x = avg.test), "TEST") expect_equal(length(x = avg.test), 1) expect_equivalent( avg.test[[1]]['test-KHDRBS1', 1:3], - c(a = 7.278237e-01, b = 1.658166e+14, c = 1.431902e-01), + c(a = 10.329153, b = 92.287109, c = 5.620942), tolerance = 1e-6 ) expect_equivalent( avg.test[[1]]['test-DNAJB1', 1:3] , - c(a = 1.374079e+00, b = 5.100840e-01, c = 5.011655e-01), + c(a = 42.32240, b = 15.94807, c = 15.96319), tolerance = 1e-6 ) - avg.all <- AverageExpression(object = object, slot = "data") + avg.all <- AverageExpression(object = object, layer = "data") expect_equal(names(x = avg.all), c("RNA", "TEST")) expect_equal(length(x = avg.all), 2) }) + From 34c072df1d8549f51ddaf0e334d82a7e195603af Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 3 Oct 2023 13:04:13 -0400 Subject: [PATCH 819/979] documentation --- R/objects.R | 1 + man/LeverageScore.Rd | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/R/objects.R b/R/objects.R index c0fce535f..ba1d88137 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1672,6 +1672,7 @@ GetTissueCoordinates.VisiumV1 <- function( #' Get variable feature information from \code{\link{SCTAssay}} objects #' #' @inheritParams SeuratObject::HVFInfo +#' @param method method to determine variable features #' #' @export #' @method HVFInfo SCTAssay diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index c00ad991f..9fdc7e319 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -82,6 +82,17 @@ defaults to 0.5.} \item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} \item{verbose}{Print progress and diagnostic messages} + +\item{vf.method}{VariableFeatures method} + +\item{layer}{layer to use} + +\item{assay}{assay to use} + +\item{var.name}{name of slot to store leverage scores} + +\item{over.write}{whether to overwrite slot that currently stores leverage scores. Defaults +to FALSE, in which case the 'var.name' is modified if it already exists in the object} } \description{ This function computes the leverage scores for a given object From b51f299b45ae0085c5d911b4ae1c675df6f69292 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Tue, 3 Oct 2023 16:19:07 -0400 Subject: [PATCH 820/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ae56c90fd..3a5f6ea84 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9067 -Date: 2023-09-28 +Version: 4.9.9.9068 +Date: 2023-10-03 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From ec8bce3c50750dd7ac26d5975bcaeac302565a6a Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 16:53:29 -0400 Subject: [PATCH 821/979] Revert AverageExpression to use arithmetic mean with v5 assay, all tests passing --- R/utilities.R | 18 ++++++++++-------- tests/testthat/test_utilities.R | 9 +++++++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index dd90177af..28bd2d8e6 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1462,9 +1462,6 @@ PseudobulkExpression.StdAssay <- function( ) layer <- slot } - if (layer == 'data') { - message("Assay5 will use arithmetic mean for data slot.") - } layers.set <- Layers(object = object, search = layer) features.to.avg <- features %||% rownames(x = object) bad.features <- setdiff(x = features.to.avg, y = rownames(x = object)) @@ -1497,17 +1494,22 @@ PseudobulkExpression.StdAssay <- function( layer = layers.set[i], features = features.assay ) + if (layers.set[i] == "data") { + data.use.i <- expm1(x = data.i) + if (any(data.use.i == Inf)) { + warning("Exponentiation yielded infinite values. `data` may not be log-normed.") + } + } else { + data.use.i <- data.i + } category.matrix.i <- category.matrix[colnames(x = data.i),] if (inherits(x = data.i, what = 'DelayedArray')) { - data.return.i<- tcrossprod_DelayedAssay(x = data.i, y = t(category.matrix.i)) + data.return.i<- tcrossprod_DelayedAssay(x = data.use.i, y = t(category.matrix.i)) } else { - data.return.i <- as.sparse(x = data.i %*% category.matrix.i) + data.return.i <- as.sparse(x = data.use.i %*% category.matrix.i) } data.return <- data.return + data.return.i } - if (layer == 'data') { - data.return <- expm1(x = data.return) - } return(data.return) } diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 47fcb8411..bd4d04ad1 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -68,18 +68,23 @@ test_that("AverageExpression handles features properly", { test_that("AverageExpression with return.seurat", { # counts avg.counts <- AverageExpression(object, layer = "counts", return.seurat = TRUE, verbose = FALSE) + avg.counts.calc <- object[['RNA']]$counts %*% category.matrix.avg + #test that counts are indeed equal to average counts + expect_equivalent( + as.matrix(avg.counts[['RNA']]$counts), + as.matrix(avg.counts.calc), + tolerance = 1e-6 + ) expect_s4_class(object = avg.counts, "Seurat") avg.counts.mat <- AverageExpression(object, layer = 'counts')$RNA expect_equal(unname(as.matrix(LayerData(avg.counts[["RNA"]], layer = "counts"))), unname(as.matrix(avg.counts.mat))) avg.data <- LayerData(avg.counts[["RNA"]], layer = "data") - expect_equivalent( as.matrix(NormalizeData(avg.counts.mat)), as.matrix(avg.data), tolerance = 1e-6 ) - avg.scale <- LayerData(avg.counts[["RNA"]], layer = "scale.data") expect_equal( avg.scale['MS4A1', ], From 167b986aa48173ae73243a8ed745b27d0a9b0420 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 17:23:08 -0400 Subject: [PATCH 822/979] fix typos --- R/utilities.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 28bd2d8e6..3dfb1b87e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -414,7 +414,7 @@ AggregateExpression <- function( #' @param margin Margin to perform CLR normalization, see \code{\link{NormalizeData}} #' @param verbose Print messages and show progress bar #' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}} -#' +#' #' @return Returns a matrix with genes as rows, identity classes as columns. #' If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. #' @export @@ -440,7 +440,7 @@ AverageExpression <- function( verbose = TRUE, ... ) { - #CheckDots(..., fxns = 'CreateSeuratObject') + CheckDots(..., fxns = 'CreateSeuratObject') if (!is.null(x = add.ident)) { .Deprecated(msg = "'add.ident' is a deprecated argument, please use the 'group.by' argument instead") group.by <- c('ident', add.ident) From 0dc76fd26aa9f1a08c9273970c49175fa11cf7a3 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 22:16:43 -0400 Subject: [PATCH 823/979] add multi-layer tests for AverageExpression, update vignettes --- tests/testthat/test_utilities.R | 27 +++++++++++++++++++ .../seurat5_integration_introduction.Rmd | 6 +++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index bd4d04ad1..4aeb24998 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -148,3 +148,30 @@ test_that("AverageExpression with multiple assays", { expect_equal(length(x = avg.all), 2) }) + +meta.data.2 <- data.frame( + b = rep(as.factor(c('c', 'd', 'e')), length.out = ncol(pbmc.test)), + row.names = colnames(pbmc.test) +) +object <- AddMetaData(object, meta.data.2) +if(class(object[['RNA']]) == "Assay5") { + test_that("AggregateExpression works with multiple layers", { + object.split <- split(object, f = object$b) + aggregate.split <- AggregateExpression(object.split) + aggregate <- AggregateExpression(object) + expect_equivalent( + aggregate.split, + aggregate, + tolerance = 1e-6 + ) + }) + test_that("AverageExpression works with multiple layers", { + avg.split <- AverageExpression(object.split) + avg <- AverageExpression(object) + expect_equivalent( + avg.split, + avg, + tolerance = 1e-6 + ) + }) +} diff --git a/vignettes/seurat5_integration_introduction.Rmd b/vignettes/seurat5_integration_introduction.Rmd index 371571880..2342c7c6e 100644 --- a/vignettes/seurat5_integration_introduction.Rmd +++ b/vignettes/seurat5_integration_introduction.Rmd @@ -175,12 +175,14 @@ library(cowplot) theme_set(theme_cowplot()) t.cells <- subset(ifnb, idents = "CD4 Naive T") Idents(t.cells) <- "stim" -avg.t.cells <- as.data.frame(log1p(AverageExpression(t.cells, verbose = FALSE)$RNA)) +t.cells.pseudo <- AggregateExpression(t.cells, return.seurat = TRUE, verbose = FALSE) +avg.t.cells <- as.data.frame(t.cells.pseudo[['RNA']]$data) avg.t.cells$gene <- rownames(avg.t.cells) cd14.mono <- subset(ifnb, idents = "CD14 Mono") Idents(cd14.mono) <- "stim" -avg.cd14.mono <- as.data.frame(log1p(AverageExpression(cd14.mono, verbose = FALSE)$RNA)) +cd14.mono.pseudo <- AggregateExpression(cd14.mono, return.seurat = TRUE, verbose = FALSE) +avg.cd14.mono <- as.data.frame(cd14.mono.pseudo[['RNA']]$data) avg.cd14.mono$gene <- rownames(avg.cd14.mono) genes.to.label = c("ISG15", "LY6E", "IFI6", "ISG20", "MX1", "IFIT2", "IFIT1", "CXCL10", "CCL8") From 6cb45cca01c8fa9dff126c6db7f9f4e92ec60f68 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 22:22:05 -0400 Subject: [PATCH 824/979] fix tests --- tests/testthat/test_utilities.R | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index 4aeb24998..c0946c599 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -157,20 +157,18 @@ object <- AddMetaData(object, meta.data.2) if(class(object[['RNA']]) == "Assay5") { test_that("AggregateExpression works with multiple layers", { object.split <- split(object, f = object$b) - aggregate.split <- AggregateExpression(object.split) - aggregate <- AggregateExpression(object) + aggregate.split <- AggregateExpression(object.split, assay = "RNA") + aggregate <- AggregateExpression(object, assay = "RNA") expect_equivalent( - aggregate.split, - aggregate, + aggregate.split$RNA, + aggregate$RNA, tolerance = 1e-6 ) - }) - test_that("AverageExpression works with multiple layers", { - avg.split <- AverageExpression(object.split) - avg <- AverageExpression(object) + avg.split <- AverageExpression(object.split, assay = "RNA") + avg <- AverageExpression(object, assay = "RNA") expect_equivalent( - avg.split, - avg, + avg.split$RNA, + avg$RNA, tolerance = 1e-6 ) }) From 897af44d85fa95d603adf34adc305925032b8f81 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 22:42:13 -0400 Subject: [PATCH 825/979] update seurat5 interaction vignette to use AggregateExpression --- vignettes/seurat5_interaction_vignette.Rmd | 26 +++++++++------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/vignettes/seurat5_interaction_vignette.Rmd b/vignettes/seurat5_interaction_vignette.Rmd index e29f3059f..6b15c98bf 100644 --- a/vignettes/seurat5_interaction_vignette.Rmd +++ b/vignettes/seurat5_interaction_vignette.Rmd @@ -112,29 +112,23 @@ subset(pbmc, idents = c('NK', 'B'), invert = TRUE) # Calculating the average gene expression within a cluster ```{r avg.exp, fig.height=8} -# How can I calculate the average expression of all cells within a cluster? -cluster.averages <- AverageExpression(pbmc) -head(cluster.averages[['RNA']][, 1:5]) - +# How can I pseudobulk cells within a cluster? # Return this information as a Seurat object (enables downstream plotting and analysis) -# First, replace spaces with underscores '_' so ggplot2 doesn't fail -orig.levels <- levels(pbmc) -Idents(pbmc) <- gsub(pattern = ' ', replacement = '-', x = Idents(pbmc)) -orig.levels <- gsub(pattern = ' ', replacement = '-', x = orig.levels) -levels(pbmc) <- orig.levels -cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE) -cluster.averages +# The summed counts are stored in the counts layer and normalized value are stored in the data layer +cluster.pseudobulk <- AggregateExpression(pbmc, return.seurat=TRUE) +cluster.pseudobulk +head(cluster.pseudobulk[['RNA']]$data[1:5, ]) # How can I plot the average expression of NK cells vs. CD8 T cells? # Pass do.hover = T for an interactive plot to identify gene outliers -CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8-T") +CellScatter(cluster.pseudobulk, cell1 = "NK", cell2 = "CD8 T") -# How can I calculate expression averages separately for each replicate? -cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE, add.ident = "replicate") -CellScatter(cluster.averages, cell1 = "CD8-T_rep1", cell2 = "CD8-T_rep2") +# How can I calculate pseudobulked expression values separately for each replicate? +cluster.pseudobulk <- AggregateExpression(pbmc, return.seurat = TRUE, group.by = c("CellType", "replicate")) +CellScatter(cluster.pseudobulk, cell1 = "CD8 T_rep1", cell2 = "CD8 T_rep2") # You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between replicates -DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) +DoHeatmap(cluster.pseudobulk, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) ``` ```{r save.times, include=TRUE} From 0163b235b9ddcdee65a5c357f41f88b61e35e9ba Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 3 Oct 2023 22:48:11 -0400 Subject: [PATCH 826/979] vignette update --- vignettes/seurat5_interaction_vignette.Rmd | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vignettes/seurat5_interaction_vignette.Rmd b/vignettes/seurat5_interaction_vignette.Rmd index 6b15c98bf..966230343 100644 --- a/vignettes/seurat5_interaction_vignette.Rmd +++ b/vignettes/seurat5_interaction_vignette.Rmd @@ -113,6 +113,9 @@ subset(pbmc, idents = c('NK', 'B'), invert = TRUE) ```{r avg.exp, fig.height=8} # How can I pseudobulk cells within a cluster? +# First, replace spaces with underscores '_' so ggplot2 doesn't fail +pbmc$CellType <- gsub(" ", "_", pbmc$CellType) +Idents(pbmc) <- pbmc$CellType # Return this information as a Seurat object (enables downstream plotting and analysis) # The summed counts are stored in the counts layer and normalized value are stored in the data layer cluster.pseudobulk <- AggregateExpression(pbmc, return.seurat=TRUE) @@ -121,11 +124,11 @@ head(cluster.pseudobulk[['RNA']]$data[1:5, ]) # How can I plot the average expression of NK cells vs. CD8 T cells? # Pass do.hover = T for an interactive plot to identify gene outliers -CellScatter(cluster.pseudobulk, cell1 = "NK", cell2 = "CD8 T") +CellScatter(cluster.pseudobulk, cell1 = "NK", cell2 = "CD8-T") # How can I calculate pseudobulked expression values separately for each replicate? cluster.pseudobulk <- AggregateExpression(pbmc, return.seurat = TRUE, group.by = c("CellType", "replicate")) -CellScatter(cluster.pseudobulk, cell1 = "CD8 T_rep1", cell2 = "CD8 T_rep2") +CellScatter(cluster.pseudobulk, cell1 = "CD8-T_rep1", cell2 = "CD8-T_rep2") # You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between replicates DoHeatmap(cluster.pseudobulk, features = unlist(TopFeatures(pbmc[['pca']], balanced = TRUE)), size = 3, draw.lines = FALSE) From df92ce274d6275b027240fd165b57a1d32427c92 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Wed, 4 Oct 2023 10:54:26 -0400 Subject: [PATCH 827/979] fixed IntegrateData bug for v5 assays --- R/integration.R | 102 ++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/R/integration.R b/R/integration.R index 96d26081c..31021599a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3865,61 +3865,57 @@ FilterAnchors <- function( eps = 0, verbose = TRUE ) { - if (inherits(x = object[[assay[1]]], what = 'Assay5')) { - message("Anchor filtering is currently not supported with v5 assays.") + if (verbose) { + message("Filtering anchors") + } + assay <- assay %||% DefaultAssay(object = object) + features <- features %||% VariableFeatures(object = object) + if (length(x = features) == 0) { + stop("No features provided and no VariableFeatures computed.") + } + features <- unique(x = features) + neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') + nn.cells1 <- neighbors$cells1 + nn.cells2 <- neighbors$cells2 + if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { + warning("Number of anchor cells is less than k.filter. Retaining all anchors.") + k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") } else { - if (verbose) { - message("Filtering anchors") - } - assay <- assay %||% DefaultAssay(object = object) - features <- features %||% VariableFeatures(object = object) - if (length(x = features) == 0) { - stop("No features provided and no VariableFeatures computed.") - } - features <- unique(x = features) - neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') - nn.cells1 <- neighbors$cells1 - nn.cells2 <- neighbors$cells2 - if (min(length(x = nn.cells1), length(x = nn.cells2)) < k.filter) { - warning("Number of anchor cells is less than k.filter. Retaining all anchors.") - k.filter <- min(length(x = nn.cells1), length(x = nn.cells2)) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") - } else { - cn.data1 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[1]]], - slot = slot)[features, nn.cells1])), - MARGIN = 1) - cn.data2 <- L2Norm( - mat = as.matrix(x = t(x = GetAssayData( - object = object[[assay[2]]], - slot = slot)[features, nn.cells2])), - MARGIN = 1) - nn <- NNHelper( - data = cn.data2[nn.cells2, ], - query = cn.data1[nn.cells1, ], - k = k.filter, - method = nn.method, - n.trees = n.trees, - eps = eps - ) + cn.data1 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[1]]], + slot = slot)[features, nn.cells1])), + MARGIN = 1) + cn.data2 <- L2Norm( + mat = as.matrix(x = t(x = GetAssayData( + object = object[[assay[2]]], + slot = slot)[features, nn.cells2])), + MARGIN = 1) + nn <- NNHelper( + data = cn.data2[nn.cells2, ], + query = cn.data1[nn.cells1, ], + k = k.filter, + method = nn.method, + n.trees = n.trees, + eps = eps + ) - anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") - position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { - which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] - }) - anchors <- anchors[!is.na(x = position), ] - if (verbose) { - message("\tRetained ", nrow(x = anchors), " anchors") - } + anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors") + position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) { + which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1] + }) + anchors <- anchors[!is.na(x = position), ] + if (verbose) { + message("\tRetained ", nrow(x = anchors), " anchors") } - object <- SetIntegrationData( - object = object, - integration.name = integration.name, - slot = "anchors", - new.data = anchors - ) } + object <- SetIntegrationData( + object = object, + integration.name = integration.name, + slot = "anchors", + new.data = anchors + ) return(object) } @@ -4227,7 +4223,9 @@ FindIntegrationMatrix <- function( neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors') nn.cells1 <- neighbors$cells1 nn.cells2 <- neighbors$cells2 - object <- JoinLayers(object) + if (inherits(x = object[[assay[1]]], what = 'Assay5')) { + object <- JoinLayers(object) + } anchors <- GetIntegrationData( object = object, integration.name = integration.name, From 7d14d18788dfe121c8633305ce9d58d3a3ce7859 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Wed, 4 Oct 2023 10:54:53 -0400 Subject: [PATCH 828/979] removed k.filter=NA default for v5 assays --- R/integration.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/integration.R b/R/integration.R index 31021599a..954aaa42c 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5836,11 +5836,12 @@ ValidateParams_FindTransferAnchors <- function( if (reduction == "lsiproject") { ModifyParam(param = "k.filter", value = NA) } - if (inherits(x = reference[[reference.assay]], what = 'Assay5') || - inherits(x = query[[query.assay]], what = 'Assay5')) { - # current filter anchors not support for v5 assay - ModifyParam(param = "k.filter", value = NA) - } + # commented out to enable filter anchors for v5 assay + # if (inherits(x = reference[[reference.assay]], what = 'Assay5') || + # inherits(x = query[[query.assay]], what = 'Assay5')) { + # # current filter anchors not support for v5 assay + # ModifyParam(param = "k.filter", value = NA) + # } if (!is.na(x = k.filter) && k.filter > ncol(x = query)) { warning("k.filter is larger than the number of cells present in the query.\n", "Continuing without anchor filtering.", From 91ffc1488d3023b23207d9d5478f18b86fe8a407 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Wed, 4 Oct 2023 11:07:01 -0400 Subject: [PATCH 829/979] updated version number --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e30f24eb..318d21f43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9064 -Date: 2023-09-25 +Version: 4.9.9.9069 +Date: 2023-10-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 8b0165cbb81d6e6ccff709bd3ee6e32f316267c5 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 4 Oct 2023 12:00:12 -0400 Subject: [PATCH 830/979] create object.split --- tests/testthat/test_preprocessing.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 5e3bb9ebe..0dea4f3a5 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -105,6 +105,7 @@ if(class(object[['RNA']]) == "Assay5") { fake.groups <- c(rep(1, floor(ncol(pbmc.test)/2)), rep(2, ncol(pbmc.test) - (floor(ncol(pbmc.test)/2))) ) object$groups <- fake.groups + object.split <- object object.split[["RNA"]] <- split(object[["RNA"]], f = object$groups) object.split <- NormalizeData(object = object.split) From 4bd7205ddabfe0297db3fdb50f250ef105a37e34 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 4 Oct 2023 12:05:37 -0400 Subject: [PATCH 831/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3a5f6ea84..807c26f92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9068 -Date: 2023-10-03 +Version: 4.9.9.9070 +Date: 2023-10-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From d3edc82fcd7213e2130eaeca79f5706111b0b89e Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 4 Oct 2023 14:23:34 -0400 Subject: [PATCH 832/979] fix variablefeatures tests for v5 --- tests/testthat/test_preprocessing.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 0dea4f3a5..6b697b409 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -302,29 +302,31 @@ object <- FindVariableFeatures(object = object, selection.method = "mean.var.plo test_that("mean.var.plot selection option returns expected values", { expect_equal(VariableFeatures(object = object)[1:4], c("PTGDR", "SATB1", "ZNF330", "S100B")) expect_equal(length(x = VariableFeatures(object = object)), 20) - expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$mean[1:2], c(8.328927, 8.444462), tolerance = 1e-6) - expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion[1:2], c(10.552507, 10.088223), tolerance = 1e-6) - expect_equal(as.numeric(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion.scaled[1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) + hvf_info <- HVFInfo(object = object[["RNA"]], method = 'mvp') + expect_equal(hvf_info[[grep("mean$", colnames(hvf_info), value = TRUE)]][1:2], c(8.328927, 8.444462), tolerance = 1e-6) + expect_equal(hvf_info[[grep("dispersion$", colnames(hvf_info), value = TRUE)]][1:2], c(10.552507, 10.088223), tolerance = 1e-6) + expect_equal(as.numeric(hvf_info[[grep("dispersion.scaled$", colnames(hvf_info), value = TRUE)]][1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) }) object <- FindVariableFeatures(object, selection.method = "dispersion", verbose = FALSE) test_that("dispersion selection option returns expected values", { expect_equal(VariableFeatures(object = object)[1:4], c("PCMT1", "PPBP", "LYAR", "VDAC3")) expect_equal(length(x = VariableFeatures(object = object)), 230) - expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$mean[1:2], c(8.328927, 8.444462), tolerance = 1e-6) - expect_equal(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion[1:2], c(10.552507, 10.088223), tolerance = 1e-6) - expect_equal(as.numeric(HVFInfo(object = object[["RNA"]], method = 'mvp')$dispersion.scaled[1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) - expect_true(!is.unsorted(rev(HVFInfo(object = object[["RNA"]], method = 'mvp')[VariableFeatures(object = object), "dispersion"]))) + hvf_info <- HVFInfo(object = object[["RNA"]], method = 'mvp') + expect_equal(hvf_info[[grep("mean$", colnames(hvf_info), value = TRUE)]][1:2], c(8.328927, 8.444462), tolerance = 1e-6) + expect_equal(hvf_info[[grep("dispersion$", colnames(hvf_info), value = TRUE)]][1:2], c(10.552507, 10.088223), tolerance = 1e-6) + expect_equal(as.numeric(hvf_info[[grep("dispersion.scaled$", colnames(hvf_info), value = TRUE)]][1:2]), c(0.1113214, -0.1332181523), tolerance = 1e-6) + expect_true(!is.unsorted(rev(hvf_info[VariableFeatures(object = object), "dispersion"]))) }) object <- FindVariableFeatures(object, selection.method = "vst", verbose = FALSE) test_that("vst selection option returns expected values", { expect_equal(VariableFeatures(object = object)[1:4], c("PPBP", "IGLL5", "VDAC3", "CD1C")) expect_equal(length(x = VariableFeatures(object = object)), 230) - expect_equal(unname(object[["RNA"]]["vst.variance", drop = TRUE][1:2]), c(1.0251582, 1.2810127), tolerance = 1e-6) - expect_equal(unname(object[["RNA"]]["vst.variance.expected", drop = TRUE][1:2]), c(1.1411616, 2.7076228), tolerance = 1e-6) - expect_equal(unname(object[["RNA"]]["vst.variance.standardized", drop = TRUE][1:2]), c(0.8983463, 0.4731134), tolerance = 1e-6) - expect_true(!is.unsorted(rev(object[["RNA"]]["vst.variance.standardized", drop = TRUE][VariableFeatures(object = object)]))) + hvf_info <- HVFInfo(object = object[["RNA"]], method = 'vst') + expect_equal(hvf_info[[grep("variance$", colnames(hvf_info), value = TRUE)]][1:2], c(1.0251582, 1.2810127), tolerance = 1e-6) + expect_equal(hvf_info[[grep("variance.standardized$", colnames(hvf_info), value = TRUE)]][1:2], c(0.8983463, 0.4731134), tolerance = 1e-6) + expect_true(!is.unsorted(rev(hvf_info[VariableFeatures(object = object), grep("variance.standardized$", colnames(hvf_info))]))) }) #object <- FindVariableFeatures(object, assay = "RNAbp") From 03d9003748350e7d16b37db05c254f08e78d5fab Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 4 Oct 2023 16:01:56 -0400 Subject: [PATCH 833/979] add BPcells PCAtest --- tests/testthat/test_dimensional_reduction.R | 14 +++++++------- tests/testthat/test_objects.R | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index fbad42aa9..5f83ea862 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -11,6 +11,7 @@ row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) # Create Seurat object for testing obj <- CreateSeuratObject(counts = as.sparse(dummyexpMat)) + test_that("different ways of passing distance matrix", { # Manually make a distance object to test distMat <- dist(t(dummyexpMat)) @@ -35,7 +36,7 @@ obj <- ScaleData(object = obj, verbose = FALSE) pca_result <- suppressWarnings(expr = RunPCA( object = obj, - features = rownames(x = obj), + features = rownames(obj[['RNA']]$counts), verbose = FALSE )) @@ -58,11 +59,10 @@ obj <- ScaleData(object = obj, verbose=FALSE) pca_result_bp <- suppressWarnings(expr = RunPCA( object = obj, features = rownames(obj[['RNAbp']]$counts), - assay = "RNAbp", - layer = "counts" -)) - -test_that("pca is equivalent for BPCells") { - RunPCA(obj, assay = "RNAbp") + assay = "RNAbp")) +test_that("pca is equivalent for BPCells", { + expect_equivalent(abs(pca_result_bp[['pca']]@cell.embeddings), + abs(pca_result[['pca']]@cell.embeddings), + tolerance = 1e-5) }) diff --git a/tests/testthat/test_objects.R b/tests/testthat/test_objects.R index fc33ca04a..e0bf005d8 100644 --- a/tests/testthat/test_objects.R +++ b/tests/testthat/test_objects.R @@ -7,7 +7,7 @@ test_that("as.SingleCellExperiment works", { if (requireNamespace('SingleCellExperiment', quietly = TRUE)) { mat <- pbmc_small[["RNA"]]$counts seuratObj <- Seurat::CreateSeuratObject(mat) - sce <- as.SingleCellExperiment(seuratObj) + sce <- suppressWarnings(as.SingleCellExperiment(seuratObj)) expect_equal(ncol(sce), 80) expect_equal(nrow(sce), 230) @@ -16,7 +16,7 @@ test_that("as.SingleCellExperiment works", { seuratObj <- Seurat::CreateSeuratObject(mat) seuratObj[['ADT']] <- CreateAssayObject(mat) - sce <- as.SingleCellExperiment(seuratObj) + sce <- suppressWarnings(as.SingleCellExperiment(seuratObj)) expect_equal(ncol(sce), 80) expect_equal(nrow(sce), 230) # expect_equal(names(SingleCellExperiment::altExps(sce)), 'ADT') From 44dbfd69915f7521da3da01586013f677aa88fb0 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Wed, 4 Oct 2023 16:43:26 -0400 Subject: [PATCH 834/979] Updated differential expression tests, including fixing tolerances --- tests/testthat/test_differential_expression.R | 121 ++++++++++++------ 1 file changed, 80 insertions(+), 41 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 5b9414c59..b04ca87f2 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -14,44 +14,83 @@ markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ide results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1)) results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1)) +markers.0.limma <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1,test.use='wilcox_limma')) +markers.01.limma <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1),pseudocount.use = 1,test.use='wilcox_limma')) +results.clr.limma <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,test.use='wilcox_limma')) +results.sct.limma <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,test.use='wilcox_limma')) + + test_that("Default settings work as expected with pseudocount = 1", { expect_error(FindMarkers(object = pbmc_small)) expect_error(FindMarkers(object = pbmc_small, ident.1 = "test")) expect_error(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = "test")) expect_equal(colnames(x = markers.0), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) - expect_equal(markers.0[1, "p_val"], 9.572778e-13) + expect_equal(markers.0[1, "p_val"], 9.572778e-13, tolerance = 1e-18) expect_equal(markers.0[1, "avg_logFC"], -4.034691, tolerance = 1e-6) expect_equal(markers.0[1, "pct.1"], 0.083) expect_equal(markers.0[1, "pct.2"], 0.909) - expect_equal(markers.0[1, "p_val_adj"], 2.201739e-10) + expect_equal(markers.0[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) expect_equal(nrow(x = markers.0), 204) expect_equal(rownames(markers.0)[1], "HLA-DPB1") - - expect_equal(markers.01[1, "p_val"], 1.702818e-11) + + expect_equal(colnames(x = markers.0.limma), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) + expect_equal(markers.0.limma[1, "p_val"], 9.572778e-13, tolerance = 1e-18) + expect_equal(markers.0.limma[1, "avg_logFC"], -4.034691, tolerance = 1e-6) + expect_equal(markers.0.limma[1, "pct.1"], 0.083) + expect_equal(markers.0.limma[1, "pct.2"], 0.909) + expect_equal(markers.0.limma[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) + expect_equal(nrow(x = markers.0.limma), 204) + expect_equal(rownames(markers.0.limma)[1], "HLA-DPB1") + + expect_equal(markers.01[1, "p_val"], 1.702818e-11, tolerance = 1e-16) expect_equal(markers.01[1, "avg_logFC"], -2.539289, tolerance = 1e-6) expect_equal(markers.01[1, "pct.1"], 0.111) expect_equal(markers.01[1, "pct.2"], 1.00) - expect_equal(markers.01[1, "p_val_adj"], 3.916481e-09) + expect_equal(markers.01[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) expect_equal(nrow(x = markers.01), 201) expect_equal(rownames(x = markers.01)[1], "TYMP") + + expect_equal(markers.01.limma[1, "p_val"], 1.702818e-11, tolerance = 1e-16) + expect_equal(markers.01.limma[1, "avg_logFC"], -2.539289, tolerance = 1e-6) + expect_equal(markers.01.limma[1, "pct.1"], 0.111) + expect_equal(markers.01.limma[1, "pct.2"], 1.00) + expect_equal(markers.01.limma[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) + expect_equal(nrow(x = markers.01.limma), 201) + expect_equal(rownames(x = markers.01.limma)[1], "TYMP") # CLR normalization - expect_equal(results.clr[1, "p_val"], 1.209462e-11) + expect_equal(results.clr[1, "p_val"], 1.209462e-11, tolerance = 1e-16) expect_equal(results.clr[1, "avg_logFC"], -0.8290693, tolerance = 1e-6) expect_equal(results.clr[1, "pct.1"], 0.111) expect_equal(results.clr[1, "pct.2"], 0.96) - expect_equal(results.clr[1, "p_val_adj"], 2.781762e-09) + expect_equal(results.clr[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) expect_equal(nrow(x = results.clr), 85) expect_equal(rownames(x = results.clr)[1], "S100A8") + + expect_equal(results.clr.limma[1, "p_val"], 1.209462e-11, tolerance = 1e-16) + expect_equal(results.clr.limma[1, "avg_logFC"], -0.8290693, tolerance = 1e-6) + expect_equal(results.clr.limma[1, "pct.1"], 0.111) + expect_equal(results.clr.limma[1, "pct.2"], 0.96) + expect_equal(results.clr.limma[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) + expect_equal(nrow(x = results.clr.limma), 85) + expect_equal(rownames(x = results.clr.limma)[1], "S100A8") # SCT normalization - expect_equal(results.sct[1, "p_val"], 4.646968e-11) + expect_equal(results.sct[1, "p_val"], 4.646968e-11, tolerance = 1e-16) expect_equal(results.sct[1, "avg_logFC"], -1.8522457, tolerance = 1e-6) expect_equal(results.sct[1, "pct.1"], 0.333) expect_equal(results.sct[1, "pct.2"], 1.00) - expect_equal(results.sct[1, "p_val_adj"], 1.022333e-08) + expect_equal(results.sct[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) expect_equal(nrow(x = results.sct), 156) expect_equal(rownames(x = results.sct)[1], "CST3") + + expect_equal(results.sct.limma[1, "p_val"], 4.646968e-11, tolerance = 1e-16) + expect_equal(results.sct.limma[1, "avg_logFC"], -1.8522457, tolerance = 1e-6) + expect_equal(results.sct.limma[1, "pct.1"], 0.333) + expect_equal(results.sct.limma[1, "pct.2"], 1.00) + expect_equal(results.sct.limma[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) + expect_equal(nrow(x = results.sct.limma), 156) + expect_equal(rownames(x = results.sct.limma)[1], "CST3") }) @@ -60,11 +99,11 @@ vargenes.results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = test_that("features parameter behaves correctly ", { expect_equal(nrow(x = tymp.results), 1) - expect_equal(tymp.results[1, "p_val"], 3.227445e-07) + expect_equal(tymp.results[1, "p_val"], 3.227445e-07, tolerance = 1e-12) expect_equal(tymp.results[1, "avg_logFC"], -2.093928, tolerance = 1e-6) expect_equal(tymp.results[1, "pct.1"], 0.111) expect_equal(tymp.results[1, "pct.2"], 0.682) - expect_equal(tymp.results[1, "p_val_adj"], 7.423123e-05) + expect_equal(tymp.results[1, "p_val_adj"], 7.423123e-05, tolerance = 1e-10) expect_equal(rownames(x = tymp.results)[1], "TYMP") expect_equal(nrow(x = vargenes.results), 19) @@ -152,7 +191,7 @@ test_that("only.pos works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, max.cells.per.ident = 20, verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("max.cells.per.ident works", { expect_equal(nrow(x = results), 201) - expect_equal(results[1, "p_val"], 3.428568e-08) + expect_equal(results[1, "p_val"], 3.428568e-08, tolerance = 1e-13) expect_equal(results[1, "avg_logFC"], -2.539289, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.111) expect_equal(results[1, "pct.2"], 1) @@ -165,11 +204,11 @@ test_that("latent.vars works", { expect_error(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "fake", verbose = FALSE)) expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "groups", verbose = FALSE)) expect_equal(nrow(x = results), 201) - expect_equal(results[1, "p_val"], 2.130202e-16) + expect_equal(results[1, "p_val"], 2.130202e-16, tolerance = 1e-21) expect_equal(results[1, "avg_logFC"], -3.082150, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.417) expect_equal(results[1, "pct.2"], 1) - expect_equal(results[1, "p_val_adj"], 4.899466e-14) + expect_equal(results[1, "p_val_adj"], 4.899466e-14, tolerance = 1e-19) expect_equal(rownames(x = results)[1], "LYZ") }) @@ -207,20 +246,20 @@ test_that("subset.ident works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, reduction = "pca", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("reduction works", { - expect_equal(results[1, "p_val"], 1.664954e-10) + expect_equal(results[1, "p_val"], 1.664954e-10, tolerance = 1e-15) expect_equal(results[1, "avg_diff"], -2.810453669, tolerance = 1e-6) - expect_equal(results[1, "p_val_adj"], 3.163412e-09) + expect_equal(results[1, "p_val_adj"], 3.163412e-09, tolerance = 1e-14) expect_equal(rownames(x = results)[1], "PC_2") }) results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "bimod", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("bimod test works", { expect_equal(nrow(x = results), 201) - expect_equal(results[1, "p_val"], 4.751376e-17) + expect_equal(results[1, "p_val"], 4.751376e-17, tolerance = 1e-22) expect_equal(results[1, "avg_logFC"], -2.552769, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.306) expect_equal(results[1, "pct.2"], 1.00) - expect_equal(results[1, "p_val_adj"], 1.092816e-14) + expect_equal(results[1, "p_val_adj"], 1.092816e-14, tolerance = 1e-19) expect_equal(rownames(x = results)[1], "CST3") }) @@ -238,46 +277,46 @@ test_that("roc test works", { }) results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "t", verbose = FALSE, base = exp(1), pseudocount.use = 1) -test_that("bimod test works", { +test_that("t test works", { expect_equal(nrow(x = results), 201) - expect_equal(results["CST3", "p_val"], 1.170112e-15) + expect_equal(results["CST3", "p_val"], 1.170112e-15, tolerance = 1e-20) expect_equal(results["CST3", "avg_logFC"], -2.552769 , tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) - expect_equal(results["CST3", "p_val_adj"], 2.691258e-13) + expect_equal(results["CST3", "p_val_adj"], 2.691258e-13, tolerance = 1e-18) expect_equal(rownames(x = results)[1], "TYMP") }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "negbinom", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("negbinom test works", { expect_equal(nrow(x = results), 149) - expect_equal(results["CST3", "p_val"], 1.354443e-17) + expect_equal(results["CST3", "p_val"], 1.354443e-17, tolerance = 1e-22) expect_equal(results["CST3", "avg_logFC"], -2.353701, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) - expect_equal(results["CST3", "p_val_adj"], 3.115218e-15) + expect_equal(results["CST3", "p_val_adj"], 3.115218e-15, tolerance = 1e-20) expect_equal(rownames(x = results)[1], "LYZ") }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "poisson", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("poisson test works", { expect_equal(nrow(x = results), 149) - expect_equal(results["CST3", "p_val"], 3.792196e-78) + expect_equal(results["CST3", "p_val"], 3.792196e-78, tolerance = 1e-83) expect_equal(results["CST3", "avg_logFC"], -2.353701, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) - expect_equal(results["CST3", "p_val_adj"], 8.722050e-76) + expect_equal(results["CST3", "p_val_adj"], 8.722050e-76, tolerance = 1e-81) expect_equal(rownames(x = results)[1], "LYZ") }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "LR", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("LR test works", { expect_equal(nrow(x = results), 201) - expect_equal(results["CST3", "p_val"], 3.990707e-16) + expect_equal(results["CST3", "p_val"], 3.990707e-16, tolerance = 1e-21) expect_equal(results["CST3", "avg_logFC"], -2.552769, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) - expect_equal(results["CST3", "p_val_adj"], 9.178625e-14) + expect_equal(results["CST3", "p_val_adj"], 9.178625e-14, tolerance = 1e-19) expect_equal(rownames(x = results)[1], "LYZ") }) @@ -290,38 +329,38 @@ results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc test_that("FindAllMarkers works as expected", { expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) - expect_equal(results[1, "p_val"], 9.572778e-13) + expect_equal(results[1, "p_val"], 9.572778e-13, tolerance = 1e-18) expect_equal(results[1, "avg_log2FC"], -5.820829, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.083) expect_equal(results[1, "pct.2"], 0.909) - expect_equal(results[1, "p_val_adj"], 2.201739e-10) + expect_equal(results[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) expect_equal(nrow(x = results), 222) expect_equal(rownames(results)[1], "HLA-DPB1") # CLR normalization - expect_equal(results.clr[1, "p_val"], 1.209462e-11) + expect_equal(results.clr[1, "p_val"], 1.338858e-12, tolerance = 1e-17) expect_equal(results.clr[1, "avg_log2FC"], -1.079924, tolerance = 1e-6) expect_equal(results.clr[1, "pct.1"], 0.083) expect_equal(results.clr[1, "pct.2"], 0.909) - expect_equal(results.clr[1, "p_val_adj"], 3.079373e-10) + expect_equal(results.clr[1, "p_val_adj"], 3.079373e-10, tolerance = 1e-15) expect_equal(nrow(x = results.clr), 200) expect_equal(rownames(x = results.clr)[1], "HLA-DPB1") # SCT normalization - expect_equal(results.sct[1, "p_val"], 4.25861e-12) + expect_equal(results.sct[1, "p_val"], 1.366621e-12, tolerance = 1e-17) expect_equal(results.sct[1, "avg_log2FC"], -2.848796, tolerance = 1e-6) expect_equal(results.sct[1, "pct.1"], 0.111) expect_equal(results.sct[1, "pct.2"], 0.909) - expect_equal(results.sct[1, "p_val_adj"], 3.005572e-10) + expect_equal(results.sct[1, "p_val_adj"], 3.006566e-10, tolerance = 1e-15) expect_equal(nrow(x = results.sct), 202) expect_equal(rownames(x = results.sct)[1], "HLA-DPB1") # pseudocount.use = 0.1 - expect_equal(results.pseudo[1, "p_val"], 9.572778e-13) + expect_equal(results.pseudo[1, "p_val"], 9.572778e-13, tolerance = 1e-18) expect_equal(results.pseudo[1, "avg_log2FC"], -6.013818, tolerance = 1e-6) expect_equal(results.pseudo[1, "pct.1"], 0.083) expect_equal(results.pseudo[1, "pct.2"], 0.909) - expect_equal(results.pseudo[1, "p_val_adj"], 2.201739e-10) + expect_equal(results.pseudo[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) expect_equal(nrow(x = results.pseudo), 222) expect_equal(rownames(results.pseudo)[1], "HLA-DPB1") }) @@ -353,8 +392,8 @@ object <- suppressMessages(RunPCA(object, verbose = FALSE)) object <- suppressMessages(FindNeighbors(object = object, verbose = FALSE)) object <- suppressMessages(FindClusters(object, verbose = FALSE)) markers <- FindMarkers(object = object, ident.1="0", ident.2="1",pseudocount.use = 1, verbose=FALSE) -test_that("FindMarkers recognizes log normalizatio", { - expect_equal(markers[1, "p_val"], 1.598053e-14) +test_that("FindMarkers recognizes log normalization", { + expect_equal(markers[1, "p_val"], 1.598053e-14, tolerance = 1e-19) expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) }) @@ -378,13 +417,13 @@ if (requireNamespace('metap', quietly = TRUE)) { # expect_equal(markers[1, "g2_pct.1"], 0.062) expect_equal(markers[1, "g2_pct.2"], 0.75) expect_equal(markers[1, "g2_p_val_adj"], 0.0114622238) - expect_equal(markers[1, "g1_p_val"], 3.946643e-08) + expect_equal(markers[1, "g1_p_val"], 3.946643e-08, tolerance = 1e-13) expect_equal(markers[1, "g1_avg_logFC"], -3.589384, tolerance = 1e-6) expect_equal(markers[1, "g1_pct.1"], 0.10) expect_equal(markers[1, "g1_pct.2"], 0.958) expect_equal(markers[1, "g1_p_val_adj"], 9.077279e-06) expect_equal(markers[1, "max_pval"], 4.983576e-05) - expect_equal(markers[1, "minimump_p_val"], 7.893286e-08) + expect_equal(markers[1, "minimump_p_val"], 7.893286e-08, tolerance = 1e-13) expect_equal(nrow(markers), 179) expect_equal(rownames(markers)[1], "HLA-DRB1") expect_equal(markers[, "max_pval"], unname(obj = apply(X = markers, MARGIN = 1, FUN = function(x) max(x[c("g1_p_val", "g2_p_val")])))) @@ -405,11 +444,11 @@ if (requireNamespace('metap', quietly = TRUE)) { test_that("FindConservedMarkers handles missing idents in certain groups", { expect_warning(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t")) expect_equal(colnames(x = markers.missing), paste0("g2_", standard.names)) - expect_equal(markers.missing[1, "g2_p_val"], 1.672911e-13) + expect_equal(markers.missing[1, "g2_p_val"], 1.672911e-13, tolerance = 1e-18) expect_equal(markers.missing[1, "g2_avg_logFC"], -4.527888, tolerance = 1e-6) # expect_equal(markers.missing[1, "g2_pct.1"], 0.062) expect_equal(markers.missing[1, "g2_pct.2"], 0.95) - expect_equal(markers.missing[1, "g2_p_val_adj"], 3.847695e-11) + expect_equal(markers.missing[1, "g2_p_val_adj"], 3.847695e-11, tolerance = 1e-16) expect_equal(nrow(markers.missing), 205) expect_equal(rownames(markers.missing)[1], "HLA-DPB1") }) From 1a011e4848a5f841663c30c43bbe8c6051ae36c6 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Wed, 4 Oct 2023 16:46:51 -0400 Subject: [PATCH 835/979] Fixed adjusted p-value calculation for dimension reduction DE testing --- R/differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 60e71617a..c4432a4f3 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -920,7 +920,7 @@ FindMarkers.DimReduc <- function( de.results$p_val_adj = p.adjust( p = de.results$p_val, method = "bonferroni", - n = nrow(x = object) + n = ncol(x = object) ) } return(de.results) From d63cb80b0a3fdfa1e0112958170954de59ba681a Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Wed, 4 Oct 2023 16:53:48 -0400 Subject: [PATCH 836/979] updated version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0e30f24eb..c71539cb5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9064 -Date: 2023-09-25 +Version: 4.9.9.9071 +Date: 2023-10-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 2ee582cf17e4451ac5f8e0cffaac41cc833edcbb Mon Sep 17 00:00:00 2001 From: Gesmira Date: Thu, 5 Oct 2023 10:32:29 -0400 Subject: [PATCH 837/979] man updates --- NAMESPACE | 2 -- man/AverageExpression.Rd | 4 ++-- man/BuildClusterTree.Rd | 3 +-- man/HVFInfo.SCTAssay.Rd | 2 ++ man/VlnPlot.Rd | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d61886996..e9b22c7ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -524,7 +524,6 @@ importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.CalcN) importFrom(SeuratObject,.FilterObjects) -importFrom(SeuratObject,.IsFutureSeurat) importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.PropagateList) importFrom(SeuratObject,.SparseSlots) @@ -537,7 +536,6 @@ importFrom(SeuratObject,Cells) importFrom(SeuratObject,CellsByIdentities) importFrom(SeuratObject,Command) importFrom(SeuratObject,CreateAssayObject) -importFrom(SeuratObject,CreateAssay5Object) importFrom(SeuratObject,CreateCentroids) importFrom(SeuratObject,CreateDimReducObject) importFrom(SeuratObject,CreateFOV) diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index 94e11524a..a0ba1410e 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -40,14 +40,14 @@ the order of 'assays' (if specified) or object's assays} \item{slot}{(Deprecated). Slots(s) to use} +\item{method}{Method of collapsing expression values. Either 'average' or 'aggregate'} + \item{normalization.method}{Method for normalization, see \code{\link{NormalizeData}}} \item{scale.factor}{Scale factor for normalization, see \code{\link{NormalizeData}}} \item{margin}{Margin to perform CLR normalization, see \code{\link{NormalizeData}}} -\item{method}{Method of collapsing expression values. Either 'average' or 'aggregate'} - \item{verbose}{Print messages and show progress bar} \item{...}{Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}} diff --git a/man/BuildClusterTree.Rd b/man/BuildClusterTree.Rd index 01d26dd59..38e69335d 100644 --- a/man/BuildClusterTree.Rd +++ b/man/BuildClusterTree.Rd @@ -34,8 +34,7 @@ is not NULL.} \item{graph}{If graph is passed, build tree based on graph connectivity between clusters; overrides \code{dims} and \code{features}} -\item{slot}{Slot(s) to use; if multiple slots are given, assumed to follow -the order of 'assays' (if specified) or object's assays} +\item{slot}{(Deprecated). Slots(s) to use} \item{reorder}{Re-order identity classes (factor ordering), according to position on the tree. This groups similar classes together which can be diff --git a/man/HVFInfo.SCTAssay.Rd b/man/HVFInfo.SCTAssay.Rd index ddf24e2d5..4c2f20a16 100644 --- a/man/HVFInfo.SCTAssay.Rd +++ b/man/HVFInfo.SCTAssay.Rd @@ -9,6 +9,8 @@ \arguments{ \item{object}{An object} +\item{method}{method to determine variable features} + \item{status}{Add variable status to the resulting data frame} \item{...}{Arguments passed to other methods} diff --git a/man/VlnPlot.Rd b/man/VlnPlot.Rd index 6883f12f1..00be2eb10 100644 --- a/man/VlnPlot.Rd +++ b/man/VlnPlot.Rd @@ -21,7 +21,7 @@ VlnPlot( log = FALSE, ncol = NULL, slot = deprecated(), - layer = "data", + layer = NULL, split.plot = FALSE, stack = FALSE, combine = TRUE, From 956fd5aa37a661becdb5ffc4b69421e59a6085a7 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 6 Oct 2023 08:22:41 -0400 Subject: [PATCH 838/979] Add additional repositories --- DESCRIPTION | 1 + man/reexports.Rd | 4 ++-- src/RcppExports.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 807c26f92..60010419a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Authors@R: c( ) URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues +Additional_repositories: https://satijalab.r-universe.dev Remotes: bnprks/BPCells, mojaveazure/seurat-object@seurat5 diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 894d7d13ce4b0ad515b3dfb4fee9acfd617dd2d1 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 6 Oct 2023 09:56:53 -0400 Subject: [PATCH 839/979] Fix CRAN notes --- DESCRIPTION | 5 ++--- R/preprocessing5.R | 11 +++++------ src/RcppExports.cpp | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 60010419a..41ed69693 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,6 @@ URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues Additional_repositories: https://satijalab.r-universe.dev Remotes: - bnprks/BPCells, mojaveazure/seurat-object@seurat5 Depends: R (>= 4.0.0), @@ -89,8 +88,7 @@ Imports: tibble, tools, utils, - uwot (>= 0.1.10), - BPCells (>= 0.0.0.9000) + uwot (>= 0.1.10) LinkingTo: Rcpp (>= 0.11.0), RcppEigen, RcppProgress License: MIT + file LICENSE LazyData: true @@ -119,6 +117,7 @@ RoxygenNote: 7.2.3 Encoding: UTF-8 Suggests: ape, + BPCells, rsvd, testthat, hdf5r, diff --git a/R/preprocessing5.R b/R/preprocessing5.R index aedce8c3f..478f4e3e5 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1249,6 +1249,7 @@ SCTransform.StdAssay <- function( seed.use = seed.use, verbose = verbose) min_var <- vst.out$arguments$min_variance + residual.type <- vst.out[['residual_type']] %||% 'pearson' assay.out <- CreateSCTAssay(vst.out = vst.out, do.correct.umi = do.correct.umi, residual.type = residual.type, clip.range = clip.range) @@ -1311,7 +1312,7 @@ SCTransform.StdAssay <- function( } else { new_residual <- get_residuals( vst_out = vst_out, - umi = counts.vp[all.features,], + umi = counts.vp[all_features,], residual_type = "pearson", min_variance = min_var, res_clip_range = res_clip_range, @@ -1615,6 +1616,7 @@ FetchResidualSCTModel <- function( assay = "SCT", umi.assay = "RNA", layer = "counts", + chunk_size = 2000, layer.cells = NULL, SCTModel = NULL, reference.SCT.model = NULL, @@ -1857,7 +1859,7 @@ GetResidualsChunked <- function(vst_out, layer.counts, residual_type, min_varian cells.grid <- split(x = cells.vector, f = ceiling(x = seq_along(along.with = cells.vector)/chunk_size)) for (i in seq_len(length.out = length(x = cells.grid))) { vp <- cells.grid[[i]] - counts.vp <- as.sparse(x = layer.data[, vp]) + counts.vp <- as.sparse(x = layer.counts[, vp]) vst.out <- vst_out vst.out$cell_attr <- vst.out$cell_attr[colnames(x = counts.vp),,drop=FALSE] residuals.list[[i]] <- get_residuals( @@ -1874,9 +1876,6 @@ GetResidualsChunked <- function(vst_out, layer.counts, residual_type, min_varian stop("Data type not supported") } return (residuals) - - - } #' temporal function to get residuals from reference @@ -1884,7 +1883,7 @@ GetResidualsChunked <- function(vst_out, layer.counts, residual_type, min_varian #' @param reference.SCT.model a reference SCT model that should be used #' for calculating the residuals #' @param features Names of features to compute -#' @param nCount_UMI UMI counts. If not specified, defaults to +#' @param nCount_UMI UMI counts. If not specified, defaults to #' column sums of object #' @param verbose Whether to print messages and progress bars #' @importFrom sctransform get_residuals diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 4050e97a4960eebce213f6d8b1881fa47e9f51ca Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Fri, 6 Oct 2023 13:30:17 -0400 Subject: [PATCH 840/979] update internal function name/description --- R/integration5.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 1b34047a7..fce34d3f1 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -106,7 +106,7 @@ HarmonyIntegration <- function( # verbose = verbose # ) #create grouping variables - groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) + groups <- CreateIntegrationGroups(object, layers = layers, scale.layer = scale.layer) # Run Harmony harmony.embed <- harmony::HarmonyMatrix( data_mat = Embeddings(object = orig), @@ -205,7 +205,7 @@ CCAIntegration <- function( layers <- layers %||% Layers(object, search = 'data') if (normalization.method == 'SCT') { #create grouping variables - groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) + groups <- CreateIntegrationGroups(object, layers = layers, scale.layer = scale.layer) object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct,split.by = 'split') @@ -342,7 +342,7 @@ RPCAIntegration <- function( layers <- layers %||% Layers(object = object, search = 'data') if (normalization.method == 'SCT') { #create grouping variables - groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) + groups <- CreateIntegrationGroups(object, layers = layers, scale.layer = scale.layer) object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct$split <- groups[,1] object.list <- SplitObject(object = object.sct, split.by = 'split') @@ -438,7 +438,7 @@ JointPCAIntegration <- function( if (normalization.method == 'SCT') { #create grouping variables - groups <- CreateGroupVariable(object, layers = layers, scale.layer = scale.layer) + groups <- CreateIntegrationGroups(object, layers = layers, scale.layer = scale.layer) object.sct <- CreateSeuratObject(counts = object, assay = 'SCT') object.sct <- DietSeurat(object = object.sct, features = features.diet) object.sct[['joint.pca']] <- CreateDimReducObject( @@ -612,7 +612,9 @@ IntegrateLayers <- function( #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -CreateGroupVariable <- function(object, layers, scale.layer) { +# Creates data.frame with cell group assignments for integration +# uses SCT models if SCTAssay and layers otherwise +CreateIntegrationGroups <- function(object, layers, scale.layer) { groups <- if (inherits(x = object, what = 'SCTAssay')) { df <- SeuratObject::EmptyDF(n = ncol(x = object)) row.names(x = df) <- colnames(x = object) From ec5378a8aa6cf173f3572b04917d2bd60d54232b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 8 Oct 2023 22:55:20 +0000 Subject: [PATCH 841/979] add back test --- tests/testthat/test_integration.R | 60 +++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tests/testthat/test_integration.R b/tests/testthat/test_integration.R index 5f92e8cf0..942d3aeab 100644 --- a/tests/testthat/test_integration.R +++ b/tests/testthat/test_integration.R @@ -331,36 +331,36 @@ test_that("FindTransferAnchors with default SCT works", { expect_equal(anchors@neighbors, list()) }) -# test_that("FindTransferAnchors with SCT and project.query work", { -# skip_on_cran() -# anchors <- FindTransferAnchors(reference = ref, query = query, normalization.method = "SCT", project.query = TRUE, k.filter = 50, recompute.residuals = FALSE) -# co <- anchors@object.list[[1]] -# expect_equal(dim(co), c(220, 160)) -# expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) -# expect_equal(DefaultAssay(co), "SCT") -# expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) -# expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new("matrix")) -# expect_equal(dim(co[['pcaproject']]), c(160, 30)) -# expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.3308694488, tolerance = 1e-7) -# expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) -# expect_equal(dim(co[['pcaproject.l2']]), c(160, 30)) -# expect_equal(Embeddings(co[['pcaproject.l2']])[1, 1], 0.03807493471, tolerance = 1e-7) -# expect_equal(Loadings(co[['pcaproject.l2']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) -# ref.cells <- paste0(Cells(ref), "_reference") -# query.cells <- paste0(Cells(query), "_query") -# expect_equal(anchors@reference.cells, ref.cells) -# expect_equal(anchors@query.cells, query.cells) -# expect_equal(anchors@reference.objects, logical()) -# anchor.mat <- anchors@anchors -# expect_equal(dim(anchor.mat), c(288, 3)) -# expect_equal(as.vector(anchor.mat[1, ]), c(1, 1, 0.6138996139), tolerance = 1e-7) -# expect_equal(max(anchor.mat[, 2]), 80) -# expect_null(anchors@offsets) -# expect_equal(length(anchors@anchor.features), 220) -# expect_equal(anchors@anchor.features[1], "PPBP") -# expect_equal(anchors@neighbors, list()) -# }) -# +test_that("FindTransferAnchors with SCT and project.query work", { + skip_on_cran() + anchors <- FindTransferAnchors(reference = ref, query = query, normalization.method = "SCT", project.query = TRUE, k.filter = 50, recompute.residuals = FALSE) + co <- anchors@object.list[[1]] + expect_equal(dim(co), c(220, 160)) + expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) + expect_equal(DefaultAssay(co), "SCT") + expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) + expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new("matrix")) + expect_equal(dim(co[['pcaproject']]), c(160, 30)) + expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.3308694488, tolerance = 1e-7) + expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) + expect_equal(dim(co[['pcaproject.l2']]), c(160, 30)) + expect_equal(Embeddings(co[['pcaproject.l2']])[1, 1], 0.03807493471, tolerance = 1e-7) + expect_equal(Loadings(co[['pcaproject.l2']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) + ref.cells <- paste0(Cells(ref), "_reference") + query.cells <- paste0(Cells(query), "_query") + expect_equal(anchors@reference.cells, ref.cells) + expect_equal(anchors@query.cells, query.cells) + expect_equal(anchors@reference.objects, logical()) + anchor.mat <- anchors@anchors + expect_equal(dim(anchor.mat), c(288, 3)) + expect_equal(as.vector(anchor.mat[1, ]), c(1, 1, 0.6138996139), tolerance = 1e-7) + expect_equal(max(anchor.mat[, 2]), 80) + expect_null(anchors@offsets) + expect_equal(length(anchors@anchor.features), 220) + expect_equal(anchors@anchor.features[1], "PPBP") + expect_equal(anchors@neighbors, list()) +}) + test_that("FindTransferAnchors with SCT and l2.norm FALSE work", { skip_on_cran() From 662389aca844c2c63f53c14d6e542e400db63501 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 8 Oct 2023 22:56:49 +0000 Subject: [PATCH 842/979] fix test --- tests/testthat/test_integration.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_integration.R b/tests/testthat/test_integration.R index 942d3aeab..ccffc61bc 100644 --- a/tests/testthat/test_integration.R +++ b/tests/testthat/test_integration.R @@ -341,10 +341,10 @@ test_that("FindTransferAnchors with SCT and project.query work", { expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new("matrix")) expect_equal(dim(co[['pcaproject']]), c(160, 30)) - expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.3308694488, tolerance = 1e-7) + expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.3049308, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) expect_equal(dim(co[['pcaproject.l2']]), c(160, 30)) - expect_equal(Embeddings(co[['pcaproject.l2']])[1, 1], 0.03807493471, tolerance = 1e-7) + expect_equal(Embeddings(co[['pcaproject.l2']])[1, 1], 0.04334884, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject.l2']], projected = T)[1, 1], 0.05788217444, tolerance = 1e-7) ref.cells <- paste0(Cells(ref), "_reference") query.cells <- paste0(Cells(query), "_query") @@ -352,8 +352,8 @@ test_that("FindTransferAnchors with SCT and project.query work", { expect_equal(anchors@query.cells, query.cells) expect_equal(anchors@reference.objects, logical()) anchor.mat <- anchors@anchors - expect_equal(dim(anchor.mat), c(288, 3)) - expect_equal(as.vector(anchor.mat[1, ]), c(1, 1, 0.6138996139), tolerance = 1e-7) + expect_equal(dim(anchor.mat), c(290, 3)) + expect_equal(as.vector(anchor.mat[1, ]), c(1, 1, 0.6315789), tolerance = 1e-7) expect_equal(max(anchor.mat[, 2]), 80) expect_null(anchors@offsets) expect_equal(length(anchors@anchor.features), 220) From 72c5afc5cd4c088c5cb1151ea58a5876a2e3059b Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 9 Oct 2023 16:17:56 -0400 Subject: [PATCH 843/979] Fix PrepDR5 so it acts the same as PrepDR --- R/dimensional_reduction.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index bab2f49cd..b63bb41cf 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2419,11 +2419,11 @@ PrepDR <- function( PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRUE) { layer <- layer[1L] layer <- match.arg(arg = layer, choices = Layers(object = object)) + data.use <- LayerData(object = object, layer = layer) features <- features %||% VariableFeatures(object = object) if (!length(x = features)) { stop("No variable features, run FindVariableFeatures() or provide a vector of features", call. = FALSE) } - data.use <- LayerData(object = object, layer = layer, features = features) features.var <- apply(X = data.use, MARGIN = 1L, FUN = var) features.keep <- features[features.var > 0] if (!length(x = features.keep)) { @@ -2441,9 +2441,10 @@ PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRU ) } } - # features <- features.keep - # features <- features[!is.na(x = features)] - return(LayerData(object = object, layer = layer, features = features.keep)) + features <- features.keep + features <- features[!is.na(x = features)] + data.use <- data.use[features, ] + return(data.use) } #' @param assay Name of Assay SPCA is being run on From 63dedf5c4bf235dbc0de338d55374988947e522a Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 10 Oct 2023 13:24:49 +0000 Subject: [PATCH 844/979] add feature mean --- R/integration.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/integration.R b/R/integration.R index 97f093749..f8c60b909 100644 --- a/R/integration.R +++ b/R/integration.R @@ -913,6 +913,7 @@ FindTransferAnchors <- function( query = reference, scale = scale, dims = dims, + feature.mean = feature.mean, verbose = verbose, normalization.method = normalization.method ) From 8550172238c9cdc4863abe5a6d251c564d3afdcf Mon Sep 17 00:00:00 2001 From: yuhanH Date: Tue, 10 Oct 2023 13:28:10 +0000 Subject: [PATCH 845/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c8c65ae46..12853e1b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9071 -Date: 2023-10-04 +Version: 4.9.9.9072 +Date: 2023-10-10 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 5a443b09cc8aa6a20d7bbcf2ff85e5b214588dc3 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 13:11:27 -0400 Subject: [PATCH 846/979] Update documentation --- man/FetchResidualSCTModel.Rd | 1 + man/FetchResiduals_reference.Rd | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/man/FetchResidualSCTModel.Rd b/man/FetchResidualSCTModel.Rd index d12339d63..06758e13d 100644 --- a/man/FetchResidualSCTModel.Rd +++ b/man/FetchResidualSCTModel.Rd @@ -10,6 +10,7 @@ FetchResidualSCTModel( assay = "SCT", umi.assay = "RNA", layer = "counts", + chunk_size = 2000, layer.cells = NULL, SCTModel = NULL, reference.SCT.model = NULL, diff --git a/man/FetchResiduals_reference.Rd b/man/FetchResiduals_reference.Rd index 4330a3e31..847302c72 100644 --- a/man/FetchResiduals_reference.Rd +++ b/man/FetchResiduals_reference.Rd @@ -20,7 +20,7 @@ for calculating the residuals} \item{features}{Names of features to compute} -\item{nCount_UMI}{UMI counts. If not specified, defaults to +\item{nCount_UMI}{UMI counts. If not specified, defaults to column sums of object} \item{verbose}{Whether to print messages and progress bars} From 69f960b6697830050565a7504977e2a8e4e1a9fd Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 13:26:05 -0400 Subject: [PATCH 847/979] Remove BPCells from imports --- R/sketching.R | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/R/sketching.R b/R/sketching.R index 12134f372..6d4383056 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -15,14 +15,14 @@ NULL #' Sketch Data #' -#' This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, +#' This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, #' which can help with scalability for large datasets. #' #' @param object A Seurat object. #' @param assay Assay name. Default is NULL, in which case the default assay of the object is used. #' @param ncells A positive integer indicating the number of cells to sample for the sketching. Default is 5000. #' @param sketched.assay Sketched assay name. A sketch assay is created or overwrite with the sketch data. Default is 'sketch'. -#' @param method Sketching method to use. Can be 'LeverageScore' or 'Uniform'. +#' @param method Sketching method to use. Can be 'LeverageScore' or 'Uniform'. #' Default is 'LeverageScore'. #' @param var.name A metadata column name to store the leverage scores. Default is 'leverage.score'. #' @param over.write whether to overwrite existing column in the metadata. Default is FALSE. @@ -130,7 +130,7 @@ SketchData <- function( #' Project full data to the sketch assay #' #' -#' This function allows projection of high-dimensional single-cell RNA expression data from a full dataset +#' This function allows projection of high-dimensional single-cell RNA expression data from a full dataset #' onto the lower-dimensional embedding of the sketch of the dataset. #' #' @param object A Seurat object. @@ -139,7 +139,7 @@ SketchData <- function( #' @param sketched.reduction Dimensional reduction results of the sketched assay to project onto. #' @param full.reduction Dimensional reduction name for the projected full dataset. #' @param dims Dimensions to include in the projection. -#' @param normalization.method Normalization method to use. Can be 'LogNormalize' or 'SCT'. +#' @param normalization.method Normalization method to use. Can be 'LogNormalize' or 'SCT'. #' Default is 'LogNormalize'. #' @param refdata An optional list for label transfer from sketch to full data. Default is NULL. #' Similar to refdata in `MapQuery` @@ -149,7 +149,7 @@ SketchData <- function( #' @param recompute.weights Whether to recompute the weights for label transfer. Default is FALSE. #' @param verbose Print progress and diagnostic messages. #' -#' @return A Seurat object with the full data projected onto the sketched dimensional reduction results. +#' @return A Seurat object with the full data projected onto the sketched dimensional reduction results. #' The projected data are stored in the specified full reduction. #' #' @export @@ -206,7 +206,7 @@ ProjectData <- function( #' Transfer data from sketch data to full data #' -#' This function transfers cell type labels from a sketched dataset to a full dataset +#' This function transfers cell type labels from a sketched dataset to a full dataset #' based on the similarities in the lower dimensional space. #' #' @param object A Seurat object. @@ -222,10 +222,10 @@ ProjectData <- function( #' @param recompute.weights Whether to recompute the weights for label transfer. Default is FALSE. #' @param verbose Print progress and diagnostic messages #' -#' @return A Seurat object with transferred labels stored in the metadata. If a UMAP model is provided, +#' @return A Seurat object with transferred labels stored in the metadata. If a UMAP model is provided, #' the full data are also projected onto the UMAP space, with the results stored in a new reduction, full.`reduction.model` #' -#' +#' #' @export #' TransferSketchLabels <- function( @@ -260,7 +260,7 @@ TransferSketchLabels <- function( compute.weights <- is.null(x = full_sketch.weight) || !all(colnames(full_sketch.weight) == Cells(object[[reduction]])) || !all(rownames(full_sketch.weight) == colnames(object[[sketched.assay]])) || - recompute.weights || + recompute.weights || recompute.neighbors if (compute.neighbors) { @@ -359,18 +359,17 @@ TransferSketchLabels <- function( # Methods for Seurat-defined generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#' @param nsketch A positive integer. The number of sketches to be used in the approximation. +#' @param nsketch A positive integer. The number of sketches to be used in the approximation. #' Default is 5000. -#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number +#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number #' of dimensions will default to the number of columns in the object. #' @param method The sketching method to use, defaults to CountSketch. -#' @param eps A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +#' @param eps A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, #' defaults to 0.5. #' @param seed A positive integer. The seed for the random number generator, defaults to 123. #' @param verbose Print progress and diagnostic messages #' @importFrom Matrix qrR t #' @importFrom irlba irlba -#' @importFrom BPCells transpose_storage_order matrix_stats #' #' @rdname LeverageScore #' @method LeverageScore default @@ -467,21 +466,21 @@ LeverageScore.default <- function( return(Z.score) } -#' @param nsketch A positive integer. The number of sketches to be used in the approximation. +#' @param nsketch A positive integer. The number of sketches to be used in the approximation. #' Default is 5000. -#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number +#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number #' of dimensions will default to the number of columns in the object. #' @param method The sketching method to use, defaults to CountSketch. #' @param vf.method VariableFeatures method #' @param layer layer to use -#' @param eps A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +#' @param eps A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, #' defaults to 0.5. #' @param seed A positive integer. The seed for the random number generator, defaults to 123. #' @param verbose Print progress and diagnostic messages -#' +#' #' @rdname LeverageScore #' @method LeverageScore StdAssay -#' +#' #' @export #' LeverageScore.StdAssay <- function( @@ -540,15 +539,15 @@ LeverageScore.Assay <- LeverageScore.StdAssay #' @param assay assay to use -#' @param nsketch A positive integer. The number of sketches to be used in the approximation. +#' @param nsketch A positive integer. The number of sketches to be used in the approximation. #' Default is 5000. -#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number +#' @param ndims A positive integer or NULL. The number of dimensions to use. If NULL, the number #' of dimensions will default to the number of columns in the object. #' @param method The sketching method to use, defaults to CountSketch. #' @param var.name name of slot to store leverage scores -#' @param over.write whether to overwrite slot that currently stores leverage scores. Defaults +#' @param over.write whether to overwrite slot that currently stores leverage scores. Defaults #' to FALSE, in which case the 'var.name' is modified if it already exists in the object -#' +#' #' @rdname LeverageScore #' @method LeverageScore Seurat #' @export From 5a5936ee36b5785e18ca80d930ed70c38b41b3a2 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 13:26:31 -0400 Subject: [PATCH 848/979] Remove SeuratObject from remotes --- DESCRIPTION | 2 -- 1 file changed, 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 41ed69693..1b484a2ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,8 +28,6 @@ Authors@R: c( URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues Additional_repositories: https://satijalab.r-universe.dev -Remotes: - mojaveazure/seurat-object@seurat5 Depends: R (>= 4.0.0), methods, From 40b87a27b1fc4760a3f6637e586b2e96ce62865b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 13:27:18 -0400 Subject: [PATCH 849/979] Documentation updates --- NAMESPACE | 2 -- man/LeverageScore.Rd | 8 ++++---- man/ProjectData.Rd | 6 +++--- man/SketchData.Rd | 4 ++-- man/TransferSketchLabels.Rd | 4 ++-- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e9b22c7ba..bcb30fe79 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -472,8 +472,6 @@ importClassesFrom(SeuratObject,Neighbor) importClassesFrom(SeuratObject,Seurat) importClassesFrom(SeuratObject,SeuratCommand) importClassesFrom(SeuratObject,SpatialImage) -importFrom(BPCells,matrix_stats) -importFrom(BPCells,transpose_storage_order) importFrom(KernSmooth,bkde) importFrom(MASS,ginv) importFrom(MASS,glm.nb) diff --git a/man/LeverageScore.Rd b/man/LeverageScore.Rd index 9fdc7e319..a2042c582 100644 --- a/man/LeverageScore.Rd +++ b/man/LeverageScore.Rd @@ -68,15 +68,15 @@ LeverageScore(object, ...) \item{...}{Arguments passed to other methods} -\item{nsketch}{A positive integer. The number of sketches to be used in the approximation. +\item{nsketch}{A positive integer. The number of sketches to be used in the approximation. Default is 5000.} -\item{ndims}{A positive integer or NULL. The number of dimensions to use. If NULL, the number +\item{ndims}{A positive integer or NULL. The number of dimensions to use. If NULL, the number of dimensions will default to the number of columns in the object.} \item{method}{The sketching method to use, defaults to CountSketch.} -\item{eps}{A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, +\item{eps}{A numeric. The error tolerance for the approximation in Johnson–Lindenstrauss embeddings, defaults to 0.5.} \item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} @@ -91,7 +91,7 @@ defaults to 0.5.} \item{var.name}{name of slot to store leverage scores} -\item{over.write}{whether to overwrite slot that currently stores leverage scores. Defaults +\item{over.write}{whether to overwrite slot that currently stores leverage scores. Defaults to FALSE, in which case the 'var.name' is modified if it already exists in the object} } \description{ diff --git a/man/ProjectData.Rd b/man/ProjectData.Rd index ee97437a8..79f0a67f8 100644 --- a/man/ProjectData.Rd +++ b/man/ProjectData.Rd @@ -33,7 +33,7 @@ ProjectData( \item{dims}{Dimensions to include in the projection.} -\item{normalization.method}{Normalization method to use. Can be 'LogNormalize' or 'SCT'. +\item{normalization.method}{Normalization method to use. Can be 'LogNormalize' or 'SCT'. Default is 'LogNormalize'.} \item{refdata}{An optional list for label transfer from sketch to full data. Default is NULL. @@ -50,10 +50,10 @@ Similar to refdata in `MapQuery`} \item{verbose}{Print progress and diagnostic messages.} } \value{ -A Seurat object with the full data projected onto the sketched dimensional reduction results. +A Seurat object with the full data projected onto the sketched dimensional reduction results. The projected data are stored in the specified full reduction. } \description{ -This function allows projection of high-dimensional single-cell RNA expression data from a full dataset +This function allows projection of high-dimensional single-cell RNA expression data from a full dataset onto the lower-dimensional embedding of the sketch of the dataset. } diff --git a/man/SketchData.Rd b/man/SketchData.Rd index f4b636738..bbf0db122 100644 --- a/man/SketchData.Rd +++ b/man/SketchData.Rd @@ -27,7 +27,7 @@ SketchData( \item{sketched.assay}{Sketched assay name. A sketch assay is created or overwrite with the sketch data. Default is 'sketch'.} -\item{method}{Sketching method to use. Can be 'LeverageScore' or 'Uniform'. +\item{method}{Sketching method to use. Can be 'LeverageScore' or 'Uniform'. Default is 'LeverageScore'.} \item{var.name}{A metadata column name to store the leverage scores. Default is 'leverage.score'.} @@ -46,6 +46,6 @@ Default is 'LeverageScore'.} A Seurat object with the sketched data added as a new assay. } \description{ -This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, +This function uses sketching methods to downsample high-dimensional single-cell RNA expression data, which can help with scalability for large datasets. } diff --git a/man/TransferSketchLabels.Rd b/man/TransferSketchLabels.Rd index 087758756..687b5f0b3 100644 --- a/man/TransferSketchLabels.Rd +++ b/man/TransferSketchLabels.Rd @@ -43,10 +43,10 @@ Similar to refdata in `MapQuery`} \item{verbose}{Print progress and diagnostic messages} } \value{ -A Seurat object with transferred labels stored in the metadata. If a UMAP model is provided, +A Seurat object with transferred labels stored in the metadata. If a UMAP model is provided, the full data are also projected onto the UMAP space, with the results stored in a new reduction, full.`reduction.model` } \description{ -This function transfers cell type labels from a sketched dataset to a full dataset +This function transfers cell type labels from a sketched dataset to a full dataset based on the similarities in the lower dimensional space. } From bc0889c5fd37648a122776db12aa41b8f21f15b5 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 14:22:54 -0400 Subject: [PATCH 850/979] Update documentation --- NAMESPACE | 1 + R/preprocessing5.R | 2 ++ R/sketching.R | 4 ++-- R/utilities.R | 46 +++++++++++++++++++----------------- R/zzz.R | 8 ++----- man/FetchResidualSCTModel.Rd | 3 +++ man/reexports.Rd | 4 ++-- 7 files changed, 36 insertions(+), 32 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bcb30fe79..b6e01c348 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -522,6 +522,7 @@ importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.CalcN) importFrom(SeuratObject,.FilterObjects) +importFrom(SeuratObject,.IsFutureSeurat) importFrom(SeuratObject,.MARGIN) importFrom(SeuratObject,.PropagateList) importFrom(SeuratObject,.SparseSlots) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 478f4e3e5..96a038049 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1590,6 +1590,8 @@ FetchResiduals <- function( #' UMIs from. Default is "RNA" #' @param layer Name of the layer under `umi.assay` to fetch UMIs from. #' Default is "counts" +#' @param chunk_size Number of cells to load in memory for calculating +#' residuals #' @param layer.cells Vector of cells to calculate the residual for. #' Default is NULL which uses all cells in the layer #' @param SCTModel Which SCTmodel to use from the object for calculating diff --git a/R/sketching.R b/R/sketching.R index 6d4383056..81238d8db 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -430,7 +430,7 @@ LeverageScore.default <- function( } if (inherits(x = object, what = 'IterableMatrix')) { temp <- tempdir() - object.gene_index <- transpose_storage_order(matrix = object, tmpdir = temp) + object.gene_index <- BPCells::transpose_storage_order(matrix = object, tmpdir = temp) sa <- as(object = S %*% object.gene_index, Class = 'dgCMatrix') rm(object.gene_index) unlink(list.files(path = temp, full.names = TRUE)) @@ -458,7 +458,7 @@ LeverageScore.default <- function( )) Z <- object %*% (R.inv %*% JL) if (inherits(x = Z, what = 'IterableMatrix')) { - Z.score <- matrix_stats(matrix = Z ^ 2, row_stats = 'mean' + Z.score <- BPCells::matrix_stats(matrix = Z ^ 2, row_stats = 'mean' )$row_stats['mean',]*ncol(x = Z) } else { Z.score <- rowSums(x = Z ^ 2) diff --git a/R/utilities.R b/R/utilities.R index 92e0fe1fc..3401b8506 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -342,7 +342,7 @@ AddModuleScore <- function( #' @param margin Margin to perform CLR normalization, see \code{\link{NormalizeData}} #' @param verbose Print messages and show progress bar #' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}} -#' +#' #' @return Returns a matrix with genes as rows, identity classes as columns. #' If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. #' @export @@ -353,7 +353,7 @@ AddModuleScore <- function( #' data("pbmc_small") #' head(AggregateExpression(object = pbmc_small)) #' } -#' +#' AggregateExpression <- function( object, assays = NULL, @@ -466,11 +466,11 @@ AverageExpression <- function( ) layer <- slot } - + if (method =="average") { message("As of Seurat v5, As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.") } - + object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) assays <- assays %||% object.assays if (!all(assays %in% object.assays)) { @@ -546,7 +546,7 @@ AverageExpression <- function( ... ) LayerData(object = toRet, - layer = "scale.data", + layer = "scale.data", assay = names(x = data.return)[1]) <- data.return[[1]] } else { toRet <- CreateSeuratObject( @@ -575,9 +575,9 @@ AverageExpression <- function( features = features[[i]], slot = "counts" ) - toRet[[names(x = data.return)[i]]] <- CreateAssay5Object(counts = summed.counts) + toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = summed.counts) LayerData(object = toRet, - layer = "scale.data", + layer = "scale.data", assay = names(x = data.return)[i]) <- data.return[[i]] } else { toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) @@ -1333,11 +1333,11 @@ PercentageFeatureSet <- function( for (i in seq_along(along.with = layers)) { layer <- layers[i] features.layer <- features %||% grep( - pattern = pattern, - x = rownames(x = object[[assay]][[layer]]), + pattern = pattern, + x = rownames(x = object[[assay]][[layer]]), value = TRUE) - layer.data <- LayerData(object = object, - assay = assay, + layer.data <- LayerData(object = object, + assay = assay, layer = layer) layer.sums <- colSums(x = layer.data[features.layer, , drop = FALSE]) layer.perc <- layer.sums / object[[]][colnames(layer.data), paste0("nCount_", assay)] * 100 @@ -1371,6 +1371,7 @@ PercentageFeatureSet <- function( # @return Returns a matrix with genes as rows, identity classes as columns. # If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. #' @method PseudobulkExpression Assay +#' @importFrom SeuratObject .IsFutureSeurat #' @export # # @@ -2633,6 +2634,7 @@ MergeSparseMatrices <- function(...) { dimnames=list(rowname.new, colname.new)) return (merged.mat) } + # cross product from delayed array # crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { @@ -2644,7 +2646,7 @@ crossprod_DelayedAssay <- function(x, y, block.size = 1e8) { stop('row of x and y should be the same') } sparse <- DelayedArray::is_sparse(x = y) - suppressMessages(setAutoBlockSize(size = block.size)) + suppressMessages(expr = DelayedArray::setAutoBlockSize(size = block.size)) cells.grid <- DelayedArray::colAutoGrid(x = y) product.list <- list() for (i in seq_len(length.out = length(x = cells.grid))) { @@ -2703,11 +2705,11 @@ SweepNonzero <- function( #' Create one hot matrix for a given label -#' +#' #' @param labels A vector of labels #' @param method Method to aggregate cells with the same label. Either 'aggregate' or 'average' #' @param cells.name A vector of cell names -#' +#' #' @importFrom Matrix colSums sparse.model.matrix #' @importFrom stats as.formula #' @export @@ -2725,7 +2727,7 @@ CreateCategoryMatrix <- function( data <- cbind(labels = labels) } } else { - data <- labels + data <- labels } cells.name <- cells.name %||% rownames(data) if (!is.null(cells.name) & length(cells.name) != nrow(data)) { @@ -2758,7 +2760,7 @@ CreateCategoryMatrix <- function( colsums <- colSums(x = category.matrix) category.matrix <- category.matrix[, colsums > 0] colsums <- colsums[colsums > 0] - + if (method =='average') { category.matrix <- SweepNonzero( x = category.matrix, @@ -2792,7 +2794,7 @@ CreateCategoryMatrix <- function( #' @param assay Name for spatial neighborhoods assay #' @param neighbors.k Number of neighbors to consider for each cell #' @param niches.k Number of clusters to return based on the niche assay -#' +#' #' @importFrom stats kmeans #' @return Seurat object containing a new assay #' @concept clustering @@ -2813,7 +2815,7 @@ BuildNicheAssay <- function( coords <- as.matrix(coords[ , c("x", "y")]) neighbors <- FindNeighbors(coords, k.param = neighbors.k) neighbors$nn <- neighbors$nn[Cells(object), Cells(object)] - + # build cell x cell type matrix ct.mtx <- matrix( data = 0, @@ -2827,13 +2829,13 @@ BuildNicheAssay <- function( ct <- as.character(cts[cells[[i]], ]) ct.mtx[cells[[i]], ct] <- 1 } - + # create niche assay sum.mtx <- as.matrix(neighbors$nn %*% ct.mtx) niche.assay <- CreateAssayObject(counts = t(sum.mtx)) object[[assay]] <- niche.assay DefaultAssay(object) <- assay - + # cluster niches assay object <- ScaleData(object) results <- kmeans( @@ -2842,6 +2844,6 @@ BuildNicheAssay <- function( nstart = 30 ) object$niches <- results[["cluster"]] - - return(object) + + return(object) } diff --git a/R/zzz.R b/R/zzz.R index 5793b6760..639e52f00 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -59,7 +59,8 @@ seurat_default_options <- list( Seurat.checkdots = "warn", Seurat.limma.wilcox.msg = TRUE, Seurat.Rfast2.msg = TRUE, - Seurat.warn.vlnplot.split = TRUE + Seurat.warn.vlnplot.split = TRUE, + Seurat.object.assay.version = "v5" ) @@ -87,11 +88,6 @@ AttachDeps <- function(deps) { #' .onAttach <- function(libname, pkgname) { AttachDeps(deps = c('SeuratObject')) - packageStartupMessage("Loading Seurat v5 beta version \n", - "To maintain compatibility with previous workflows, new Seurat objects ", - "will use the previous object structure by default\n", - "To use new Seurat v5 assays please run: ", - "options(Seurat.object.assay.version = 'v5')") return(invisible(x = NULL)) } diff --git a/man/FetchResidualSCTModel.Rd b/man/FetchResidualSCTModel.Rd index 06758e13d..0b4d7ec77 100644 --- a/man/FetchResidualSCTModel.Rd +++ b/man/FetchResidualSCTModel.Rd @@ -32,6 +32,9 @@ UMIs from. Default is "RNA"} \item{layer}{Name of the layer under `umi.assay` to fetch UMIs from. Default is "counts"} +\item{chunk_size}{Number of cells to load in memory for calculating +residuals} + \item{layer.cells}{Vector of cells to calculate the residual for. Default is NULL which uses all cells in the layer} diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 5398c5493d971fa51c910f0430f4ff607ac8c144 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 15:04:46 -0400 Subject: [PATCH 851/979] Replace slot -> layers --- R/zzz.R | 1 + tests/testthat/test_differential_expression.R | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 639e52f00..8f27610a3 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -96,6 +96,7 @@ AttachDeps <- function(deps) { x = names(x = seurat_default_options), y = names(x = options()) ) + # toset <- names(x = seurat_default_options) if (length(x = toset)) { options(seurat_default_options[toset]) } diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 5b9414c59..e6b17ac8d 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -331,11 +331,11 @@ test_that("FindAllMarkers works as expected", { ref <- pbmc_small ref <- FindVariableFeatures(object = ref, verbose = FALSE, nfeatures = 100) query <- CreateSeuratObject(CreateAssayObject( - counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1)) + counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], layer = "counts") + rpois(n = ncol(pbmc_small), lambda = 1)) )) query2 <- CreateSeuratObject(CreateAssayObject( - counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1)) + counts = as.sparse(GetAssayData(object = pbmc_small[['RNA']], layer = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1)) )) From a037d8cb95b24d6260d363209cc998f29ab60536 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 15:08:26 -0400 Subject: [PATCH 852/979] GetAssayData -> LayerData --- tests/testthat/test_dimensional_reduction.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index ab87e5a36..16cd09a97 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -51,7 +51,7 @@ test_that("pca returns total variance (see #982)", { )) # Using stats::prcomp - scaled_data <- Seurat::GetAssayData(object = obj, slot = "scale.data") + scaled_data <- LayerData(object = obj, layer = "scale.data") prcomp_result <- stats::prcomp(scaled_data, center = FALSE, scale. = FALSE) # Compare From 3ca03f0dc9b9fc07da4ad9ab5f72245236031cd0 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 13 Oct 2023 15:18:46 -0400 Subject: [PATCH 853/979] adding assay argument --- R/integration.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index f8c60b909..aac8ae03a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3199,6 +3199,7 @@ SelectSCTIntegrationFeatures <- function( #' } #' @param reference Reference object from which to pull data to transfer #' @param query Query object into which the data will be transferred. +#' @param query.assay Name of the Assay to use from query #' @param weight.reduction Dimensional reduction to use for the weighting #' anchors. Options are: #' \itemize{ @@ -3284,6 +3285,7 @@ TransferData <- function( refdata, reference = NULL, query = NULL, + query.assay = NULL, weight.reduction = 'pcaproject', l2.norm = FALSE, dims = NULL, @@ -3301,6 +3303,7 @@ TransferData <- function( anchors <- slot(object = anchorset, name = "anchors") reference.cells <- slot(object = anchorset, name = "reference.cells") query.cells <- slot(object = anchorset, name = "query.cells") + query.assay <- query.assay %||% DefaultAssay(query) label.transfer <- list() ValidateParams_TransferData( anchorset = anchorset, @@ -3311,6 +3314,7 @@ TransferData <- function( refdata = refdata, reference = reference, query = query, + query.assay = query.assay, weight.reduction = weight.reduction, l2.norm = l2.norm, dims = dims, @@ -3331,6 +3335,7 @@ TransferData <- function( features <- slot(object = anchorset, name = "anchor.features") query.ob <- query + DefaultAssay(query.ob) <- query.assay query.ob <- ScaleData(object = query.ob, features = features, verbose = FALSE) query.ob <- RunPCA(object = query.ob, npcs = max(dims), features = features, verbose = FALSE) query.pca <- Embeddings(query.ob[['pca']]) @@ -6106,6 +6111,7 @@ ValidateParams_TransferData <- function( query.cells, reference, query, + query.assay, refdata, weight.reduction, l2.norm, @@ -6246,7 +6252,7 @@ ValidateParams_TransferData <- function( if (!is.null(x = query)) { if (!isTRUE(x = all.equal( target = gsub(pattern = "_query", replacement = "", x = query.cells), - current = colnames(x = query), + current = colnames(x = query[[query.assay]]), check.attributes = FALSE) )) { stop("Query object provided contains a different set of cells from the ", From 52b85d13d3d354f60fd7e26e163d1b75e4b7b4c8 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 15:24:24 -0400 Subject: [PATCH 854/979] GetAssayData -> LayerData --- tests/testthat/test_dimensional_reduction.R | 2 +- tests/testthat/test_integratedata.R | 42 ++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index 16cd09a97..6dac7a9c8 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -51,7 +51,7 @@ test_that("pca returns total variance (see #982)", { )) # Using stats::prcomp - scaled_data <- LayerData(object = obj, layer = "scale.data") + scaled_data <- Seurat::LayerData(object = obj, layer = "scale.data") prcomp_result <- stats::prcomp(scaled_data, center = FALSE, scale. = FALSE) # Compare diff --git a/tests/testthat/test_integratedata.R b/tests/testthat/test_integratedata.R index 818ce2b16..fe17ca37b 100644 --- a/tests/testthat/test_integratedata.R +++ b/tests/testthat/test_integratedata.R @@ -9,16 +9,16 @@ query <- CreateSeuratObject( counts = as.sparse( GetAssayData( object = pbmc_small[['RNA']], - slot = "counts") + rpois(n = ncol(pbmc_small), + layer = "counts") + rpois(n = ncol(pbmc_small), lambda = 1 ) ) ) query2 <- CreateSeuratObject( counts = as.sparse( - GetAssayData( + LayerData( object = pbmc_small[['RNA']], - slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), + layer = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1 ) ) @@ -82,10 +82,10 @@ test_that("IntegrateData with two objects default work", { expect_equal(Tool(int2), "Integration") expect_equal(dim(int2[["integrated"]]), c(133, 160)) expect_equal(length(VariableFeatures(int2)), 133) - expect_equal(GetAssayData(int2[["integrated"]], slot = "counts"), new("dgCMatrix")) - expect_equal(GetAssayData(int2[['integrated']], slot = "scale.data"), matrix()) - expect_equal(sum(GetAssayData(int2[["integrated"]])[1, ]), 44.97355, tolerance = 1e-3) - expect_equal(sum(GetAssayData(int2[["integrated"]])[, 1]), 78.8965706046, tolerance = 1e-6) + expect_equal(GetAssayData(int2[["integrated"]], layer = "counts"), new("dgCMatrix")) + expect_equal(GetAssayData(int2[['integrated']], layer = "scale.data"), matrix()) + expect_equal(sum(GetAssayData(int2[["integrated"]], layer = "data")[1, ]), 44.97355, tolerance = 1e-3) + expect_equal(sum(GetAssayData(int2[["integrated"]], layer = "data")[, 1]), 78.8965706046, tolerance = 1e-6) expect_equal(Tool(object = int2, slot = "Integration")@sample.tree, matrix(c(-1, -2), nrow = 1)) }) @@ -96,10 +96,10 @@ test_that("IntegrateData with three objects default work", { expect_equal(Tool(int3), "Integration") expect_equal(dim(int3[["integrated"]]), c(169, 200)) expect_equal(length(VariableFeatures(int3)), 169) - expect_equal(GetAssayData(int3[["integrated"]], slot = "counts"), new("dgCMatrix")) - expect_equal(GetAssayData(int3[['integrated']], slot = "scale.data"), matrix()) - expect_equal(sum(GetAssayData(int3[["integrated"]])[1, ]), 372.829, tolerance = 1e-6) - expect_equal(sum(GetAssayData(int3[["integrated"]])[, 1]), 482.5009, tolerance = 1e-6) + expect_equal(GetAssayData(int3[["integrated"]], layer = "counts"), new("dgCMatrix")) + expect_equal(GetAssayData(int3[['integrated']], layer = "scale.data"), matrix()) + expect_equal(sum(GetAssayData(int3[["integrated"]], layer = "data")[1, ]), 372.829, tolerance = 1e-6) + expect_equal(sum(GetAssayData(int3[["integrated"]], layer = "data")[, 1]), 482.5009, tolerance = 1e-6) expect_equal(Tool(object = int3, slot = "Integration")@sample.tree, matrix(c(-2, -3, 1, -1), nrow = 2, byrow = TRUE)) }) @@ -127,19 +127,19 @@ pbmc_small <- suppressMessages(suppressWarnings(RunPCA(pbmc_small))) test_that("IntegrateLayers does not work on a v3 assay ", { - expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, - orig.reduction = "pca", + expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, + orig.reduction = "pca", assay = "RNA", new.reduction = "integrated.cca")) }) test_that("IntegrateLayers errors out if incorrect input ", { - expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, - orig.reduction = "pca", + expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, + orig.reduction = "pca", assay = "DNA", new.reduction = "integrated.cca")) - expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, - orig.reduction = "lda", + expect_error(IntegrateLayers(object = pbmc_small, method = CCAIntegration, + orig.reduction = "lda", new.reduction = "integrated.cca")) }) @@ -176,7 +176,7 @@ test_that("IntegrateLayers returns embeddings with correct dimensions ", { expect_equal(dim(int_cca[["integrated.cca"]]), c(80, 50)) expect_equal(dim(int_rpca[["integrated.rpca"]]), c(80, 50)) expect_equal(dim(int_harmony[["harmony"]]), c(80, 50)) - + int_rpca expect_equal(int_cca[["integrated.cca"]]@assay.used, "RNAv5") #expect_equal(int_cca[['integrated.cca']]@cell.embeddings, c(3, 4, 5)) @@ -189,13 +189,13 @@ test_that("group.by ", { #Harmony integration -# int_2 <- IntegrateLayers(object = pbmc_small, method = CCAIntegration, +# int_2 <- IntegrateLayers(object = pbmc_small, method = CCAIntegration, # group.by = "letter.idents", -# orig.reduction = "pca", +# orig.reduction = "pca", # assay = "RNAv5", # k.weight = 20, # new.reduction = "integrated.cca") -# +# # head(int_2[['integrated.cca']]@cell.embeddings[1:5,1:5]) # head(int_cca[['integrated.cca']]@cell.embeddings[1:5,1:5]) From 64d041eb73839bee689dc0c2a57c106fb9dc2969 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 15:34:20 -0400 Subject: [PATCH 855/979] slot -> layer --- tests/testthat/test_integration.R | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/testthat/test_integration.R b/tests/testthat/test_integration.R index 5f92e8cf0..c26689382 100644 --- a/tests/testthat/test_integration.R +++ b/tests/testthat/test_integration.R @@ -8,7 +8,7 @@ query <- CreateSeuratObject( counts = as.sparse( GetAssayData( object = pbmc_small[['RNA']], - slot = "counts") + rpois(n = ncol(pbmc_small), + layer = "counts") + rpois(n = ncol(pbmc_small), lambda = 1 ) ) @@ -26,8 +26,8 @@ test_that("FindTransferAnchors defaults work", { co <- anchors@object.list[[1]] expect_equal(dim(co), c(100, 160)) expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) - expect_equal(GetAssayData(co[["RNA"]])[1, 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")[1, 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer ="data")[1, 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")[1, 3], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.4840944592, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.2103563963, tolerance = 1e-7) @@ -79,7 +79,7 @@ test_that("FindTransferAnchors allows reference.reduction to be precomputed", { expect_equal(dim(co), c(100, 160)) expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) expect_equal(GetAssayData(co[["RNA"]])[1, 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")[1, 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")[1, 3], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.4840944592, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.2103563963, tolerance = 1e-7) @@ -109,8 +109,8 @@ test_that("FindTransferAnchors with cca defaults work", { expect_equal(Reductions(co), c("cca", "cca.l2")) expect_equal(GetAssayData(co[["RNA"]])["PPBP", 3], 0) expect_equal(GetAssayData(co[["RNA"]])["PPBP", 1], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")["PPBP", 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")["PPBP", 1], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")["PPBP", 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")["PPBP", 1], 0) expect_equal(dim(co[['cca']]), c(160, 30)) expect_equal(Embeddings(co[['cca']])[1, 1], 0.04611130861, tolerance = 1e-7) expect_equal(Loadings(co[['cca']], projected = T)["PPBP", 1], 12.32379661, tolerance = 1e-7) @@ -138,10 +138,10 @@ test_that("FindTransferAnchors with project.query defaults work", { co <- anchors@object.list[[1]] expect_equal(dim(co), c(100, 160)) expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) - expect_equal(GetAssayData(co[["RNA"]])["PPBP", 3], 0) - expect_equal(GetAssayData(co[["RNA"]])["PPBP", 1], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")["PPBP", 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")["PPBP", 1], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "data")["PPBP", 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "data")["PPBP", 1], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")["PPBP", 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")["PPBP", 1], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], 1.577959404, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)["PPBP", 1], 0.1145472305, tolerance = 1e-7) @@ -174,8 +174,8 @@ test_that("FindTransferAnchors with project.query and reference.reduction works" expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) expect_equal(GetAssayData(co[["RNA"]])["PPBP", 3], 0) expect_equal(GetAssayData(co[["RNA"]])["PPBP", 1], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")["PPBP", 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")["PPBP", 1], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")["PPBP", 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")["PPBP", 1], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], 1.577959404, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)["PPBP", 1], 0.1145472305, tolerance = 1e-7) @@ -208,7 +208,7 @@ test_that("FindTransferAnchors with reference.neighbors precomputed works", { expect_equal(dim(co), c(100, 160)) expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) expect_equal(GetAssayData(co[["RNA"]])[1, 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")[1, 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")[1, 3], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.4840944592, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.2103563963, tolerance = 1e-7) @@ -237,7 +237,7 @@ test_that("FindTransferAnchors with no l2 works", { expect_equal(dim(co), c(100, 160)) expect_equal(Reductions(co), c("pcaproject")) expect_equal(GetAssayData(co[["RNA"]])[1, 3], 0) - expect_equal(GetAssayData(co[["RNA"]], slot = "counts")[1, 3], 0) + expect_equal(GetAssayData(co[["RNA"]], layer = "counts")[1, 3], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], 0.4840944592, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], 0.2103563963, tolerance = 1e-7) @@ -267,7 +267,7 @@ test_that("FindTransferAnchors with default SCT works", { expect_equal(dim(co), c(220, 160)) expect_equal(Reductions(co), c("pcaproject", "pcaproject.l2")) expect_equal(DefaultAssay(co), "SCT") - expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new(Class = "matrix")) + expect_equal(GetAssayData(co[["SCT"]], layer = "scale.data"), new(Class = "matrix")) expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], -1.852491719, tolerance = 1e-7) @@ -360,7 +360,7 @@ test_that("FindTransferAnchors with default SCT works", { # expect_equal(anchors@anchor.features[1], "PPBP") # expect_equal(anchors@neighbors, list()) # }) -# +# test_that("FindTransferAnchors with SCT and l2.norm FALSE work", { skip_on_cran() @@ -370,7 +370,7 @@ test_that("FindTransferAnchors with SCT and l2.norm FALSE work", { expect_equal(Reductions(co), c("pcaproject")) expect_equal(DefaultAssay(co), "SCT") expect_equal(GetAssayData(co[["SCT"]])[1, 1], 0) - expect_equal(GetAssayData(co[["SCT"]], slot = "scale.data"), new("matrix")) + expect_equal(GetAssayData(co[["SCT"]], layer = "scale.data"), new("matrix")) expect_equal(dim(co[['pcaproject']]), c(160, 30)) expect_equal(Embeddings(co[['pcaproject']])[1, 1], -1.852491719, tolerance = 1e-7) expect_equal(Loadings(co[['pcaproject']], projected = T)[1, 1], -0.1829401539, tolerance = 1e-7) From d6b0152ab6f794a9736cdf0b9f81d83d91f16651 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Fri, 13 Oct 2023 15:41:21 -0400 Subject: [PATCH 856/979] Updated logfc.threshold and min.pct --- R/differential_expression.R | 32 ++++---- src/RcppExports.cpp | 2 +- tests/testthat/test_differential_expression.R | 74 +++++++++---------- 3 files changed, 54 insertions(+), 54 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index c4432a4f3..034d4c027 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -46,10 +46,10 @@ FindAllMarkers <- function( object, assay = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = 'wilcox', slot = 'data', - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, node = NULL, verbose = TRUE, @@ -416,7 +416,7 @@ FindConservedMarkers <- function( #' expressing #' @param features Genes to test. Default is to use all genes #' @param logfc.threshold Limit testing to genes which show, on average, at least -#' X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +#' X-fold difference (log-scale) between the two groups of cells. Default is 0.1 #' Increasing logfc.threshold speeds up the function, but can miss weaker signals. #' @param test.use Denotes which test to use. Available options are: #' \itemize{ @@ -462,7 +462,7 @@ FindConservedMarkers <- function( #' } #' @param min.pct only test genes that are detected in a minimum fraction of #' min.pct cells in either of the two populations. Meant to speed up the function -#' by not testing genes that are very infrequently expressed. Default is 0.1 +#' by not testing genes that are very infrequently expressed. Default is 0.01 #' @param min.diff.pct only test genes that show a minimum difference in the #' fraction of detection between the two groups. Set to -Inf by default #' @param only.pos Only return positive markers (FALSE by default) @@ -496,9 +496,9 @@ FindMarkers.default <- function( cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = 'wilcox', - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -630,9 +630,9 @@ FindMarkers.Assay <- function( cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = 'wilcox', - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -719,9 +719,9 @@ FindMarkers.SCTAssay <- function( cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = 'wilcox', - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -840,9 +840,9 @@ FindMarkers.DimReduc <- function( cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -961,10 +961,10 @@ FindMarkers.Seurat <- function( slot = 'data', reduction = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, pseudocount.use = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -2504,9 +2504,9 @@ WilcoxDETest <- function( res <- res[1:(nrow(x = res)/2),] p_val <- res$pval } else if (overflow.check) { - if (getOption('Seurat.presto.wilcox.msg', TRUE) && (!limma)) { # if you didnt request limma, output message + if (getOption('Seurat.presto.wilcox.msg', TRUE) && (!limma)) { message( - "For a more efficient implementation of the Wilcoxon Rank Sum Test,", + "For a (much!) faster implementation of the Wilcoxon Rank Sum Test,", "\n(default method for FindMarkers) please install the presto package", "\n--------------------------------------------", "\ninstall.packages('devtools')", diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index b04ca87f2..b2c587f2e 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -30,7 +30,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(markers.0[1, "pct.1"], 0.083) expect_equal(markers.0[1, "pct.2"], 0.909) expect_equal(markers.0[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) - expect_equal(nrow(x = markers.0), 204) + expect_equal(nrow(x = markers.0), 227) expect_equal(rownames(markers.0)[1], "HLA-DPB1") expect_equal(colnames(x = markers.0.limma), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) @@ -39,7 +39,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(markers.0.limma[1, "pct.1"], 0.083) expect_equal(markers.0.limma[1, "pct.2"], 0.909) expect_equal(markers.0.limma[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) - expect_equal(nrow(x = markers.0.limma), 204) + expect_equal(nrow(x = markers.0.limma), 227) expect_equal(rownames(markers.0.limma)[1], "HLA-DPB1") expect_equal(markers.01[1, "p_val"], 1.702818e-11, tolerance = 1e-16) @@ -47,7 +47,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(markers.01[1, "pct.1"], 0.111) expect_equal(markers.01[1, "pct.2"], 1.00) expect_equal(markers.01[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) - expect_equal(nrow(x = markers.01), 201) + expect_equal(nrow(x = markers.01), 222) expect_equal(rownames(x = markers.01)[1], "TYMP") expect_equal(markers.01.limma[1, "p_val"], 1.702818e-11, tolerance = 1e-16) @@ -55,7 +55,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(markers.01.limma[1, "pct.1"], 0.111) expect_equal(markers.01.limma[1, "pct.2"], 1.00) expect_equal(markers.01.limma[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) - expect_equal(nrow(x = markers.01.limma), 201) + expect_equal(nrow(x = markers.01.limma), 222) expect_equal(rownames(x = markers.01.limma)[1], "TYMP") # CLR normalization @@ -64,7 +64,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.clr[1, "pct.1"], 0.111) expect_equal(results.clr[1, "pct.2"], 0.96) expect_equal(results.clr[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) - expect_equal(nrow(x = results.clr), 85) + expect_equal(nrow(x = results.clr), 167) expect_equal(rownames(x = results.clr)[1], "S100A8") expect_equal(results.clr.limma[1, "p_val"], 1.209462e-11, tolerance = 1e-16) @@ -72,7 +72,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.clr.limma[1, "pct.1"], 0.111) expect_equal(results.clr.limma[1, "pct.2"], 0.96) expect_equal(results.clr.limma[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) - expect_equal(nrow(x = results.clr.limma), 85) + expect_equal(nrow(x = results.clr.limma), 167) expect_equal(rownames(x = results.clr.limma)[1], "S100A8") # SCT normalization @@ -81,7 +81,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.sct[1, "pct.1"], 0.333) expect_equal(results.sct[1, "pct.2"], 1.00) expect_equal(results.sct[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct), 156) + expect_equal(nrow(x = results.sct), 197) expect_equal(rownames(x = results.sct)[1], "CST3") expect_equal(results.sct.limma[1, "p_val"], 4.646968e-11, tolerance = 1e-16) @@ -89,7 +89,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.sct.limma[1, "pct.1"], 0.333) expect_equal(results.sct.limma[1, "pct.2"], 1.00) expect_equal(results.sct.limma[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct.limma), 156) + expect_equal(nrow(x = results.sct.limma), 197) expect_equal(rownames(x = results.sct.limma)[1], "CST3") }) @@ -106,19 +106,19 @@ test_that("features parameter behaves correctly ", { expect_equal(tymp.results[1, "p_val_adj"], 7.423123e-05, tolerance = 1e-10) expect_equal(rownames(x = tymp.results)[1], "TYMP") - expect_equal(nrow(x = vargenes.results), 19) - expect_equal(vargenes.results[19, "p_val"], 4.225151e-01, tolerance = 1e-6) - expect_equal(vargenes.results[19, "avg_logFC"], 1.5976958, tolerance = 1e-6) - expect_equal(vargenes.results[19, "pct.1"], 0.139) - expect_equal(vargenes.results[19, "pct.2"], 0.091) - expect_equal(vargenes.results[19, "p_val_adj"], 1.000000e+00) - expect_equal(rownames(x = vargenes.results)[19], "PARVB") + expect_equal(nrow(x = vargenes.results), 20) + expect_equal(vargenes.results[20, "p_val"], 4.225151e-01, tolerance = 1e-6) + expect_equal(vargenes.results[20, "avg_logFC"], 1.5976958, tolerance = 1e-6) + expect_equal(vargenes.results[20, "pct.1"], 0.139) + expect_equal(vargenes.results[20, "pct.2"], 0.091) + expect_equal(vargenes.results[20, "p_val_adj"], 1.000000e+00) + expect_equal(rownames(x = vargenes.results)[20], "PARVB") }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = Cells(x = pbmc_small)[1:40], ident.2 = Cells(x = pbmc_small)[41:80], verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("passing cell names works", { - expect_equal(nrow(x = results), 190) + expect_equal(nrow(x = results), 216) expect_equal(results[1, "p_val"], 0.0001690882) expect_equal(results[1, "avg_logFC"], -1.790824, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.075) @@ -131,11 +131,11 @@ results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident. results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) test_that("setting pseudocount.use works", { - expect_equal(nrow(x = results), 202) + expect_equal(nrow(x = results), 222) expect_equal(results[1, "avg_logFC"], -2.630395, tolerance = 1e-6) - expect_equal(nrow(x = results.clr), 182) + expect_equal(nrow(x = results.clr), 212) expect_equal(results.clr[1, "avg_logFC"], -2.317338, tolerance = 1e-6) - expect_equal(nrow(results.sct), 194) + expect_equal(nrow(results.sct), 215) expect_equal(results.sct[1, "avg_logFC"], -2.421716, tolerance = 1e-6) }) @@ -143,7 +143,7 @@ results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident. results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans)) results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,mean.fxn = rowMeans)) test_that("setting mean.fxn works", { - expect_equal(nrow(x = results), 191) + expect_equal(nrow(x = results), 216) expect_equal(results[1, "avg_logFC"], -4.204346, tolerance = 1e-6) expect_equal(results.clr[1, "avg_logFC"], -1.353025, tolerance = 1e-6) expect_equal(results.sct[1, "avg_logFC"], -2.021490, tolerance = 1e-6) @@ -151,7 +151,7 @@ test_that("setting mean.fxn works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("logfc.threshold works", { - expect_equal(nrow(x = results), 112) + expect_equal(nrow(x = results), 118) expect_gte(min(abs(x = results$avg_logFC)), 2) }) @@ -162,7 +162,7 @@ test_that("logfc.threshold warns when none met", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, min.pct = 0.5, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("min.pct works", { - expect_equal(nrow(x = results), 65) + expect_equal(nrow(x = results), 66) expect_gte(min(apply(X = results, MARGIN = 1, FUN = function(x) max(x[3], x[4]))), 0.5) }) @@ -184,13 +184,13 @@ test_that("min.diff.pct warns when none met", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, only.pos = TRUE, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("only.pos works", { - expect_equal(nrow(x = results), 116) + expect_equal(nrow(x = results), 127) expect_true(all(results$avg_logFC > 0)) }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, max.cells.per.ident = 20, verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("max.cells.per.ident works", { - expect_equal(nrow(x = results), 201) + expect_equal(nrow(x = results), 222) expect_equal(results[1, "p_val"], 3.428568e-08, tolerance = 1e-13) expect_equal(results[1, "avg_logFC"], -2.539289, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.111) @@ -203,7 +203,7 @@ results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident. test_that("latent.vars works", { expect_error(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "fake", verbose = FALSE)) expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "groups", verbose = FALSE)) - expect_equal(nrow(x = results), 201) + expect_equal(nrow(x = results), 222) expect_equal(results[1, "p_val"], 2.130202e-16, tolerance = 1e-21) expect_equal(results[1, "avg_logFC"], -3.082150, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.417) @@ -218,7 +218,7 @@ Idents(object = t2) <- "groups" results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("group.by works", { - expect_equal(nrow(x = results), 136) + expect_equal(nrow(x = results), 188) expect_equal(results, results2) expect_equal(results[1, "p_val"], 0.02870319) expect_equal(results[1, "avg_logFC"], 0.8226720, tolerance = 1e-6) @@ -234,7 +234,7 @@ Idents(object = t2) <- "groups" results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("subset.ident works", { - expect_equal(nrow(x = results), 127) + expect_equal(nrow(x = results), 182) expect_equal(results, results2) expect_equal(results[1, "p_val"], 0.01293720) expect_equal(results[1, "avg_logFC"], 1.799280, tolerance = 1e-6) @@ -254,7 +254,7 @@ test_that("reduction works", { results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "bimod", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("bimod test works", { - expect_equal(nrow(x = results), 201) + expect_equal(nrow(x = results), 222) expect_equal(results[1, "p_val"], 4.751376e-17, tolerance = 1e-22) expect_equal(results[1, "avg_logFC"], -2.552769, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.306) @@ -265,7 +265,7 @@ test_that("bimod test works", { results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "roc", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("roc test works", { - expect_equal(nrow(x = results), 201) + expect_equal(nrow(x = results), 222) # expect_equal(colnames(x = results), c("myAUC", "avg_diff", "power", "pct.1", "pct.2")) expect_equal(colnames(x = results), c("myAUC", "avg_diff", "power", "avg_logFC", "pct.1", "pct.2")) expect_equal(results["CST3", "myAUC"], 0.018) @@ -278,7 +278,7 @@ test_that("roc test works", { results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "t", verbose = FALSE, base = exp(1), pseudocount.use = 1) test_that("t test works", { - expect_equal(nrow(x = results), 201) + expect_equal(nrow(x = results), 222) expect_equal(results["CST3", "p_val"], 1.170112e-15, tolerance = 1e-20) expect_equal(results["CST3", "avg_logFC"], -2.552769 , tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) @@ -289,7 +289,7 @@ test_that("t test works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "negbinom", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("negbinom test works", { - expect_equal(nrow(x = results), 149) + expect_equal(nrow(x = results), 188) expect_equal(results["CST3", "p_val"], 1.354443e-17, tolerance = 1e-22) expect_equal(results["CST3", "avg_logFC"], -2.353701, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) @@ -300,7 +300,7 @@ test_that("negbinom test works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "poisson", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("poisson test works", { - expect_equal(nrow(x = results), 149) + expect_equal(nrow(x = results), 188) expect_equal(results["CST3", "p_val"], 3.792196e-78, tolerance = 1e-83) expect_equal(results["CST3", "avg_logFC"], -2.353701, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) @@ -311,7 +311,7 @@ test_that("poisson test works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "LR", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("LR test works", { - expect_equal(nrow(x = results), 201) + expect_equal(nrow(x = results), 222) expect_equal(results["CST3", "p_val"], 3.990707e-16, tolerance = 1e-21) expect_equal(results["CST3", "avg_logFC"], -2.552769, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) @@ -343,7 +343,7 @@ test_that("FindAllMarkers works as expected", { expect_equal(results.clr[1, "pct.1"], 0.083) expect_equal(results.clr[1, "pct.2"], 0.909) expect_equal(results.clr[1, "p_val_adj"], 3.079373e-10, tolerance = 1e-15) - expect_equal(nrow(x = results.clr), 200) + expect_equal(nrow(x = results.clr), 222) expect_equal(rownames(x = results.clr)[1], "HLA-DPB1") # SCT normalization @@ -352,7 +352,7 @@ test_that("FindAllMarkers works as expected", { expect_equal(results.sct[1, "pct.1"], 0.111) expect_equal(results.sct[1, "pct.2"], 0.909) expect_equal(results.sct[1, "p_val_adj"], 3.006566e-10, tolerance = 1e-15) - expect_equal(nrow(x = results.sct), 202) + expect_equal(nrow(x = results.sct), 204) expect_equal(rownames(x = results.sct)[1], "HLA-DPB1") # pseudocount.use = 0.1 @@ -424,7 +424,7 @@ if (requireNamespace('metap', quietly = TRUE)) { expect_equal(markers[1, "g1_p_val_adj"], 9.077279e-06) expect_equal(markers[1, "max_pval"], 4.983576e-05) expect_equal(markers[1, "minimump_p_val"], 7.893286e-08, tolerance = 1e-13) - expect_equal(nrow(markers), 179) + expect_equal(nrow(markers), 217) expect_equal(rownames(markers)[1], "HLA-DRB1") expect_equal(markers[, "max_pval"], unname(obj = apply(X = markers, MARGIN = 1, FUN = function(x) max(x[c("g1_p_val", "g2_p_val")])))) }) @@ -449,7 +449,7 @@ if (requireNamespace('metap', quietly = TRUE)) { # expect_equal(markers.missing[1, "g2_pct.1"], 0.062) expect_equal(markers.missing[1, "g2_pct.2"], 0.95) expect_equal(markers.missing[1, "g2_p_val_adj"], 3.847695e-11, tolerance = 1e-16) - expect_equal(nrow(markers.missing), 205) + expect_equal(nrow(markers.missing), 225) expect_equal(rownames(markers.missing)[1], "HLA-DPB1") }) } From de7a4cc9618fe73cb0ad4293fea5c016e430301e Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Fri, 13 Oct 2023 15:59:34 -0400 Subject: [PATCH 857/979] selection method --- R/preprocessing5.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 0c36347ea..77b5f0ae2 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -29,6 +29,7 @@ FindVariableFeatures.default <- function( method = VST, nfeatures = 2000L, verbose = TRUE, + selection.method = selection.method, ... ) { if (is_quosure(x = method)) { From 1fb637417a1af585096ae56e824d393e25ad8512 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 16:13:54 -0400 Subject: [PATCH 858/979] Change invoke -> exec, FilterObjects -> .FilterObjects --- R/integration.R | 30 +++++++++++++++--------------- R/objects.R | 9 +++++---- R/utilities.R | 3 ++- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/R/integration.R b/R/integration.R index 97f093749..8c80193e7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1868,7 +1868,7 @@ IntegrateEmbeddings.TransferAnchorSet <- function( #' @param reduction.key Key for new dimensional reduction; defaults to creating #' one from \code{reduction.name} #' @param layers Names of layers for correction. -#' @param sketched.layers Names of sketched layers, defaults to all +#' @param sketched.layers Names of sketched layers, defaults to all #' layers of \dQuote{\code{object[[assay]]}} #' @param seed A positive integer. The seed for the random number generator, defaults to 123. #' @param verbose Print progress and message @@ -2165,7 +2165,7 @@ LocalStruct <- function( #' reference UMAP using \code{\link{ProjectUMAP}}} #' } #' -#' @importFrom rlang invoke +#' @importFrom rlang exec #' #' @export #' @concept integration @@ -2285,7 +2285,7 @@ MapQuery <- function( integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE - query <- invoke( + query <- exec( .fn = TransferData, .args = c(list( anchorset = anchorset, @@ -2303,7 +2303,7 @@ MapQuery <- function( reuse.weights.matrix <- TRUE } if (anchor.reduction != "cca") { - query <- invoke( + query <- exec( .fn = IntegrateEmbeddings, .args = c(list( anchorset = anchorset, @@ -2340,7 +2340,7 @@ MapQuery <- function( query.dims <- reference.dims } ref_nn.num <- Misc(object = reference[[reduction.model]], slot = "model")$n_neighbors - query <- invoke( + query <- exec( .fn = ProjectUMAP, .args = c(list( query = query, @@ -3028,7 +3028,7 @@ SelectIntegrationFeatures <- function( } #' Select integration features -#' +#' #' @param object Seurat object #' @param nfeatures Number of features to return for integration #' @param assay Name of assay to use for integration feature selection @@ -3044,7 +3044,7 @@ SelectIntegrationFeatures <- function( #' @param layers Name of layers to use for integration feature selection #' @param verbose Print messages #' @param ... Arguments passed on to \code{method} -#' +#' #' @export #' SelectIntegrationFeatures5 <- function( @@ -3070,13 +3070,13 @@ SelectIntegrationFeatures5 <- function( } #' Select SCT integration features -#' +#' #' @param object Seurat object #' @param nfeatures Number of features to return for integration #' @param assay Name of assay to use for integration feature selection #' @param verbose Print messages #' @param ... Arguments passed on to \code{method} -#' +#' #' @export #' SelectSCTIntegrationFeatures <- function( @@ -5203,7 +5203,7 @@ if (normalization.method == 'SCT') { reference.data.list <- c() for (i in Layers(object = reference[[reference.assay]], search = "data")) { reference.data.list[[i]] <- LayerData( - object = reference[[reference.assay]], + object = reference[[reference.assay]], layer = i )[features, ] } @@ -5300,9 +5300,9 @@ ProjectCellEmbeddings.IterableMatrix <- function( } else { query <- query[features,] reference.data.list <- c() - for (i in Layers(object = reference[[reference.assay]], + for (i in Layers(object = reference[[reference.assay]], search = "data")) { - reference.data.list[[i]] <- LayerData(object = reference[[reference.assay]], + reference.data.list[[i]] <- LayerData(object = reference[[reference.assay]], layer = i)[features, ] } reference.data <- do.call(cbind, reference.data.list) @@ -5311,7 +5311,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( feature.mean <- RowMeanSparse(mat = reference.data) } else if (inherits(x = reference.data, what = "IterableMatrix")) { bp.stats <- BPCells::matrix_stats( - matrix = reference.data, + matrix = reference.data, row_stats = "variance") feature.mean <- bp.stats$row_stats["mean",] } else { @@ -7783,7 +7783,7 @@ FindBridgeIntegrationAnchors <- function( #' \code{\link{FindIntegrationAnchors}} #' @param verbose Print messages and progress #' -#' @importFrom rlang invoke +#' @importFrom rlang exec #' @return Returns a Seurat object with integrated dimensional reduction #' @export #' @@ -7834,7 +7834,7 @@ FastRPCAIntegration <- function( } ) - anchor <- invoke( + anchor <- exec( .fn = FindIntegrationAnchors, .args = c(list( object.list = object.list, diff --git a/R/objects.R b/R/objects.R index ba1d88137..f57dbb9dc 100644 --- a/R/objects.R +++ b/R/objects.R @@ -578,12 +578,12 @@ DietSeurat <- function( }, error = function(e) { if (lyr == "data"){ object[[assay]][[lyr]] <- sparseMatrix(i = 1, j = 1, x = 1, - dims = dim(object[[assay]][[lyr]]), + dims = dim(object[[assay]][[lyr]]), dimnames = dimnames(object[[assay]][[lyr]])) } else{ slot(object = object[[assay]], name = lyr) <- new(Class = "dgCMatrix") } - message("Converting layer ", lyr, " in assay ", + message("Converting layer ", lyr, " in assay ", assay, " to empty dgCMatrix") object }) @@ -1286,6 +1286,7 @@ as.Seurat.SingleCellExperiment <- function( #' @concept objects #' @export #' @method as.SingleCellExperiment Seurat +#' @importFrom SeuratObject .FilterObjects #' as.SingleCellExperiment.Seurat <- function(x, assay = NULL, ...) { CheckDots(...) @@ -1348,7 +1349,7 @@ as.SingleCellExperiment.Seurat <- function(x, assay = NULL, ...) { ) } } - for (dr in FilterObjects(object = x, classes.keep = "DimReduc")) { + for (dr in .FilterObjects(object = x, classes.keep = "DimReduc")) { assay.used <- DefaultAssay(object = x[[dr]]) swap.exp <- assay.used %in% SingleCellExperiment::altExpNames(x = sce) & assay.used != orig.exp.name if (swap.exp) { @@ -3143,7 +3144,7 @@ UpdateSlots <- function(object) { ) object.list <- Filter(f = Negate(f = is.null), x = object.list) object.list <- c('Class' = class(x = object)[1], object.list) - object <- rlang::invoke( + object <- rlang::exec( .fn = new, .args = object.list ) diff --git a/R/utilities.R b/R/utilities.R index 3401b8506..62cc8c291 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -422,6 +422,7 @@ AggregateExpression <- function( #' If return.seurat is TRUE, returns an object of class \code{\link{Seurat}}. #' @export #' @concept utilities +#' @importFrom SeuratObject .FilterObjects #' #' @examples #' data("pbmc_small") @@ -471,7 +472,7 @@ AverageExpression <- function( message("As of Seurat v5, As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.") } - object.assays <- FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) + object.assays <- .FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) assays <- assays %||% object.assays if (!all(assays %in% object.assays)) { assays <- assays[assays %in% object.assays] From e5d7208b478f48c843ca8fe629664a3904c7e005 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 16:14:09 -0400 Subject: [PATCH 859/979] Update tests --- tests/testthat/test_load_10X.R | 2 +- tests/testthat/test_preprocessing.R | 118 ++++++++++++++-------------- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/testthat/test_load_10X.R b/tests/testthat/test_load_10X.R index 02b23e6dd..dd551def7 100644 --- a/tests/testthat/test_load_10X.R +++ b/tests/testthat/test_load_10X.R @@ -38,7 +38,7 @@ if (requireNamespace("hdf5r", quietly = TRUE)) { expect_equal(nrow(x = txsp), 100) expect_equal(Cells(x = txsp)[1], "AAACAAGTATCTCCCA-1") expect_equal(Assays(object = txsp), "Spatial") - expect_equal(GetAssayData(object = txsp[["Spatial"]], slot = "counts")[5, 9], 1) + expect_equal(GetAssayData(object = txsp[["Spatial"]], layer = "counts")[5, 9], 1) }) test_that("Read10X_Spatial handles missing files properly", { expect_error(Load10X_Spatial(data.dir = ".")) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 8289f769a..9aa8dc784 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -33,8 +33,8 @@ object.filtered <- CreateSeuratObject( ) test_that("Filtering handled properly", { - expect_equal(nrow(x = GetAssayData(object = object.filtered, slot = "counts")), 163) - expect_equal(ncol(x = GetAssayData(object = object.filtered, slot = "counts")), 77) + expect_equal(nrow(x = LayerData(object = object.filtered, layer = "counts")), 163) + expect_equal(ncol(x = LayerData(object = object.filtered, layer = "counts")), 77) }) #this should be moved to seurat object @@ -52,22 +52,22 @@ context("NormalizeData") test_that("NormalizeData error handling", { expect_error(NormalizeData(object = object, assay = "FAKE")) expect_equal( - object = GetAssayData( + object = LayerData( object = NormalizeData( object = object, normalization.method = NULL, verbose = FALSE ), - slot = "data" + layer = "data" ), - expected = GetAssayData(object = object, slot = "counts") + expected = LayerData(object = object, layer = "counts") ) }) object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6) test_that("NormalizeData scales properly", { - expect_equal(GetAssayData(object = object, slot = "data")[2, 1], 9.567085, tolerance = 1e-6) - expect_equal(GetAssayData(object = object, slot = "data")[161, 55], 8.415309, tolerance = 1e-6) + expect_equal(LayerData(object = object, layer = "data")[2, 1], 9.567085, tolerance = 1e-6) + expect_equal(LayerData(object = object, layer = "data")[161, 55], 8.415309, tolerance = 1e-6) expect_equal(Command(object = object, command = "NormalizeData.RNA", value = "scale.factor"), 1e6) expect_equal(Command(object = object, command = "NormalizeData.RNA", value = "normalization.method"), "LogNormalize") }) @@ -75,8 +75,8 @@ test_that("NormalizeData scales properly", { normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE) test_that("LogNormalize normalizes properly", { expect_equal( - as.matrix(LogNormalize(data = GetAssayData(object = object[["RNA"]], slot = "counts"), verbose = FALSE)), - as.matrix(LogNormalize(data = as.data.frame(as.matrix(GetAssayData(object = object[["RNA"]], slot = "counts"))), verbose = FALSE)) + as.matrix(LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE)), + as.matrix(LogNormalize(data = as.data.frame(as.matrix(GetAssayData(object = object[["RNA"]], layer = "counts"))), verbose = FALSE)) ) }) @@ -107,26 +107,26 @@ if(class(object[['RNA']]) == "Assay5") { object$groups <- fake.groups object.split <- CreateSeuratObject(split(object[["RNA"]], f = object$groups)) object.split <- NormalizeData(object = object.split) - + group1 <- subset(object, groups==1) group1 <- NormalizeData(group1) - + test_that("Normalization is performed for each layer", { - expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) + expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) expect_equal(group1[['RNA']]$data, LayerData(object.split, layer="data.1")) }) - + object.split <- NormalizeData(object = object.split, normalization.method = "CLR", verbose = FALSE) group1 <- NormalizeData(object = group1, normalization.method = "CLR", verbose = FALSE) test_that("CLR normalization works with multiple layers", { - expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) + expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) expect_equal(group1[['RNA']]$data, LayerData(object.split, layer="data.1")) - }) - + }) + object.split <- NormalizeData(object = object.split, normalization.method = "RC", verbose = FALSE) group1 <- NormalizeData(object = group1, normalization.method = "RC", verbose = FALSE) test_that("RC normalization works with multiple layers", { - expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) + expect_equal(Layers(object.split),c("counts.1", "counts.2", "data.1", "data.2")) expect_equal(group1[['RNA']]$data, LayerData(object.split, layer="data.1")) }) } @@ -136,12 +136,12 @@ if(class(object[['RNA']]) == "Assay5") { context("ScaleData") object <- ScaleData(object, verbose = FALSE) test_that("ScaleData returns expected values when input is a sparse matrix", { - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[1, 1], -0.4148587, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[75, 25], -0.2562305, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[162, 59], -0.4363939, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[1, 1], -0.4148587, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[75, 25], -0.2562305, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[162, 59], -0.4363939, tolerance = 1e-6) }) -new.data <- as.matrix(GetAssayData(object = object[["RNA"]], slot = "data")) +new.data <- as.matrix(GetAssayData(object = object[["RNA"]], layer = "data")) new.data[1, ] <- rep(x = 0, times = ncol(x = new.data)) object2 <- object @@ -154,13 +154,13 @@ object2 <- ScaleData(object = object2, verbose = FALSE) object <- ScaleData(object = object, verbose = FALSE) test_that("ScaleData returns expected values when input is not sparse", { - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[75, 25], -0.2562305, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[162, 59], -0.4363939, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[75, 25], -0.2562305, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[162, 59], -0.4363939, tolerance = 1e-6) }) test_that("ScaleData handles zero variance features properly", { - expect_equal(GetAssayData(object = object2[["RNA"]], slot = "scale.data")[1, 1], 0) - expect_equal(GetAssayData(object = object2[["RNA"]], slot = "scale.data")[1, 80], 0) + expect_equal(GetAssayData(object = object2[["RNA"]], layer = "scale.data")[1, 1], 0) + expect_equal(GetAssayData(object = object2[["RNA"]], layer = "scale.data")[1, 80], 0) }) ng1 <- rep(x = "g1", times = round(x = ncol(x = object) / 2)) @@ -173,20 +173,20 @@ object <- ScaleData(object = object, features = rownames(x = object), verbose = #move to SeuratObject # test_that("split.by option works", { -# expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g1)], -# GetAssayData(object = g1, slot = "scale.data")) -# expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g2)], -# GetAssayData(object = g2, slot = "scale.data")) +# expect_equal(GetAssayData(object = object, layer = "scale.data")[, Cells(x = g1)], +# GetAssayData(object = g1, layer = "scale.data")) +# expect_equal(GetAssayData(object = object, layer = "scale.data")[, Cells(x = g2)], +# GetAssayData(object = g2, layer = "scale.data")) # }) g1 <- ScaleData(object = g1, features = rownames(x = g1), vars.to.regress = "nCount_RNA", verbose = FALSE) g2 <- ScaleData(object = g2, features = rownames(x = g2), vars.to.regress = "nCount_RNA", verbose = FALSE) object <- ScaleData(object = object, features = rownames(x = object), verbose = FALSE, split.by = "group", vars.to.regress = "nCount_RNA") test_that("split.by option works with regression", { - expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g1)], - GetAssayData(object = g1, slot = "scale.data")) - expect_equal(GetAssayData(object = object, slot = "scale.data")[, Cells(x = g2)], - GetAssayData(object = g2, slot = "scale.data")) + expect_equal(LayerData(object = object, layer = "scale.data")[, Cells(x = g1)], + LayerData(object = g1, layer = "scale.data")) + expect_equal(LayerData(object = object, layer = "scale.data")[, Cells(x = g2)], + LayerData(object = g2, layer = "scale.data")) }) @@ -201,10 +201,10 @@ object <- ScaleData( model.use = "linear") test_that("Linear regression works as expected", { - expect_equal(dim(x = GetAssayData(object = object[["RNA"]], slot = "scale.data")), c(10, 80)) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[1, 1], -0.6436435, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[5, 25], -0.09035383, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[10, 80], -0.2723782, tolerance = 1e-6) + expect_equal(dim(x = GetAssayData(object = object[["RNA"]], layer = "scale.data")), c(10, 80)) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[1, 1], -0.6436435, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[5, 25], -0.09035383, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[10, 80], -0.2723782, tolerance = 1e-6) }) object <- ScaleData( @@ -215,10 +215,10 @@ object <- ScaleData( model.use = "negbinom") test_that("Negative binomial regression works as expected", { - expect_equal(dim(x = GetAssayData(object = object[["RNA"]], slot = "scale.data")), c(10, 80)) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[1, 1], -0.5888811, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[5, 25], -0.2553394, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[10, 80], -0.1921429, tolerance = 1e-6) + expect_equal(dim(x = GetAssayData(object = object[["RNA"]], layer = "scale.data")), c(10, 80)) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[1, 1], -0.5888811, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[5, 25], -0.2553394, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[10, 80], -0.1921429, tolerance = 1e-6) }) test_that("Regression error handling checks out", { @@ -233,10 +233,10 @@ object <- ScaleData( model.use = "poisson") test_that("Poisson regression works as expected", { - expect_equal(dim(x = GetAssayData(object = object[["RNA"]], slot = "scale.data")), c(10, 80)) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[1, 1], -1.011717, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[5, 25], 0.05575307, tolerance = 1e-6) - expect_equal(GetAssayData(object = object[["RNA"]], slot = "scale.data")[10, 80], -0.1662119, tolerance = 1e-6) + expect_equal(dim(x = GetAssayData(object = object[["RNA"]], layer = "scale.data")), c(10, 80)) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[1, 1], -1.011717, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[5, 25], 0.05575307, tolerance = 1e-6) + expect_equal(GetAssayData(object = object[["RNA"]], layer = "scale.data")[10, 80], -0.1662119, tolerance = 1e-6) }) @@ -245,22 +245,22 @@ test_that("Poisson regression works as expected", { context("SampleUMI") downsampled.umis <- SampleUMI( - data = GetAssayData(object = object, slot = "counts"), + data = LayerData(object = object, layer = "counts"), max.umi = 100, verbose = FALSE ) downsampled.umis.p.cell <- SampleUMI( - data = GetAssayData(object = object, slot = "counts"), + data = LayerData(object = object, layer = "counts"), max.umi = seq(50, 1640, 20), verbose = FALSE, upsample = TRUE ) test_that("SampleUMI gives reasonable downsampled/upsampled UMI counts", { expect_true(!any(colSums(x = downsampled.umis) < 30, colSums(x = downsampled.umis) > 120)) - expect_error(SampleUMI(data = GetAssayData(object = object, slot = "raw.data"), max.umi = rep(1, 5))) + expect_error(SampleUMI(data = LayerData(object = object, layer = "raw.data"), max.umi = rep(1, 5))) expect_true(!is.unsorted(x = colSums(x = downsampled.umis.p.cell))) expect_error(SampleUMI( - data = GetAssayData(object = object, slot = "counts"), + data = LayerData(object = object, layer = "counts"), max.umi = seq(50, 900, 10), verbose = FALSE, upsample = TRUE @@ -381,13 +381,13 @@ test_that("SCTransform ncells param works", { }) suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) -object[["SCT"]] <- SetAssayData(object = object[["SCT"]], slot = "scale.data", new.data = GetAssayData(object = object[["SCT"]], slot = "scale.data")[1:100, ]) +object[["SCT"]] <- SetAssayData(object = object[["SCT"]], slot = "scale.data", new.data = GetAssayData(object = object[["SCT"]], layer = "scale.data")[1:100, ]) object <- GetResidual(object = object, features = rownames(x = object), verbose = FALSE) test_that("GetResidual works", { - expect_equal(dim(GetAssayData(object = object[["SCT"]], slot = "scale.data")), c(220, 80)) + expect_equal(dim(GetAssayData(object = object[["SCT"]], layer = "scale.data")), c(220, 80)) expect_equal( - GetAssayData(object = object[["SCT"]], slot = "scale.data"), - GetAssayData(object = object[["SCT_SAVE"]], slot = "scale.data") + GetAssayData(object = object[["SCT"]], layer = "scale.data"), + GetAssayData(object = object[["SCT_SAVE"]], layer = "scale.data") ) expect_warning(GetResidual(object, features = "asd")) }) @@ -396,12 +396,12 @@ object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.fla test_that("SCTransform v2 works as expected", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 24.5813, tolerance = 1e-4) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 58.65829, tolerance = 1e-6) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 13.75449, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 141) - expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 40) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 24.5813, tolerance = 1e-4) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "data"))[1]), 58.65829, tolerance = 1e-6) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "data"))[5]), 13.75449, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[1]), 141) + expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[5]), 40) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) From e8fefd3c72baad31170c1373856496769dda5581 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 16:21:10 -0400 Subject: [PATCH 860/979] Update tests --- R/objects.R | 2 +- tests/testthat/test_transferdata.R | 10 +++++----- tests/testthat/test_visualization.R | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/objects.R b/R/objects.R index f57dbb9dc..3f0306960 100644 --- a/R/objects.R +++ b/R/objects.R @@ -3146,7 +3146,7 @@ UpdateSlots <- function(object) { object.list <- c('Class' = class(x = object)[1], object.list) object <- rlang::exec( .fn = new, - .args = object.list + !!! object.list ) for (x in setdiff(x = slotNames(x = object), y = names(x = object.list))) { xobj <- slot(object = object, name = x) diff --git a/tests/testthat/test_transferdata.R b/tests/testthat/test_transferdata.R index bb7cb0527..7ece18912 100644 --- a/tests/testthat/test_transferdata.R +++ b/tests/testthat/test_transferdata.R @@ -8,7 +8,7 @@ query <- CreateSeuratObject( counts = as.sparse( GetAssayData( object = pbmc_small[['RNA']], - slot = "counts") + rpois(n = ncol(pbmc_small), + layer = "counts") + rpois(n = ncol(pbmc_small), lambda = 1 ) ) @@ -36,8 +36,8 @@ test_that("TransferData default work", { # continuous assay data pred.assay <- TransferData(anchorset = anchors, refdata = GetAssayData(ref[["RNA"]]), verbose = FALSE) expect_equal(dim(pred.assay), c(230, 80)) - expect_equal(GetAssayData(pred.assay, slot = "counts"), new("matrix")) - expect_equal(GetAssayData(pred.assay, slot = "scale.data"), new("matrix")) + expect_equal(GetAssayData(pred.assay, layer = "counts"), new("matrix")) + expect_equal(GetAssayData(pred.assay, layer = "scale.data"), new("matrix")) expect_equal(colnames(pred.assay), Cells(query)) expect_equal(rownames(pred.assay), rownames(ref[["RNA"]])) expect_equal(sum(GetAssayData(pred.assay)[1, ]), 64.46388, tolerance = 1e-6) @@ -51,8 +51,8 @@ test_that("TransferData can return predictions assay, ", { pred.assay <- TransferData(anchorset = anchors, refdata = ref$RNA_snn_res.1, prediction.assay = TRUE, verbose = FALSE) expect_true(inherits(pred.assay, "Assay")) expect_equal(dim(pred.assay), c(4, 80)) - expect_equal(GetAssayData(pred.assay, slot = "counts"), new("matrix")) - expect_equal(GetAssayData(pred.assay, slot = "scale.data"), new("matrix")) + expect_equal(GetAssayData(pred.assay, layer = "counts"), new("matrix")) + expect_equal(GetAssayData(pred.assay, layer = "scale.data"), new("matrix")) expect_equal(colnames(pred.assay), Cells(query)) expect_equal(pred.assay@var.features, logical(0)) expect_equal(ncol(pred.assay@meta.features), 0) diff --git a/tests/testthat/test_visualization.R b/tests/testthat/test_visualization.R index bb4513552..037c6ceaf 100644 --- a/tests/testthat/test_visualization.R +++ b/tests/testthat/test_visualization.R @@ -4,7 +4,7 @@ set.seed(42) # Tests for visualization utilities # ------------------------------------------------------------------------------ -pbmc_small[["tsne_new"]] <- CollapseEmbeddingOutliers(pbmc_small, +pbmc_small[["tsne_new"]] <- CollapseEmbeddingOutliers(pbmc_small, reduction = "tsne", reduction.key = 'tsne_', outlier.sd = 0.5) test_that("CollapseEmbeddingOutliers works", { From d8facf657738e274dd28ddcd5ffd9554a7b23993 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 16:25:55 -0400 Subject: [PATCH 861/979] Fix tests --- R/utilities.R | 2 +- tests/testthat/test_dimensional_reduction.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 62cc8c291..ff647a436 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -538,7 +538,7 @@ AverageExpression <- function( assay = assays[1], category.matrix = category.matrix, features = features[[1]], - slot = "counts" + layer = "counts" ) toRet <- CreateSeuratObject( counts = summed.counts, diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index 6dac7a9c8..16cd09a97 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -51,7 +51,7 @@ test_that("pca returns total variance (see #982)", { )) # Using stats::prcomp - scaled_data <- Seurat::LayerData(object = obj, layer = "scale.data") + scaled_data <- LayerData(object = obj, layer = "scale.data") prcomp_result <- stats::prcomp(scaled_data, center = FALSE, scale. = FALSE) # Compare From 7444a42696dfc3340bff3d231465dead4617ac3b Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Fri, 13 Oct 2023 16:40:10 -0400 Subject: [PATCH 862/979] Fixed overflow.check error --- R/differential_expression.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 034d4c027..d8a18a559 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -2498,12 +2498,12 @@ WilcoxDETest <- function( group.info[cells.1, "group"] <- "Group1" group.info[cells.2, "group"] <- "Group2" group.info[, "group"] <- factor(x = group.info[, "group"]) - if (presto.check[1] && overflow.check && (!limma)) { + if (presto.check[1] && (!limma)) { data.use <- data.use[, rownames(group.info), drop = FALSE] res <- presto::wilcoxauc(X = data.use, y = group.info[, "group"]) res <- res[1:(nrow(x = res)/2),] p_val <- res$pval - } else if (overflow.check) { + } else { if (getOption('Seurat.presto.wilcox.msg', TRUE) && (!limma)) { message( "For a (much!) faster implementation of the Wilcoxon Rank Sum Test,", @@ -2518,7 +2518,7 @@ WilcoxDETest <- function( ) options(Seurat.presto.wilcox.msg = FALSE) } - if (limma.check[1]) { + if (limma.check[1] && overflow.check) { p_val <- my.sapply( X = 1:nrow(x = data.use), FUN = function(x) { @@ -2526,7 +2526,7 @@ WilcoxDETest <- function( } ) } else { - if (limma) { + if (limma && overflow.check) { stop( "To use the limma implementation of the Wilcoxon Rank Sum Test, please install the limma package: From 4d84d5f650ecccde8529ceac866edb72ed0d19bf Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Fri, 13 Oct 2023 16:45:37 -0400 Subject: [PATCH 863/979] Update version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c8c65ae46..e20a1f136 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9071 +Version: 4.9.9.9072 Date: 2023-10-04 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 7792a55b05dc69ca209f6273b069b628cdfeb1a8 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Fri, 13 Oct 2023 16:56:52 -0400 Subject: [PATCH 864/979] Update RcppExports.cpp --- src/RcppExports.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From b00c9c0af6049a4d840a5fc0380843f7e9e4b009 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Fri, 13 Oct 2023 17:25:22 -0400 Subject: [PATCH 865/979] fix warnings if correct layer not found in ScaleData and PrepDR5 --- R/dimensional_reduction.R | 7 ++++--- R/preprocessing5.R | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 9d095ebac..86f0674ee 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2418,10 +2418,11 @@ PrepDR <- function( PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRUE) { layer <- layer[1L] - if (!(layer %in% Layers(object = object))) { - abort(paste0("Layer '", layer, "' not found. Please run ScaleData and retry")) + olayer <- layer + layer <- Layers(object = object, search = layer) + if (is.null(layer)) { + abort(paste0("No layer matching pattern '", olayer, "' not found. Please run ScaleData and retry")) } - layer <- match.arg(arg = layer, choices = Layers(object = object)) data.use <- LayerData(object = object, layer = layer) features <- features %||% VariableFeatures(object = object) if (!length(x = features)) { diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 77b5f0ae2..4a70b036c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -434,10 +434,10 @@ ScaleData.StdAssay <- function( ) { use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) olayer <- layer <- unique(x = layer) - if (!(layer %in% Layers(object = object))) { - abort(paste0("Layer '", layer, "' not found. Please run NormalizeData and retry")) - } layer <- Layers(object = object, search = layer) + if (is.null(layer)) { + abort(paste0("No layer matching pattern '", olayer, "' found. Please run NormalizeData and retry")) + } if (isTRUE(x = use.umi)) { layer <- "counts" inform( From b1499780c6a04b6bbca275615cfffd868ebb54cc Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 17:48:22 -0400 Subject: [PATCH 866/979] Update NAMESPACE --- NAMESPACE | 1 - man/ProjectIntegration.Rd | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b6e01c348..193fc35e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -801,7 +801,6 @@ importFrom(rlang,check_installed) importFrom(rlang,enquo) importFrom(rlang,exec) importFrom(rlang,inform) -importFrom(rlang,invoke) importFrom(rlang,is_integerish) importFrom(rlang,is_na) importFrom(rlang,is_quosure) diff --git a/man/ProjectIntegration.Rd b/man/ProjectIntegration.Rd index c61c4648c..84f3ea9fe 100644 --- a/man/ProjectIntegration.Rd +++ b/man/ProjectIntegration.Rd @@ -50,7 +50,7 @@ for all cells (default is 'sketch'). Can be one of: \item{ratio}{Sketch ratio of data slot when \code{dictionary.method} is set to \dQuote{\code{sketch}}; defaults to 0.8} -\item{sketched.layers}{Names of sketched layers, defaults to all +\item{sketched.layers}{Names of sketched layers, defaults to all layers of \dQuote{\code{object[[assay]]}}} \item{seed}{A positive integer. The seed for the random number generator, defaults to 123.} From 925175d798adc098cda0ab99df34a8de7f531ec1 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 13 Oct 2023 17:48:41 -0400 Subject: [PATCH 867/979] Bump version; date --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1b484a2ed..45795fa90 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9070 -Date: 2023-10-04 +Version: 4.9.9.9071 +Date: 2023-10-13 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From c394ae3c23a58cd3c1a315f0bd41cdb2c67f1782 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Fri, 13 Oct 2023 18:32:25 -0400 Subject: [PATCH 868/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 118392dfc..2dcd652a9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9073 +Version: 4.9.9.9074 Date: 2023-10-14 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 05761cb5d539c94a59ba99073c37fb0aedf6ed5a Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:00:39 -0400 Subject: [PATCH 869/979] fix mvp unused arguments error --- R/preprocessing5.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 414ac258c..0b98db57c 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1978,7 +1978,8 @@ MVP <- function( verbose = TRUE, nselect = 2000L, mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf) + dispersion.cutoff = c(1, Inf), + ... ) { hvf.info <- DISP(data = data, nselect = nselect, verbose = verbose) hvf.info$variable <- FALSE From 9760c263b1225fb2799c29b16f8d202c5731f5a2 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 16 Oct 2023 12:14:18 -0400 Subject: [PATCH 870/979] Fix for cell_attr in FetchResiduals for V5 assays --- R/preprocessing5.R | 4 +++- vignettes/seurat5_sctransform_v2_vignette.Rmd | 11 +++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 414ac258c..fb163a762 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1787,8 +1787,10 @@ FetchResidualSCTModel <- function( min_var <- vst_out$arguments$min_variance } if (nrow(umi)>0){ + vst_out.tmp <- vst_out + vst_out.tmp$cell_attr <- vst_out.tmp$cell_attr[colnames(x = umi),] new_residual <- get_residuals( - vst_out = vst_out, + vst_out = vst_out.tmp, umi = umi, residual_type = "pearson", min_variance = min_var, diff --git a/vignettes/seurat5_sctransform_v2_vignette.Rmd b/vignettes/seurat5_sctransform_v2_vignette.Rmd index 9f309799c..21cd4d43b 100644 --- a/vignettes/seurat5_sctransform_v2_vignette.Rmd +++ b/vignettes/seurat5_sctransform_v2_vignette.Rmd @@ -36,7 +36,7 @@ knitr::opts_chunk$set( We recently introduced [sctransform](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) to perform normalization and variance stabilization of scRNA-seq datasets. We now release an updated version ('v2'), based on [our broad analysis](https://www.biorxiv.org/content/10.1101/2021.07.07.451498v1) of 59 scRNA-seq datasets spanning a range of technologies, systems, and sequencing depths. This update improves speed and memory consumption, the stability of parameter estimates, the identification of variable features, and the the ability to perform downstream differential expression analyses. -Users can install sctransform v2 from CRAN (sctransform v0.3.3) and invoke the use of the updated method via the `vst.flavor` argument. +Users can install sctransform v2 from CRAN (sctransform v0.3.3) and invoke the use of the updated method via the `vst.flavor` argument (This is the default in SeuratV5). ```{r tldr, eval=FALSE} # install sctransform >= 0.3.3 @@ -55,16 +55,14 @@ In this vignette, we use [sctransform v2](https://github.com/satijalab/sctransfo * Compare the datasets to find cell-type specific responses to stimulation * Obtain cell type markers that are conserved in both control and stimulated cells -## Install sctransform +## Install dependencies -We will install sctransform v2 from CRAN (v0.3.3). We will also install the [glmGamPoi](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html) package which substantially improves the speed of the learning procedure. +We will install the [glmGamPoi](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html) package which substantially improves the speed of the learning procedure. -```{r results='hide', message=FALSE, warning=FALSE} +```{r results='hide', message=FALSE, warning=FALSE, eval=FALSE} # install glmGamPoi if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("glmGamPoi") -# install sctransform from Github -install.packages("sctransform") ``` ## Setup the Seurat objects @@ -88,6 +86,7 @@ InstallData("ifnb") ```{r init, results='hide', message=FALSE, fig.keep='none'} # load dataset ifnb <- LoadData("ifnb") +ifnb <- UpdateSeuratObject(object = ifnb) ifnb[["RNA"]] <- as(ifnb[["RNA"]], Class = "Assay5") # split the dataset into a list of two seurat objects (stim and CTRL) From fac03ffbb78dd29d82e8b0e7c62aa0493003381b Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 16 Oct 2023 15:26:46 -0400 Subject: [PATCH 871/979] fixing integrateembeddings --- R/integration.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index aac8ae03a..0261a6fd0 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1743,6 +1743,7 @@ IntegrateEmbeddings.IntegrationAnchorSet <- function( } #' @param reference Reference object used in anchorset construction #' @param query Query object used in anchorset construction +#' @param query.assay Name of the Assay to use from query #' @param reuse.weights.matrix Can be used in conjunction with the store.weights #' parameter in TransferData to reuse a precomputed weights matrix. #' @@ -1755,6 +1756,7 @@ IntegrateEmbeddings.TransferAnchorSet <- function( anchorset, reference, query, + query.assay = NULL, new.reduction.name = "integrated_dr", reductions = 'pcaproject', dims.to.integrate = NULL, @@ -1770,11 +1772,13 @@ IntegrateEmbeddings.TransferAnchorSet <- function( combined.object <- slot(object = anchorset, name = 'object.list')[[1]] anchors <- slot(object = anchorset, name = 'anchors') weights.matrix <- NULL + query.assay <- query.assay %||% DefaultAssay(query) ValidateParams_IntegrateEmbeddings_TransferAnchors( anchorset = anchorset, combined.object = combined.object, reference = reference, query = query, + query.assay = query.assay, reductions = reductions, dims.to.integrate = dims.to.integrate, k.weight = k.weight, @@ -6364,6 +6368,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( combined.object , reference, query, + query.assay, reductions, dims.to.integrate, k.weight, @@ -6384,7 +6389,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } query.cells <- slot(object = anchorset, name = "query.cells") query.cells <- gsub(pattern = "_query", replacement = "", x = query.cells) - if (!isTRUE(x = all.equal(target = query.cells, current = colnames(x = query), check.attributes = FALSE))) { + if (!isTRUE(x = all.equal(target = query.cells, current = colnames(x = query[[query.assay]]), check.attributes = FALSE))) { stop("The set of cells used as a query in the AnchorSet does not match ", "the set of cells provided in the query object.") } @@ -6401,7 +6406,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( reference[[reductions]] <- CreateDimReducObject(embeddings = reference.embeddings, assay = DefaultAssay(object = reference)) ModifyParam(param = "reference", value = reference) query <- RenameCells(object = query, new.names = paste0(Cells(x = query), "_query")) - query.embeddings <- Embeddings(object = combined.object[[reductions]])[Cells(x = query), ] + query.embeddings <- Embeddings(object = combined.object[[reductions]])[Cells(x = query[[query.assay]]), ] query[[reductions]] <- CreateDimReducObject(embeddings = query.embeddings, assay = DefaultAssay(object = query)) ModifyParam(param = "query", value = query) ModifyParam(param = "reductions", value = c(reductions, reductions)) From 24f917ab37a937aea453f760f34087181900c329 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Mon, 16 Oct 2023 15:37:35 -0400 Subject: [PATCH 872/979] query cells --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index 0261a6fd0..d77f3ee6b 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1831,7 +1831,7 @@ IntegrateEmbeddings.TransferAnchorSet <- function( ) integrated.embeddings <- as.matrix(x = integrated.embeddings) query[[new.reduction.name]] <- CreateDimReducObject( - embeddings = t(x = integrated.embeddings[, Cells(x = query)]), + embeddings = t(x = integrated.embeddings[, Cells(x = query[[query.assay]])]), assay = DefaultAssay(object = query[[reductions[1]]]), key = paste0(new.reduction.name.safe, "_") ) From 57604c066bc736d7ac14541d452b03ddadaf7210 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 16 Oct 2023 17:13:31 -0400 Subject: [PATCH 873/979] Documentation --- man/CCAIntegration.Rd | 3 --- man/DEenrichRPlot.Rd | 4 +++- man/FindAllMarkers.Rd | 10 ++++++---- man/FindMarkers.Rd | 26 ++++++++++++++------------ man/HarmonyIntegration.Rd | 4 ---- man/IntegrateLayers.Rd | 4 ---- man/JointPCAIntegration.Rd | 3 --- man/MixscapeHeatmap.Rd | 4 +++- man/MixscapeLDA.Rd | 2 +- man/PrepLDA.Rd | 2 +- man/RPCAIntegration.Rd | 5 ++--- man/RunMixscape.Rd | 2 +- 12 files changed, 31 insertions(+), 38 deletions(-) diff --git a/man/CCAIntegration.Rd b/man/CCAIntegration.Rd index 65af0f2e9..a032bd9d2 100644 --- a/man/CCAIntegration.Rd +++ b/man/CCAIntegration.Rd @@ -14,7 +14,6 @@ CCAIntegration( features = NULL, normalization.method = c("LogNormalize", "SCT"), dims = 1:30, - groups = NULL, k.filter = NA, scale.layer = "scale.data", dims.to.integrate = NULL, @@ -47,8 +46,6 @@ or SCT} \item{dims}{Dimensions of dimensional reduction to use for integration} -\item{groups}{A one-column data frame with grouping information} - \item{k.filter}{Number of anchors to filter} \item{scale.layer}{Name of scaled layer in \code{Assay}} diff --git a/man/DEenrichRPlot.Rd b/man/DEenrichRPlot.Rd index 0bf4a36e9..0f833ee02 100644 --- a/man/DEenrichRPlot.Rd +++ b/man/DEenrichRPlot.Rd @@ -32,7 +32,7 @@ DEenrichRPlot( positive DE genes.If false, only positive DE gene will be displayed.} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{assay}{Assay to use in differential expression testing} @@ -43,6 +43,8 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test (default) + \item{"wilcox_limma"} : Identifies differentially expressed genes between two + groups of cells using the limma implementation of the Wilcoxon Rank Sum test \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd index 622474624..3fa616abb 100644 --- a/man/FindAllMarkers.Rd +++ b/man/FindAllMarkers.Rd @@ -9,10 +9,10 @@ FindAllMarkers( object, assay = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = "wilcox", slot = "data", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, node = NULL, verbose = TRUE, @@ -38,13 +38,15 @@ FindAllMarkers( \item{features}{Genes to test. Default is to use all genes} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{test.use}{Denotes which test to use. Available options are: \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test (default) + \item{"wilcox_limma"} : Identifies differentially expressed genes between two + groups of cells using the limma implementation of the Wilcoxon Rank Sum test \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. @@ -87,7 +89,7 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{min.pct}{only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function -by not testing genes that are very infrequently expressed. Default is 0.1} +by not testing genes that are very infrequently expressed. Default is 0.01} \item{min.diff.pct}{only test genes that show a minimum difference in the fraction of detection between the two groups. Set to -Inf by default} diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index 840f9ee40..74061e26b 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -19,9 +19,9 @@ FindMarkers(object, ...) cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -42,9 +42,9 @@ FindMarkers(object, ...) cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -68,9 +68,9 @@ FindMarkers(object, ...) cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -93,9 +93,9 @@ FindMarkers(object, ...) cells.1 = NULL, cells.2 = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -121,10 +121,10 @@ FindMarkers(object, ...) slot = "data", reduction = NULL, features = NULL, - logfc.threshold = 0.25, + logfc.threshold = 0.1, pseudocount.use = 0.1, test.use = "wilcox", - min.pct = 0.1, + min.pct = 0.01, min.diff.pct = -Inf, verbose = TRUE, only.pos = FALSE, @@ -159,13 +159,15 @@ expressing} \item{features}{Genes to test. Default is to use all genes} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{test.use}{Denotes which test to use. Available options are: \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test (default) + \item{"wilcox_limma"} : Identifies differentially expressed genes between two + groups of cells using the limma implementation of the Wilcoxon Rank Sum test \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. @@ -205,7 +207,7 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{min.pct}{only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function -by not testing genes that are very infrequently expressed. Default is 0.1} +by not testing genes that are very infrequently expressed. Default is 0.01} \item{min.diff.pct}{only test genes that show a minimum difference in the fraction of detection between the two groups. Set to -Inf by default} diff --git a/man/HarmonyIntegration.Rd b/man/HarmonyIntegration.Rd index 74b2981d7..ea459e35a 100644 --- a/man/HarmonyIntegration.Rd +++ b/man/HarmonyIntegration.Rd @@ -7,7 +7,6 @@ HarmonyIntegration( object, orig, - groups, features = NULL, scale.layer = "scale.data", new.reduction = "harmony", @@ -33,9 +32,6 @@ HarmonyIntegration( \item{orig}{A \link[SeuratObject:DimReduc]{dimensional reduction} to correct} -\item{groups}{A one-column data frame with grouping information; column -should be called \code{group}} - \item{features}{Ignored} \item{scale.layer}{Ignored} diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd index fcda66333..0396b77ee 100644 --- a/man/IntegrateLayers.Rd +++ b/man/IntegrateLayers.Rd @@ -8,7 +8,6 @@ IntegrateLayers( object, method, orig.reduction = "pca", - group.by = NULL, assay = NULL, features = NULL, layers = NULL, @@ -23,9 +22,6 @@ IntegrateLayers( \item{orig.reduction}{Name of dimensional reduction for correction} -\item{group.by}{Name of meta data to group cells by; defaults to splits -assay layers} - \item{assay}{Name of assay for integration} \item{features}{A vector of features to use for integration} diff --git a/man/JointPCAIntegration.Rd b/man/JointPCAIntegration.Rd index 56546d092..3487ed1c8 100644 --- a/man/JointPCAIntegration.Rd +++ b/man/JointPCAIntegration.Rd @@ -22,7 +22,6 @@ JointPCAIntegration( sd.weight = 1, sample.tree = NULL, preserve.order = FALSE, - groups = NULL, verbose = TRUE, ... ) @@ -90,8 +89,6 @@ If NULL, the sample tree will be computed automatically.} \item{preserve.order}{Do not reorder objects based on size for each pairwise integration.} -\item{groups}{A one-column data frame with grouping information} - \item{verbose}{Print progress} \item{...}{Arguments passed on to \code{FindIntegrationAnchors}} diff --git a/man/MixscapeHeatmap.Rd b/man/MixscapeHeatmap.Rd index fabae8b35..a3662b707 100644 --- a/man/MixscapeHeatmap.Rd +++ b/man/MixscapeHeatmap.Rd @@ -37,7 +37,7 @@ use all other cells for comparison; if an object of class \code{phylo} or \item{balanced}{Plot an equal number of genes with both groups of cells.} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{assay}{Assay to use in differential expression testing} @@ -48,6 +48,8 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two groups of cells using a Wilcoxon Rank Sum test (default) + \item{"wilcox_limma"} : Identifies differentially expressed genes between two + groups of cells using the limma implementation of the Wilcoxon Rank Sum test \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. diff --git a/man/MixscapeLDA.Rd b/man/MixscapeLDA.Rd index 63f9b5b57..723f32a6a 100644 --- a/man/MixscapeLDA.Rd +++ b/man/MixscapeLDA.Rd @@ -43,7 +43,7 @@ MixscapeLDA( \item{verbose}{Print progress bar.} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} } \value{ diff --git a/man/PrepLDA.Rd b/man/PrepLDA.Rd index 8e5121b6e..be4da7838 100644 --- a/man/PrepLDA.Rd +++ b/man/PrepLDA.Rd @@ -31,7 +31,7 @@ PrepLDA( \item{verbose}{Print progress bar.} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} } \value{ diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index a8390a863..b0cba1fb9 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -16,7 +16,6 @@ RPCAIntegration( dims = 1:30, k.filter = NA, scale.layer = "scale.data", - groups = NULL, dims.to.integrate = NULL, k.weight = 100, weight.reduction = NULL, @@ -51,8 +50,6 @@ or SCT} \item{scale.layer}{Name of scaled layer in \code{Assay}} -\item{groups}{A one-column data frame with grouping information} - \item{dims.to.integrate}{Number of dimensions to return integrated values for} \item{k.weight}{Number of neighbors to consider when weighting anchors} @@ -95,6 +92,8 @@ integration.} \item{verbose}{Print progress} \item{...}{Arguments passed on to \code{FindIntegrationAnchors}} + +\item{groups}{A one-column data frame with grouping information} } \description{ Seurat-RPCA Integration diff --git a/man/RunMixscape.Rd b/man/RunMixscape.Rd index 1214043e2..2d2fe5d75 100644 --- a/man/RunMixscape.Rd +++ b/man/RunMixscape.Rd @@ -48,7 +48,7 @@ all are assigned NP.} Usually RNA.} \item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.25 +X-fold difference (log-scale) between the two groups of cells. Default is 0.1 Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{iter.num}{Number of normalmixEM iterations to run if convergence does From 1a5d06eaeddc21acf764cfc1424051af63941561 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 16 Oct 2023 17:14:36 -0400 Subject: [PATCH 874/979] Bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2dcd652a9..dcac6cc4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9074 -Date: 2023-10-14 +Version: 4.9.9.9075 +Date: 2023-10-15 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 0719fbcdfd9c98a49723f1c7f993606c9f6a9ba0 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 16 Oct 2023 17:51:42 -0400 Subject: [PATCH 875/979] Default assays to V3 when running integration --- R/dimensional_reduction.R | 2 ++ src/RcppExports.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 86f0674ee..a6794589d 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -572,6 +572,8 @@ RunCCA.Seurat <- function( verbose = TRUE, ... ) { + op <- options(Seurat.object.assay.version = "v3", Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) assay1 <- assay1 %||% DefaultAssay(object = object1) assay2 <- assay2 %||% DefaultAssay(object = object2) if (assay1 != assay2) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 9d4955515e9148567a9021cdb94d01a60c072e4a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 16 Oct 2023 17:53:46 -0400 Subject: [PATCH 876/979] Bump version --- DESCRIPTION | 4 ++-- R/dimensional_reduction.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dcac6cc4f..b2f770a5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9075 -Date: 2023-10-15 +Version: 4.9.9.9076 +Date: 2023-10-16 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index a6794589d..bcf61f645 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -651,7 +651,7 @@ RunCCA.Seurat <- function( warning("Some cells removed after object merge due to minimum feature count cutoff") } combined.scale <- cbind(data1,data2) - combined.object <- SetAssayData(object = combined.object,new.data = combined.scale, slot = "scale.data") + combined.object <- SetAssayData(object = combined.object, new.data = combined.scale, slot = "scale.data") ## combined.object@assays$ToIntegrate@scale.data <- combined.scale if (renormalize) { combined.object <- NormalizeData( From d891cbb74ed453a9ec3c2576543dd8f3727d3c06 Mon Sep 17 00:00:00 2001 From: samuel-marsh Date: Tue, 17 Oct 2023 10:27:17 -0400 Subject: [PATCH 877/979] highlight raster fix --- R/visualization.R | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index cd8de4748..e15e6308d 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -7775,6 +7775,8 @@ ScaleColumn <- function(vec, cutoffs) { # @param cols.highlight Colors to highlight cells as # @param col.base Base color to use for unselected cells # @param pt.size Size of unselected cells +# @param raster Convert points to raster format, default is \code{NULL} which +# automatically rasterizes if plotting more than 100,000 cells # # @return A list will cell highlight information # \describe{ @@ -7790,7 +7792,8 @@ SetHighlight <- function( sizes.highlight, cols.highlight, col.base = 'black', - pt.size = 1 + pt.size = 1, + raster = NULL ) { if (is.character(x = cells.highlight)) { cells.highlight <- list(cells.highlight) @@ -7834,6 +7837,12 @@ SetHighlight <- function( size[index.check] <- sizes.highlight[i] } } + + # Check for raster + if (isTRUE(x = raster)) { + size <- size[1] + } + plot.order <- sort(x = unique(x = highlight), na.last = TRUE) plot.order[is.na(x = plot.order)] <- 'Unselected' highlight[is.na(x = highlight)] <- 'Unselected' @@ -7959,7 +7968,8 @@ SingleCorPlot <- function( sizes.highlight = pt.size, cols.highlight = 'red', col.base = 'black', - pt.size = pt.size + pt.size = pt.size, + raster = raster ) cols <- highlight.info$color col.by <- factor( @@ -8134,6 +8144,11 @@ SingleDimPlot <- function( } raster <- raster %||% (nrow(x = data) > 1e5) pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) + + if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && raster = TRUE) { + warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight.") + } + if (!is.null(x = raster.dpi)) { if (!is.numeric(x = raster.dpi) || length(x = raster.dpi) != 2) stop("'raster.dpi' must be a two-length numeric vector") @@ -8160,7 +8175,8 @@ SingleDimPlot <- function( sizes.highlight = sizes.highlight %||% pt.size, cols.highlight = cols.highlight, col.base = cols[1] %||% '#C3C3C3', - pt.size = pt.size + pt.size = pt.size, + raster = raster ) order <- highlight.info$plot.order data$highlight <- highlight.info$highlight From ab24dc7b66ab1b64ffe16676e9c06aaccbbc55dc Mon Sep 17 00:00:00 2001 From: samuel-marsh Date: Tue, 17 Oct 2023 10:27:30 -0400 Subject: [PATCH 878/979] raster warning fix --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index e15e6308d..c29609e27 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -7920,7 +7920,7 @@ SingleCorPlot <- function( jitter = TRUE ) { pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) - if ((nrow(x = data) > 1e5) & !is.null(x = raster)){ + if ((nrow(x = data) > 1e5) & is.null(x = raster)){ message("Rasterizing points since number of points exceeds 100,000.", "\nTo disable this behavior set `raster=FALSE`") } From e95463bd2e726bcc750c181ccea8c0282301a92c Mon Sep 17 00:00:00 2001 From: samuel-marsh Date: Tue, 17 Oct 2023 12:08:47 -0400 Subject: [PATCH 879/979] doc update --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index c29609e27..84fc58255 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -800,7 +800,7 @@ ColorDimSplit <- function( #' @param cols.highlight A vector of colors to highlight the cells as; will #' repeat to the length groups in cells.highlight #' @param sizes.highlight Size of highlighted cells; will repeat to the length -#' groups in cells.highlight +#' groups in cells.highlight. If \code{sizes.highlight = TRUE} size of all points will be this value. #' @param na.value Color value for NA points when using custom scale #' @param ncol Number of columns for display when combining plots #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} From c7d2ac8ab7a32aaef158b7d1d677e537da17f6a5 Mon Sep 17 00:00:00 2001 From: samuel-marsh Date: Tue, 17 Oct 2023 12:09:04 -0400 Subject: [PATCH 880/979] styled --- R/visualization.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 84fc58255..2d552be6b 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -800,7 +800,8 @@ ColorDimSplit <- function( #' @param cols.highlight A vector of colors to highlight the cells as; will #' repeat to the length groups in cells.highlight #' @param sizes.highlight Size of highlighted cells; will repeat to the length -#' groups in cells.highlight. If \code{sizes.highlight = TRUE} size of all points will be this value. +#' groups in cells.highlight. If \code{sizes.highlight = TRUE} size of all +#' points will be this value. #' @param na.value Color value for NA points when using custom scale #' @param ncol Number of columns for display when combining plots #' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} From bb1c5622df819d6e88ceec07d095514ed87c0eeb Mon Sep 17 00:00:00 2001 From: samuel-marsh Date: Tue, 17 Oct 2023 12:14:35 -0400 Subject: [PATCH 881/979] add closing quotation --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 2d552be6b..d3842a76a 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -8147,7 +8147,7 @@ SingleDimPlot <- function( pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && raster = TRUE) { - warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight.") + warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight'.") } if (!is.null(x = raster.dpi)) { From 0e97eae56a47f94f9de006e2be63b52874a0d987 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 12:54:56 -0400 Subject: [PATCH 882/979] Skip BPCell tests on CRAN --- DESCRIPTION | 1 - man/MVP.Rd | 3 +- src/RcppExports.cpp | 2 +- tests/testthat.R | 8 ++-- tests/testthat/test_differential_expression.R | 32 ++++++++----- tests/testthat/test_dimensional_reduction.R | 24 +++++----- tests/testthat/test_preprocessing.R | 47 ++++++++++++++----- 7 files changed, 76 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b2f770a5a..79326dfcc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -78,7 +78,6 @@ Imports: scales, scattermore (>= 1.2), sctransform (>= 0.4.0), - SeuratObject (>= 4.1.4), shiny, spatstat.explore, spatstat.geom, diff --git a/man/MVP.Rd b/man/MVP.Rd index 85c1281e0..516015275 100644 --- a/man/MVP.Rd +++ b/man/MVP.Rd @@ -9,7 +9,8 @@ MVP( verbose = TRUE, nselect = 2000L, mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf) + dispersion.cutoff = c(1, Inf), + ... ) } \arguments{ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, diff --git a/tests/testthat.R b/tests/testthat.R index e6f5ed2e5..b5d1c18cd 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -6,7 +6,7 @@ message('Run tests for v5 assay') options(Seurat.object.assay.version = 'v5') test_check("Seurat") -# Run tests for 'v3' -message('Run tests for v3 assay') -options(Seurat.object.assay.version = 'v3') -test_check("Seurat") +# # Run tests for 'v3' +# message('Run tests for v3 assay') +# options(Seurat.object.assay.version = 'v3') +# test_check("Seurat") diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index a4c962ec8..e5f2d57a5 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -32,7 +32,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(markers.0[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) expect_equal(nrow(x = markers.0), 227) expect_equal(rownames(markers.0)[1], "HLA-DPB1") - + expect_equal(colnames(x = markers.0.limma), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) expect_equal(markers.0.limma[1, "p_val"], 9.572778e-13, tolerance = 1e-18) expect_equal(markers.0.limma[1, "avg_logFC"], -4.034691, tolerance = 1e-6) @@ -49,7 +49,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(markers.01[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) expect_equal(nrow(x = markers.01), 222) expect_equal(rownames(x = markers.01)[1], "TYMP") - + expect_equal(markers.01.limma[1, "p_val"], 1.702818e-11, tolerance = 1e-16) expect_equal(markers.01.limma[1, "avg_logFC"], -2.539289, tolerance = 1e-6) expect_equal(markers.01.limma[1, "pct.1"], 0.111) @@ -66,7 +66,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.clr[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) expect_equal(nrow(x = results.clr), 167) expect_equal(rownames(x = results.clr)[1], "S100A8") - + expect_equal(results.clr.limma[1, "p_val"], 1.209462e-11, tolerance = 1e-16) expect_equal(results.clr.limma[1, "avg_logFC"], -0.8290693, tolerance = 1e-6) expect_equal(results.clr.limma[1, "pct.1"], 0.111) @@ -83,7 +83,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.sct[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) expect_equal(nrow(x = results.sct), 197) expect_equal(rownames(x = results.sct)[1], "CST3") - + expect_equal(results.sct.limma[1, "p_val"], 4.646968e-11, tolerance = 1e-16) expect_equal(results.sct.limma[1, "avg_logFC"], -1.8522457, tolerance = 1e-6) expect_equal(results.sct.limma[1, "pct.1"], 0.333) @@ -320,20 +320,22 @@ test_that("LR test works", { expect_equal(rownames(x = results)[1], "LYZ") }) -mat_bpcells <- t(as(t(pbmc_small[['RNA']]$counts ), "IterableMatrix")) -pbmc_small[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) -pbmc_small <- NormalizeData(pbmc_small, assay = "RNAbp") - -markers.bp <- suppressWarnings(FindMarkers(object = pbmc_small, assay = "RNAbp", ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) test_that("BPCells FindMarkers gives same results", { + skip_on_cran() + library(BPCells) + library(Matrix) + mat_bpcells <- t(as(t(pbmc_small[['RNA']]$counts ), "IterableMatrix")) + pbmc_small[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + pbmc_small <- NormalizeData(pbmc_small, assay = "RNAbp") + markers.bp <- suppressWarnings(FindMarkers(object = pbmc_small, assay = "RNAbp", ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) expect_equal(colnames(x = markers.bp), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) expect_equal(markers.bp[1, "p_val"], 9.572778e-13) expect_equal(markers.bp[1, "avg_logFC"], -4.034691, tolerance = 1e-6) expect_equal(markers.bp[1, "pct.1"], 0.083) expect_equal(markers.bp[1, "pct.2"], 0.909) expect_equal(markers.bp[1, "p_val_adj"], 2.201739e-10) - expect_equal(nrow(x = markers.bp), 204) + expect_equal(nrow(x = markers.bp), 227) expect_equal(rownames(markers.bp)[1], "HLA-DPB1") }) @@ -414,9 +416,17 @@ test_that("FindMarkers recognizes log normalization", { expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) }) -results.bp <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, assay = "RNAbp", pseudocount.use=1))) test_that("BPCells FindAllMarkers gives same results", { + skip_on_cran() + library(BPCells) + library(Matrix) + mat_bpcells <- t(as(t(pbmc_small[['RNA']]$counts ), "IterableMatrix")) + pbmc_small[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + pbmc_small <- NormalizeData(pbmc_small, assay = "RNAbp") + + results.bp <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, assay = "RNAbp", pseudocount.use=1))) + expect_equal(colnames(x = results.bp), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) expect_equal(results.bp[1, "p_val"], 9.572778e-13) expect_equal(results.bp[1, "avg_log2FC"], -5.820829, tolerance = 1e-6) diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index 5f83ea862..1eea2d992 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -51,18 +51,20 @@ test_that("pca returns total variance (see #982)", { }) -mat_bpcells <- t(as(t(obj[['RNA']]$counts ), "IterableMatrix")) -obj[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) -DefaultAssay(obj) <- "RNAbp" -obj <- NormalizeData(object = obj, verbose = FALSE) -obj <- ScaleData(object = obj, verbose=FALSE) -pca_result_bp <- suppressWarnings(expr = RunPCA( - object = obj, - features = rownames(obj[['RNAbp']]$counts), - assay = "RNAbp")) - test_that("pca is equivalent for BPCells", { - expect_equivalent(abs(pca_result_bp[['pca']]@cell.embeddings), + skip_on_cran() + library(Matrix) + library(BPCells) + mat_bpcells <- t(x = as(object = t(x = obj[['RNA']]$counts ), Class = "IterableMatrix")) + obj[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + DefaultAssay(obj) <- "RNAbp" + obj <- NormalizeData(object = obj, verbose = FALSE) + obj <- ScaleData(object = obj, verbose=FALSE) + pca_result_bp <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(obj[['RNAbp']]$counts), + assay = "RNAbp")) + expect_equivalent(abs(pca_result_bp[['pca']]@cell.embeddings), abs(pca_result[['pca']]@cell.embeddings), tolerance = 1e-5) }) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 0f24b3d92..174079be9 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -131,25 +131,41 @@ if(class(object[['RNA']]) == "Assay5") { }) } -# Tests for BPCells NormalizeData -# -------------------------------------------------------------------------------- -#make Iterable matrix -mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) -object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) -object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNAbp") -object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNA") test_that("NormalizeData scales properly for BPcells", { + # Tests for BPCells NormalizeData + # -------------------------------------------------------------------------------- + + skip_on_cran() + library(Matrix) + library(BPCells) + mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) + object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + + object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNAbp") + object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNA") + expect_equal(as.matrix(object[['RNAbp']]$data), as.matrix(object[['RNA']]$data), tolerance = 1e-6) expect_equal(Command(object = object, command = "NormalizeData.RNAbp", value = "scale.factor"), 1e6) expect_equal(Command(object = object, command = "NormalizeData.RNAbp", value = "normalization.method"), "LogNormalize") }) -normalized.data.bp <- LogNormalize(data = GetAssayData(object = object[["RNAbp"]], layer = "counts"), verbose = FALSE) -normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE) + test_that("LogNormalize normalizes properly for BPCells", { + skip_on_cran() + library(Matrix) + library(BPCells) + mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) + object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + + object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNAbp") + object <- NormalizeData(object = object, verbose = FALSE, scale.factor = 1e6, assay = "RNA") + + normalized.data.bp <- LogNormalize(data = GetAssayData(object = object[["RNAbp"]], layer = "counts"), verbose = FALSE) + normalized.data <- LogNormalize(data = GetAssayData(object = object[["RNA"]], layer = "counts"), verbose = FALSE) + expect_equal( as.matrix(normalized.data.bp), as.matrix(normalized.data), @@ -445,10 +461,17 @@ test_that("SCTransform v2 works as expected", { expect_equal(fa["FCER2", "theta"], Inf) }) -object <- suppressWarnings(SCTransform(object = object, assay = "RNAbp", new.assay.name = "SCTbp", - verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) test_that("SCTransform is equivalent for BPcells ", { - expect_equal(as.matrix(LayerData(object = object[["SCT"]], layer = "data")), + skip_on_cran() + library(Matrix) + library(BPCells) + mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) + object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + + object <- suppressWarnings(SCTransform(object = object, assay = "RNAbp", new.assay.name = "SCTbp", + verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) + + expect_equal(as.matrix(LayerData(object = object[["SCT"]], layer = "data")), as.matrix(LayerData(object = object[["SCTbp"]], layer = "data")), tolerance = 1e-6) }) From ddbe943375f52e7f3d6bcd241f68698caea481dc Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 12:56:35 -0400 Subject: [PATCH 883/979] Enable v3 tests --- tests/testthat.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/testthat.R b/tests/testthat.R index b5d1c18cd..94db12da2 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,12 +1,14 @@ library(testthat) library(Seurat) +# Run tests for 'v3' +message('Run tests for v3 assay') +options(Seurat.object.assay.version = 'v3') +test_check("Seurat") + # Run tests for 'v5' message('Run tests for v5 assay') options(Seurat.object.assay.version = 'v5') test_check("Seurat") -# # Run tests for 'v3' -# message('Run tests for v3 assay') -# options(Seurat.object.assay.version = 'v3') -# test_check("Seurat") + From 81b6af58ab0d8144c98cce4710aace0a05e930d7 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 12:56:54 -0400 Subject: [PATCH 884/979] Bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 79326dfcc..67271cfc9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9076 -Date: 2023-10-16 +Version: 4.9.9.9077 +Date: 2023-10-17 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 9011e3d20e7cbc47706a28164b63262313fc44a7 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Tue, 17 Oct 2023 15:26:58 -0400 Subject: [PATCH 885/979] fixed RunPCA.StdAssay when features are not in obj --- R/dimensional_reduction.R | 12 +++++++++++- vignettes/cell_cycle_vignette.Rmd | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index bcf61f645..e47bb4c93 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -2449,7 +2449,17 @@ PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRU } features <- features.keep features <- features[!is.na(x = features)] - data.use <- data.use[features, ] + features.use <- features[features %in% rownames(data.use)] + if(!isTRUE(all.equal(features, features.use))) { + missing_features <- setdiff(features, features.use) + if(length(missing_features) > 0) { + warning_message <- paste("The following features were not available: ", + paste(missing_features, collapse = ", "), + ".", sep = "") + warning(warning_message, immediate. = TRUE) + } + } + data.use <- data.use[features.use, ] return(data.use) } diff --git a/vignettes/cell_cycle_vignette.Rmd b/vignettes/cell_cycle_vignette.Rmd index a180b1ae1..3c0d026b6 100644 --- a/vignettes/cell_cycle_vignette.Rmd +++ b/vignettes/cell_cycle_vignette.Rmd @@ -39,7 +39,7 @@ library(Seurat) # Read in the expression matrix # The first row is a header row, the first column is rownames -exp.mat <- read.table(file = "../data/nestorawa_forcellcycle_expressionMatrix.txt", header = TRUE, as.is = TRUE, row.names = 1) +exp.mat <- read.table(file = "/Users/sli/seurat-private/data/cell_cycle_vignette_files/nestorawa_forcellcycle_expressionMatrix.txt", header = TRUE, as.is = TRUE, row.names = 1) # A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. # We can segregate this list into markers of G2/M phase and markers of S phase @@ -47,7 +47,7 @@ s.genes <- cc.genes$s.genes g2m.genes <- cc.genes$g2m.genes # Create our Seurat object and complete the initalization steps -marrow <- CreateSeuratObject(counts = exp.mat) +marrow <- CreateSeuratObject(counts = Matrix::Matrix(as.matrix(exp.mat),sparse = T)) marrow <- NormalizeData(marrow) marrow <- FindVariableFeatures(marrow, selection.method = 'vst') marrow <- ScaleData(marrow, features = rownames(marrow)) From 7bedb7b28a2c6cee74fee72255d6fa528de5d3a0 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 17 Oct 2023 16:23:14 -0400 Subject: [PATCH 886/979] update AverageExpression docs and warnings --- R/utilities.R | 22 +++++++++++++--------- man/AggregateExpression.Rd | 9 +++++---- man/AverageExpression.Rd | 9 +++++---- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index ff647a436..a235a200a 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -334,9 +334,9 @@ AddModuleScore <- function( #' @param assays Which assays to use. Default is all assays #' @param features Features to analyze. Default is all features in the assay #' @param return.seurat Whether to return the data as a Seurat object. Default is FALSE -#' @param group.by Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default -#' @param add.ident (Deprecated) Place an additional label on each cell prior to pseudobulking -#' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) +#' @param group.by Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default +#' To use multiple categories, specify a vector, such as c('ident', 'replicate', 'celltype') +#' @param add.ident (Deprecated). Place an additional label on each cell prior to pseudobulking #' @param normalization.method Method for normalization, see \code{\link{NormalizeData}} #' @param scale.factor Scale factor for normalization, see \code{\link{NormalizeData}} #' @param margin Margin to perform CLR normalization, see \code{\link{NormalizeData}} @@ -351,7 +351,8 @@ AddModuleScore <- function( #' @examples #' \dontrun{ #' data("pbmc_small") -#' head(AggregateExpression(object = pbmc_small)) +#' head(AggregateExpression(object = pbmc_small)$RNA) +#' head(AggregateExpression(object = pbmc_small, group.by = c('ident', 'groups'))$RNA) #' } #' AggregateExpression <- function( @@ -405,9 +406,9 @@ AggregateExpression <- function( #' @param assays Which assays to use. Default is all assays #' @param features Features to analyze. Default is all features in the assay #' @param return.seurat Whether to return the data as a Seurat object. Default is FALSE -#' @param group.by Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default +#' @param group.by Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default +#' To use multiple categories, specify a vector, such as c('ident', 'replicate', 'celltype') #' @param add.ident (Deprecated). Place an additional label on each cell prior to pseudobulking -#' (very useful if you want to observe cluster pseudobulk values, separated by replicate, for example) #' @param layer Layer(s) to use; if multiple layers are given, assumed to follow #' the order of 'assays' (if specified) or object's assays #' @param slot (Deprecated). Slots(s) to use @@ -426,7 +427,8 @@ AggregateExpression <- function( #' #' @examples #' data("pbmc_small") -#' head(AverageExpression(object = pbmc_small)) +#' head(AverageExpression(object = pbmc_small)$RNA) +#' head(AverageExpression(object = pbmc_small, group.by = c('ident', 'groups'))$RNA) #' AverageExpression <- function( object, @@ -446,7 +448,7 @@ AverageExpression <- function( ) { CheckDots(..., fxns = 'CreateSeuratObject') if (!is.null(x = add.ident)) { - .Deprecated(msg = "'add.ident' is a deprecated argument, please use the 'group.by' argument instead") + .Deprecated(msg = "'add.ident' is a deprecated argument. Please see documentation to see how to pass a vector to the 'group.by' argument to specify multiple grouping variables") group.by <- c('ident', add.ident) } if (!(method %in% c('average', 'aggregate'))) { @@ -469,7 +471,9 @@ AverageExpression <- function( } if (method =="average") { - message("As of Seurat v5, As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.") + inform(message = "As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.", + .frequency = "once", + .frequency_id = "AverageExpression") } object.assays <- .FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) diff --git a/man/AggregateExpression.Rd b/man/AggregateExpression.Rd index b34356f1a..72f219fbb 100644 --- a/man/AggregateExpression.Rd +++ b/man/AggregateExpression.Rd @@ -27,10 +27,10 @@ AggregateExpression( \item{return.seurat}{Whether to return the data as a Seurat object. Default is FALSE} -\item{group.by}{Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default} +\item{group.by}{Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default. +To use multiple categories, specify a vector, such as c('ident', 'replicate', 'celltype')} -\item{add.ident}{(Deprecated) Place an additional label on each cell prior to pseudobulking -(very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)} +\item{add.ident}{(Deprecated). Place an additional label on each cell prior to pseudobulking} \item{normalization.method}{Method for normalization, see \code{\link{NormalizeData}}} @@ -58,7 +58,8 @@ before returning the object. \examples{ \dontrun{ data("pbmc_small") -head(AggregateExpression(object = pbmc_small)) +head(AggregateExpression(object = pbmc_small)$RNA) +head(AggregateExpression(object = pbmc_small, group.by = c('ident', 'groups'))$RNA) } } diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index a0ba1410e..bd9ecc177 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -30,10 +30,10 @@ AverageExpression( \item{return.seurat}{Whether to return the data as a Seurat object. Default is FALSE} -\item{group.by}{Categories for grouping (e.g, ident, replicate, celltype); 'ident' by default} +\item{group.by}{Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default. +To use multiple categories, specify a vector, such as c('ident', 'replicate', 'celltype')} -\item{add.ident}{(Deprecated). Place an additional label on each cell prior to pseudobulking -(very useful if you want to observe cluster pseudobulk values, separated by replicate, for example)} +\item{add.ident}{(Deprecated). Place an additional label on each cell prior to pseudobulking} \item{layer}{Layer(s) to use; if multiple layers are given, assumed to follow the order of 'assays' (if specified) or object's assays} @@ -73,7 +73,8 @@ average counts and 'scale.data' is set to the averaged values. } \examples{ data("pbmc_small") -head(AverageExpression(object = pbmc_small)) +head(AverageExpression(object = pbmc_small)$RNA) +head(AverageExpression(object = pbmc_small, group.by = c('ident', 'groups'))$RNA) } \concept{utilities} From 2c91755a72c95307380599530d828314995c8805 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 16:30:31 -0400 Subject: [PATCH 887/979] Fix tests for ncells --- tests/testthat/test_preprocessing.R | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 174079be9..3d968278c 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -187,8 +187,9 @@ new.data <- as.matrix(GetAssayData(object = object[["RNA"]], layer = "data")) new.data[1, ] <- rep(x = 0, times = ncol(x = new.data)) object2 <- object -object2[["RNA"]] <- SetAssayData( - object = object[["RNA"]], +object2 <- SetAssayData( + object = object, + assay = "RNA", slot = "data", new.data = new.data ) @@ -235,12 +236,14 @@ test_that("split.by option works with regression", { # Tests for various regression techniques context("Regression") -object <- ScaleData( +suppressWarnings({ + object <- ScaleData( object = object, vars.to.regress = "nCount_RNA", features = rownames(x = object)[1:10], verbose = FALSE, model.use = "linear") + }) test_that("Linear regression works as expected", { expect_equal(dim(x = GetAssayData(object = object[["RNA"]], layer = "scale.data")), c(10, 80)) @@ -299,7 +302,7 @@ downsampled.umis.p.cell <- SampleUMI( ) test_that("SampleUMI gives reasonable downsampled/upsampled UMI counts", { expect_true(!any(colSums(x = downsampled.umis) < 30, colSums(x = downsampled.umis) > 120)) - expect_error(SampleUMI(data = LayerData(object = object, layer = "raw.data"), max.umi = rep(1, 5))) + expect_error(SampleUMI(data = LayerData(object = object, layer = "counts"), max.umi = rep(1, 5))) expect_true(!is.unsorted(x = colSums(x = downsampled.umis.p.cell))) expect_error(SampleUMI( data = LayerData(object = object, layer = "counts"), @@ -411,22 +414,22 @@ test_that("SCTransform v2 works as expected", { }) suppressWarnings(RNGversion(vstr = "3.5.0")) -object <- suppressWarnings(SCTransform(object = object, vst.flavor = "v1", ncells = 40, verbose = FALSE, seed.use = 42)) +object <- suppressWarnings(SCTransform(object = object, vst.flavor = "v1", ncells = 80, verbose = FALSE, seed.use = 42)) test_that("SCTransform ncells param works", { expect_true("SCT" %in% names(object)) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 12.02126, tolerance = 1e6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 11.40288, tolerance = 1e-6) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "data"))[1]), 60.65299, tolerance = 1e-6) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "data"))[1]), 57.72957, tolerance = 1e-6) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "data"))[5]), 11.74404, tolerance = 1e-6) - expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[1]), 136) + expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[1]), 129) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "counts"))[5]), 28) expect_equal(length(VariableFeatures(object[["SCT"]])), 220) fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes") expect_equal(fa["MS4A1", "detection_rate"], 0.15) expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6) expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6) - expect_equal(fa["MS4A1", "residual_mean"], 0.2829672, tolerance = 1e-3) - expect_equal(fa["MS4A1", "residual_variance"], 3.674079, tolerance = 1e-3) + expect_equal(fa["MS4A1", "residual_mean"], 0.2362887, tolerance = 1e-3) + expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-3) }) suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) From 16739ca1171b7a82cbd21016d7150d2a8196e59c Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 17 Oct 2023 16:46:57 -0400 Subject: [PATCH 888/979] ignore warnings within AverageExpression for HTODemux --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 37410d805..d545ac9ca 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -249,11 +249,11 @@ HTODemux <- function( ) #average hto signals per cluster #work around so we don't average all the RNA levels which takes time - average.expression <- AverageExpression( + average.expression <- suppressWarnings(AverageExpression( object = object, assays = assay, verbose = FALSE - )[[assay]] + )[[assay]]) #checking for any cluster with all zero counts for any barcode if (sum(average.expression == 0) > 0) { stop("Cells with zero counts exist as a cluster.") From 70243373c8397e7848dcac1f0893820b39e39c99 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Tue, 17 Oct 2023 17:07:46 -0400 Subject: [PATCH 889/979] fix mvp var features --- R/preprocessing5.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c4b5a57e3..580f84d72 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -159,11 +159,9 @@ FindVariableFeatures.StdAssay <- function( sep = '_' ) rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) - object[colnames(x = hvf.info)] <- hvf.info + object[names(x = hvf.info)] <- NULL + object[names(x = hvf.info)] <- hvf.info } - object@meta.data$var.features <- NULL - object@meta.data$var.features.rank <- NULL - VariableFeatures(object = object) <- VariableFeatures(object = object, nfeatures = nfeatures, method = key) return(object) } @@ -1985,10 +1983,16 @@ MVP <- function( ) { hvf.info <- DISP(data = data, nselect = nselect, verbose = verbose) hvf.info$variable <- FALSE + hvf.info$rank <- NA + hvf.info <- hvf.info[order(hvf.info$mvp.dispersion, decreasing = TRUE), , drop = FALSE] means.use <- (hvf.info[, 1] > mean.cutoff[1]) & (hvf.info[, 1] < mean.cutoff[2]) dispersions.use <- (hvf.info[, 3] > dispersion.cutoff[1]) & (hvf.info[, 3] < dispersion.cutoff[2]) hvf.info[which(x = means.use & dispersions.use), 'variable'] <- TRUE - hvf.info[hvf.info$variable,'rank'] <- rank(x = hvf.info[hvf.info$variable,'rank']) - hvf.info[!hvf.info$variable,'rank'] <- NA + rank.rows <- rownames(x = hvf.info)[which(x = means.use & dispersions.use)] + selected.indices <- which(rownames(x = hvf.info) %in% rank.rows) + hvf.info$rank[selected.indices] <- seq_along(selected.indices) + hvf.info <- hvf.info[order(as.numeric(row.names(hvf.info))), ] + # hvf.info[hvf.info$variable,'rank'] <- rank(x = hvf.info[hvf.info$variable,'rank']) + # hvf.info[!hvf.info$variable,'rank'] <- NA return(hvf.info) } From b904d856a616654310530fd1011d7740fa6ef4bc Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Tue, 17 Oct 2023 17:12:02 -0400 Subject: [PATCH 890/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 12853e1b2..798e1f7a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9072 -Date: 2023-10-10 +Version: 4.9.9.9078 +Date: 2023-10-17 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From e4107c04a296e7c979cb624446cd1b06cdc37de4 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 17:25:55 -0400 Subject: [PATCH 891/979] Minor updates for tests and documentation --- R/differential_expression.R | 24 +++++++++---------- R/integration5.R | 1 - R/preprocessing5.R | 8 +------ R/utilities.R | 2 -- man/FetchResiduals.Rd | 6 ----- man/PrepSCTFindMarkers.Rd | 4 ++-- man/RPCAIntegration.Rd | 2 -- man/reexports.Rd | 4 ++-- tests/testthat/test_differential_expression.R | 2 +- 9 files changed, 18 insertions(+), 35 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 07218a9d0..0fc69219b 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -576,7 +576,7 @@ FindMarkers.default <- function( } if (inherits(x = object, what = "IterableMatrix")){ if(test.use != "wilcox"){ - stop("Differential expression with BPCells currently only supports the 'wilcox' method.", + stop("Differential expression with BPCells currently only supports the 'wilcox' method.", " Please rerun with test.use = 'wilcox'") } data.use <- object[features, c(cells.1, cells.2), drop = FALSE] @@ -2183,8 +2183,8 @@ PerformDE <- function( #' @template section-future #' @examples #' data("pbmc_small") -#' pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20) -#' pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20) +#' pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20, vst.flavor="v1") +#' pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20, vst.flavor="v1") #' pbmc_merged <- merge(x = pbmc_small1, y = pbmc_small2) #' pbmc_merged <- PrepSCTFindMarkers(object = pbmc_merged) #' markers <- FindMarkers( @@ -2451,7 +2451,7 @@ ValidateCellGroups <- function( # is requested, makes use of limma::rankSumTestWithCorrelation for a # more efficient implementation of the wilcoxon test. Thanks to Yunshun Chen and # Gordon Smyth for suggesting the limma implementation. If limma is also not installed, -# uses wilcox.test. +# uses wilcox.test. # # @param data.use Data matrix to test # @param cells.1 Group 1 cells @@ -2481,7 +2481,7 @@ WilcoxDETest <- function( cells.1, cells.2, verbose = TRUE, - limma = FALSE, + limma = FALSE, ... ) { data.use <- data.use[, c(cells.1, cells.2), drop = FALSE] @@ -2502,13 +2502,13 @@ WilcoxDETest <- function( group.info[cells.1, "group"] <- "Group1" group.info[cells.2, "group"] <- "Group2" group.info[, "group"] <- factor(x = group.info[, "group"]) - if (presto.check[1] && (!limma)) { - data.use <- data.use[, rownames(group.info), drop = FALSE] - res <- presto::wilcoxauc(X = data.use, y = group.info[, "group"]) + if (presto.check[1] && (!limma)) { + data.use <- data.use[, rownames(group.info), drop = FALSE] + res <- presto::wilcoxauc(X = data.use, y = group.info[, "group"]) res <- res[1:(nrow(x = res)/2),] p_val <- res$pval } else { - if (getOption('Seurat.presto.wilcox.msg', TRUE) && (!limma)) { + if (getOption('Seurat.presto.wilcox.msg', TRUE) && (!limma)) { message( "For a (much!) faster implementation of the Wilcoxon Rank Sum Test,", "\n(default method for FindMarkers) please install the presto package", @@ -2522,7 +2522,7 @@ WilcoxDETest <- function( ) options(Seurat.presto.wilcox.msg = FALSE) } - if (limma.check[1] && overflow.check) { + if (limma.check[1] && overflow.check) { p_val <- my.sapply( X = 1:nrow(x = data.use), FUN = function(x) { @@ -2530,7 +2530,7 @@ WilcoxDETest <- function( } ) } else { - if (limma && overflow.check) { + if (limma && overflow.check) { stop( "To use the limma implementation of the Wilcoxon Rank Sum Test, please install the limma package: @@ -2539,7 +2539,7 @@ WilcoxDETest <- function( BiocManager::install('limma') --------------------------------------------" ) - } else { + } else { data.use <- data.use[, rownames(x = group.info), drop = FALSE] p_val <- my.sapply( X = 1:nrow(x = data.use), diff --git a/R/integration5.R b/R/integration5.R index 05078c9cd..98d464c30 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -296,7 +296,6 @@ attr(x = CCAIntegration, which = 'Seurat.method') <- 'integration' #' @param dims Dimensions of dimensional reduction to use for integration #' @param k.filter Number of anchors to filter #' @param scale.layer Name of scaled layer in \code{Assay} -#' @param groups A one-column data frame with grouping information #' @param verbose Print progress #' @param ... Additional arguments passed to \code{FindIntegrationAnchors} #' diff --git a/R/preprocessing5.R b/R/preprocessing5.R index c4b5a57e3..c9e04a796 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -1447,12 +1447,6 @@ SCTransform.StdAssay <- function( #' @concept preprocessing #' #' @seealso \code{\link[sctransform]{get_residuals}} -#' -#' @examples -#' data("pbmc_small") -#' pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) -#' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) -#' FetchResiduals <- function( object, features, @@ -1980,7 +1974,7 @@ MVP <- function( verbose = TRUE, nselect = 2000L, mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), + dispersion.cutoff = c(1, Inf), ... ) { hvf.info <- DISP(data = data, nselect = nselect, verbose = verbose) diff --git a/R/utilities.R b/R/utilities.R index ff647a436..980a57a5f 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -562,7 +562,6 @@ AverageExpression <- function( normalization.method = normalization.method, scale.factor = scale.factor, margin = margin, - block.size = block.size, verbose = verbose) } #for multimodal data @@ -588,7 +587,6 @@ AverageExpression <- function( normalization.method = normalization.method, scale.factor = scale.factor, margin = margin, - block.size = block.size, verbose = verbose) } } diff --git a/man/FetchResiduals.Rd b/man/FetchResiduals.Rd index c11af3ec1..ba8fac0c1 100644 --- a/man/FetchResiduals.Rd +++ b/man/FetchResiduals.Rd @@ -51,12 +51,6 @@ features in its scale.data } \description{ This function calls sctransform::get_residuals. -} -\examples{ -data("pbmc_small") -pbmc_small <- SCTransform(object = pbmc_small, variable.features.n = 20) -pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) - } \seealso{ \code{\link[sctransform]{get_residuals}} diff --git a/man/PrepSCTFindMarkers.Rd b/man/PrepSCTFindMarkers.Rd index c24af42ce..4c29d71c0 100644 --- a/man/PrepSCTFindMarkers.Rd +++ b/man/PrepSCTFindMarkers.Rd @@ -51,8 +51,8 @@ to \pkg{future}, see \examples{ data("pbmc_small") -pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20) -pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20) +pbmc_small1 <- SCTransform(object = pbmc_small, variable.features.n = 20, vst.flavor="v1") +pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20, vst.flavor="v1") pbmc_merged <- merge(x = pbmc_small1, y = pbmc_small2) pbmc_merged <- PrepSCTFindMarkers(object = pbmc_merged) markers <- FindMarkers( diff --git a/man/RPCAIntegration.Rd b/man/RPCAIntegration.Rd index b0cba1fb9..d8af16626 100644 --- a/man/RPCAIntegration.Rd +++ b/man/RPCAIntegration.Rd @@ -92,8 +92,6 @@ integration.} \item{verbose}{Print progress} \item{...}{Arguments passed on to \code{FindIntegrationAnchors}} - -\item{groups}{A one-column data frame with grouping information} } \description{ Seurat-RPCA Integration diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index e5f2d57a5..9b6b18d8a 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -7,7 +7,7 @@ set.seed(seed = 42) context("FindMarkers") clr.obj <- suppressWarnings(NormalizeData(pbmc_small, normalization.method = "CLR")) -sct.obj <- suppressWarnings(suppressMessages(SCTransform(pbmc_small))) +sct.obj <- suppressWarnings(suppressMessages(SCTransform(pbmc_small, vst.flavor = "v2"))) markers.0 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1),pseudocount.use = 1)) From 20930325aa372c0c62e4b0515289f06430e9539b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 17:30:20 -0400 Subject: [PATCH 892/979] Add additional repositories for BPCells --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 67271cfc9..c091c76ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Authors@R: c( ) URL: https://satijalab.org/seurat, https://github.com/satijalab/seurat BugReports: https://github.com/satijalab/seurat/issues -Additional_repositories: https://satijalab.r-universe.dev +Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-universe.dev Depends: R (>= 4.0.0), methods, From 6476e8be67907c4c9ccc8e97971743bd08657727 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Tue, 17 Oct 2023 17:32:15 -0400 Subject: [PATCH 893/979] supresswarnings within DietSeurat --- R/objects.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/objects.R b/R/objects.R index 3f0306960..7e3229d9d 100644 --- a/R/objects.R +++ b/R/objects.R @@ -572,7 +572,7 @@ DietSeurat <- function( abort(message = "Cannot remove both 'counts' and 'data' from v3 Assays") } for (lyr in layers.rm) { - object <- tryCatch(expr = { + suppressWarnings(object <- tryCatch(expr = { object[[assay]][[lyr]] <- NULL object }, error = function(e) { @@ -586,7 +586,7 @@ DietSeurat <- function( message("Converting layer ", lyr, " in assay ", assay, " to empty dgCMatrix") object - }) + })) } } if (!is.null(x = features)) { @@ -603,7 +603,7 @@ DietSeurat <- function( object[[assay]] <- NULL next } - object[[assay]] <- subset(x = object[[assay]], features = features.assay) + suppressWarnings(object[[assay]] <- subset(x = object[[assay]], features = features.assay)) } } # remove misc when desired From 8f092b55559a8e2fa8ca7c1cd25b66260c2bd948 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 18:18:10 -0400 Subject: [PATCH 894/979] Update text for SCT tests --- tests/testthat/test_preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index 3d968278c..a80850bc6 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -378,7 +378,7 @@ test_that("CustomNormalize works as expected", { context("SCTransform") object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v1", seed.use = 1448145)) -test_that("SCTransform wrapper works as expected", { +test_that("SCTransform v1 works as expected", { expect_true("SCT" %in% names(object)) expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 11.40288448) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) From b09b901d5bf1b69ad6d0c55680759f1c3377036d Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 17 Oct 2023 18:20:23 -0400 Subject: [PATCH 895/979] Bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c091c76ff..4e0c05f04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9077 +Version: 4.9.9.9078 Date: 2023-10-17 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 59d22b434aabcb12ad786e329fd8eaff2e5fe324 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Tue, 17 Oct 2023 18:27:18 -0400 Subject: [PATCH 896/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dcac6cc4f..0623405be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9075 -Date: 2023-10-15 +Version: 4.9.9.9079 +Date: 2023-10-17 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 2ba6cbf280ebebf5aa0f8bc3460d200f725e766a Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 18 Oct 2023 20:08:30 -0400 Subject: [PATCH 897/979] Update visualization.R --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index d3842a76a..a04265e2f 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -8146,7 +8146,7 @@ SingleDimPlot <- function( raster <- raster %||% (nrow(x = data) > 1e5) pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) - if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && raster = TRUE) { + if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && isTRUE(x = raster) { warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight'.") } From fd27963b2030d0e0b401b48ae46df55cb27de766 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 18 Oct 2023 20:09:11 -0400 Subject: [PATCH 898/979] Update visualization.R --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index d3842a76a..e431f39fd 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -8146,7 +8146,7 @@ SingleDimPlot <- function( raster <- raster %||% (nrow(x = data) > 1e5) pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) - if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && raster = TRUE) { + if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && isTRUE(x = raster)) { warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight'.") } From 3944e43ba13bab9393460e00d9da84b83b319eec Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:41:09 -0400 Subject: [PATCH 899/979] Documentation updates --- man/ColorDimSplit.Rd | 3 ++- man/DimPlot.Rd | 3 ++- man/IntegrateEmbeddings.Rd | 3 +++ man/NNPlot.Rd | 3 ++- man/Seurat-package.Rd | 3 +++ man/TransferData.Rd | 3 +++ man/VariableFeaturePlot.Rd | 15 +-------------- 7 files changed, 16 insertions(+), 17 deletions(-) diff --git a/man/ColorDimSplit.Rd b/man/ColorDimSplit.Rd index d786b55fe..33efebabd 100644 --- a/man/ColorDimSplit.Rd +++ b/man/ColorDimSplit.Rd @@ -63,7 +63,8 @@ will also resize to the size(s) passed to \code{sizes.highlight}} \item{\code{cols.highlight}}{A vector of colors to highlight the cells as; will repeat to the length groups in cells.highlight} \item{\code{sizes.highlight}}{Size of highlighted cells; will repeat to the length -groups in cells.highlight} +groups in cells.highlight. If \code{sizes.highlight = TRUE} size of all +points will be this value.} \item{\code{na.value}}{Color value for NA points when using custom scale} \item{\code{ncol}}{Number of columns for display when combining plots} \item{\code{combine}}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} diff --git a/man/DimPlot.Rd b/man/DimPlot.Rd index 1f228dad0..a46a402f0 100644 --- a/man/DimPlot.Rd +++ b/man/DimPlot.Rd @@ -101,7 +101,8 @@ will also resize to the size(s) passed to \code{sizes.highlight}} repeat to the length groups in cells.highlight} \item{sizes.highlight}{Size of highlighted cells; will repeat to the length -groups in cells.highlight} +groups in cells.highlight. If \code{sizes.highlight = TRUE} size of all +points will be this value.} \item{na.value}{Color value for NA points when using custom scale} diff --git a/man/IntegrateEmbeddings.Rd b/man/IntegrateEmbeddings.Rd index dc0469132..304d0500a 100644 --- a/man/IntegrateEmbeddings.Rd +++ b/man/IntegrateEmbeddings.Rd @@ -26,6 +26,7 @@ IntegrateEmbeddings(anchorset, ...) anchorset, reference, query, + query.assay = NULL, new.reduction.name = "integrated_dr", reductions = "pcaproject", dims.to.integrate = NULL, @@ -96,6 +97,8 @@ integration.} \item{query}{Query object used in anchorset construction} +\item{query.assay}{Name of the Assay to use from query} + \item{reuse.weights.matrix}{Can be used in conjunction with the store.weights parameter in TransferData to reuse a precomputed weights matrix.} } diff --git a/man/NNPlot.Rd b/man/NNPlot.Rd index b6d51c6a5..cf41671ee 100644 --- a/man/NNPlot.Rd +++ b/man/NNPlot.Rd @@ -40,7 +40,8 @@ NNPlot( \item{repel}{Repel labels} \item{sizes.highlight}{Size of highlighted cells; will repeat to the length -groups in cells.highlight} +groups in cells.highlight. If \code{sizes.highlight = TRUE} size of all +points will be this value.} \item{pt.size}{Adjust point size for plotting} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index 79a0fbc5b..c8c0ca311 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -51,11 +51,14 @@ Other contributors: \item Saket Choudhary \email{schoudhary@nygenome.org} (\href{https://orcid.org/0000-0001-5202-7633}{ORCID}) [contributor] \item Charlotte Darby \email{cdarby@nygenome.org} (\href{https://orcid.org/0000-0003-2195-5300}{ORCID}) [contributor] \item Jeff Farrell \email{jfarrell@g.harvard.edu} [contributor] + \item Isabella Grabski \email{igrabski@nygenome.org} (\href{https://orcid.org/0000-0002-0616-5469}{ORCID}) [contributor] \item Christoph Hafemeister \email{chafemeister@nygenome.org} (\href{https://orcid.org/0000-0001-6365-8254}{ORCID}) [contributor] \item Yuhan Hao \email{yhao@nygenome.org} (\href{https://orcid.org/0000-0002-1810-0822}{ORCID}) [contributor] \item Austin Hartman \email{ahartman@nygenome.org} (\href{https://orcid.org/0000-0001-7278-1852}{ORCID}) [contributor] \item Jaison Jain \email{jjain@nygenome.org} (\href{https://orcid.org/0000-0002-9478-5018}{ORCID}) [contributor] + \item Longda Jiang \email{ljiang@nygenome.org} (\href{https://orcid.org/0000-0003-4964-6497}{ORCID}) [contributor] \item Madeline Kowalski \email{mkowalski@nygenome.org} (\href{https://orcid.org/0000-0002-5655-7620}{ORCID}) [contributor] + \item Skylar Li \email{sli@nygenome.org} [contributor] \item Gesmira Molla \email{gmolla@nygenome.org} (\href{https://orcid.org/0000-0002-8628-5056}{ORCID}) [contributor] \item Efthymia Papalexi \email{epapalexi@nygenome.org} (\href{https://orcid.org/0000-0001-5898-694X}{ORCID}) [contributor] \item Patrick Roelli \email{proelli@nygenome.org} [contributor] diff --git a/man/TransferData.Rd b/man/TransferData.Rd index f7ec22b26..e30977df7 100644 --- a/man/TransferData.Rd +++ b/man/TransferData.Rd @@ -9,6 +9,7 @@ TransferData( refdata, reference = NULL, query = NULL, + query.assay = NULL, weight.reduction = "pcaproject", l2.norm = FALSE, dims = NULL, @@ -43,6 +44,8 @@ TransferData( \item{query}{Query object into which the data will be transferred.} +\item{query.assay}{Name of the Assay to use from query} + \item{weight.reduction}{Dimensional reduction to use for the weighting anchors. Options are: \itemize{ diff --git a/man/VariableFeaturePlot.Rd b/man/VariableFeaturePlot.Rd index 9320b3f3d..b622025bc 100644 --- a/man/VariableFeaturePlot.Rd +++ b/man/VariableFeaturePlot.Rd @@ -26,20 +26,7 @@ VariableFeaturePlot( \item{log}{Plot the x-axis in log scale} -\item{selection.method}{Which method to pull. For \code{HVFInfo} and -\code{VariableFeatures}, choose one from one of the -following: -\itemize{ - \item \dQuote{vst} - \item \dQuote{sctransform} or \dQuote{sct} - \item \dQuote{mean.var.plot}, \dQuote{dispersion}, \dQuote{mvp}, or - \dQuote{disp} -} -For \code{SVFInfo} and \code{SpatiallyVariableFeatures}, choose from: -\itemize{ - \item \dQuote{markvariogram} - \item \dQuote{moransi} -}} +\item{selection.method}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}} \item{assay}{Assay to pull variable features from} From 9984136c5f21945f6e9069b6f77b535d93b66bd0 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:55:20 -0400 Subject: [PATCH 900/979] Default to verbosity 1 for SCTransform --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index a13c6c46d..85df73633 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3257,7 +3257,7 @@ SCTransform.default <- function( vst.args[['vst.flavor']] <- vst.flavor vst.args[['umi']] <- umi vst.args[['cell_attr']] <- cell.attr - vst.args[['verbosity']] <- as.numeric(x = verbose) * 2 + vst.args[['verbosity']] <- as.numeric(x = verbose) * 1 vst.args[['return_cell_attr']] <- TRUE vst.args[['return_gene_attr']] <- TRUE vst.args[['return_corrected_umi']] <- do.correct.umi @@ -3397,7 +3397,7 @@ SCTransform.default <- function( vst.out$umi_corrected <- correct_counts( x = vst.out, umi = umi, - verbosity = as.numeric(x = verbose) * 2 + verbosity = as.numeric(x = verbose) * 1 ) } vst.out From 178cb80ce78d69f2d6ba0e465c830fbf39d4a4f6 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:55:49 -0400 Subject: [PATCH 901/979] Use default block.size in AverageExpression --- R/utilities.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index cc78cd37f..7379c47f6 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1549,7 +1549,6 @@ PseudobulkExpression.Seurat <- function( normalization.method = normalization.method, scale.factor = scale.factor, margin = margin, - block.size = block.size, verbose = verbose ) } From d22053dfc4707eff22854579ebb3cd896969009b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:56:18 -0400 Subject: [PATCH 902/979] Update DE tests for SCT v2 --- tests/testthat/test_differential_expression.R | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 9b6b18d8a..5899ef574 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -7,7 +7,7 @@ set.seed(seed = 42) context("FindMarkers") clr.obj <- suppressWarnings(NormalizeData(pbmc_small, normalization.method = "CLR")) -sct.obj <- suppressWarnings(suppressMessages(SCTransform(pbmc_small, vst.flavor = "v2"))) +sct.obj <- suppressWarnings(suppressMessages(SCTransform(pbmc_small, vst.flavor = "v1"))) markers.0 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1),pseudocount.use = 1)) @@ -76,21 +76,21 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(rownames(x = results.clr.limma)[1], "S100A8") # SCT normalization - expect_equal(results.sct[1, "p_val"], 4.646968e-11, tolerance = 1e-16) - expect_equal(results.sct[1, "avg_logFC"], -1.8522457, tolerance = 1e-6) - expect_equal(results.sct[1, "pct.1"], 0.333) - expect_equal(results.sct[1, "pct.2"], 1.00) - expect_equal(results.sct[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct), 197) - expect_equal(rownames(x = results.sct)[1], "CST3") - - expect_equal(results.sct.limma[1, "p_val"], 4.646968e-11, tolerance = 1e-16) - expect_equal(results.sct.limma[1, "avg_logFC"], -1.8522457, tolerance = 1e-6) - expect_equal(results.sct.limma[1, "pct.1"], 0.333) - expect_equal(results.sct.limma[1, "pct.2"], 1.00) - expect_equal(results.sct.limma[1, "p_val_adj"], 1.022333e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct.limma), 197) - expect_equal(rownames(x = results.sct.limma)[1], "CST3") + expect_equal(results.sct[1, "p_val"], 6.225491e-11, tolerance = 1e-16) + expect_equal(results.sct[1, "avg_logFC"], -1.081321, tolerance = 1e-6) + expect_equal(results.sct[1, "pct.1"], 0.111) + expect_equal(results.sct[1, "pct.2"], 0.96) + expect_equal(results.sct[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) + expect_equal(nrow(x = results.sct), 195) + expect_equal(rownames(x = results.sct)[1], "TYMP") + + expect_equal(results.sct.limma[1, "p_val"], 6.225491e-11, tolerance = 1e-16) + expect_equal(results.sct.limma[1, "avg_logFC"], -1.081321, tolerance = 1e-6) + expect_equal(results.sct.limma[1, "pct.1"], 0.111) + expect_equal(results.sct.limma[1, "pct.2"], 0.96) + expect_equal(results.sct.limma[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) + expect_equal(nrow(x = results.sct.limma), 195) + expect_equal(rownames(x = results.sct.limma)[1], "TYMP") }) @@ -129,24 +129,24 @@ test_that("passing cell names works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) -results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1)) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1, vst.flavor = "v1")) test_that("setting pseudocount.use works", { expect_equal(nrow(x = results), 222) expect_equal(results[1, "avg_logFC"], -2.630395, tolerance = 1e-6) expect_equal(nrow(x = results.clr), 212) expect_equal(results.clr[1, "avg_logFC"], -2.317338, tolerance = 1e-6) - expect_equal(nrow(results.sct), 215) - expect_equal(results.sct[1, "avg_logFC"], -2.421716, tolerance = 1e-6) + expect_equal(nrow(results.sct), 214) + expect_equal(results.sct[1, "avg_logFC"], -2.25392, tolerance = 1e-6) }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans)) results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans)) -results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,mean.fxn = rowMeans)) +results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans, vst.flaovr = "v1")) test_that("setting mean.fxn works", { expect_equal(nrow(x = results), 216) expect_equal(results[1, "avg_logFC"], -4.204346, tolerance = 1e-6) expect_equal(results.clr[1, "avg_logFC"], -1.353025, tolerance = 1e-6) - expect_equal(results.sct[1, "avg_logFC"], -2.021490, tolerance = 1e-6) + expect_equal(results.sct[1, "avg_logFC"], -1.064042, tolerance = 1e-6) }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1), pseudocount.use = 1)) @@ -342,8 +342,8 @@ test_that("BPCells FindMarkers gives same results", { # Tests for FindAllMarkers # ------------------------------------------------------------------------------- results <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small,pseudocount.use=1))) -results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj,pseudocount.use=1))) -results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj,pseudocount.use=1))) +results.clr <- suppressMessages(suppressWarnings(FindAllMarkers(object = clr.obj, pseudocount.use=1))) +results.sct <- suppressMessages(suppressWarnings(FindAllMarkers(object = sct.obj, pseudocount.use=1, vst.flavor = "v1"))) results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc_small, pseudocount.use = 0.1))) test_that("FindAllMarkers works as expected", { @@ -366,12 +366,12 @@ test_that("FindAllMarkers works as expected", { expect_equal(rownames(x = results.clr)[1], "HLA-DPB1") # SCT normalization - expect_equal(results.sct[1, "p_val"], 1.366621e-12, tolerance = 1e-17) - expect_equal(results.sct[1, "avg_log2FC"], -2.848796, tolerance = 1e-6) - expect_equal(results.sct[1, "pct.1"], 0.111) + expect_equal(results.sct[1, "p_val"], 4.25861e-12, tolerance = 1e-17) + expect_equal(results.sct[1, "avg_log2FC"], -2.70188, tolerance = 1e-6) + expect_equal(results.sct[1, "pct.1"], 0.167) expect_equal(results.sct[1, "pct.2"], 0.909) - expect_equal(results.sct[1, "p_val_adj"], 3.006566e-10, tolerance = 1e-15) - expect_equal(nrow(x = results.sct), 204) + expect_equal(results.sct[1, "p_val_adj"], 9.368941e-10, tolerance = 1e-15) + expect_equal(nrow(x = results.sct), 212) expect_equal(rownames(x = results.sct)[1], "HLA-DPB1") # pseudocount.use = 0.1 From eb08c8e8042e09997caa4dc10d6cdcc5917a86fe Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:56:40 -0400 Subject: [PATCH 903/979] Update tests to skip harmony if not installed --- tests/testthat/test_integratedata.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_integratedata.R b/tests/testthat/test_integratedata.R index fe17ca37b..762dcb132 100644 --- a/tests/testthat/test_integratedata.R +++ b/tests/testthat/test_integratedata.R @@ -158,12 +158,7 @@ int_rpca <- suppressMessages(suppressWarnings(IntegrateLayers( k.weight=10, verbose = FALSE ))) -int_harmony <- suppressMessages(suppressWarnings(IntegrateLayers( - object = pbmc_small, method = HarmonyIntegration, - orig.reduction = "pca", new.reduction = "harmony", - k.weight=25, - verbose = FALSE -))) + # int_mnn <- suppressMessages(suppressWarnings(IntegrateLayers( # object = pbmc_small, method = FastMNNIntegration, # new.reduction = "integrated.mnn", @@ -175,13 +170,26 @@ int_harmony <- suppressMessages(suppressWarnings(IntegrateLayers( test_that("IntegrateLayers returns embeddings with correct dimensions ", { expect_equal(dim(int_cca[["integrated.cca"]]), c(80, 50)) expect_equal(dim(int_rpca[["integrated.rpca"]]), c(80, 50)) - expect_equal(dim(int_harmony[["harmony"]]), c(80, 50)) int_rpca expect_equal(int_cca[["integrated.cca"]]@assay.used, "RNAv5") #expect_equal(int_cca[['integrated.cca']]@cell.embeddings, c(3, 4, 5)) }) +test_that("IntegrateLayers works with harmony", { + skip_on_cran() + skip_if_not_installed("harmony") + int_harmony <- suppressMessages(suppressWarnings(IntegrateLayers( + object = pbmc_small, method = HarmonyIntegration, + orig.reduction = "pca", new.reduction = "harmony", + k.weight=25, + verbose = FALSE + ))) + expect_equal(dim(int_harmony[["harmony"]]), c(80, 50)) + + +}) + test_that("group.by ", { expect_equal(dim(int_cca[["integrated.cca"]]), c(80, 50)) expect_equal(int_cca[["integrated.cca"]]@assay.used, "RNAv5") From d1a4e67ded6d91c883ff3b939bedc4eacf197b0c Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:57:24 -0400 Subject: [PATCH 904/979] Update DE tests for SCT v2 --- tests/testthat/test_preprocessing.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index a80850bc6..e017263b0 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -395,8 +395,12 @@ test_that("SCTransform v1 works as expected", { expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6) }) -object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) test_that("SCTransform v2 works as expected", { + skip_on_cran() + skip_if_not_installed("glmGamPoi") + + object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) + expect_true("SCT" %in% names(object)) expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 24.5183, tolerance = 1e-2) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) @@ -433,7 +437,7 @@ test_that("SCTransform ncells param works", { }) suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]]) -object[["SCT"]] <- SetAssayData(object = object[["SCT"]], slot = "scale.data", new.data = GetAssayData(object = object[["SCT"]], layer = "scale.data")[1:100, ]) +object[["SCT"]] <- suppressWarnings({SetAssayData(object = object[["SCT"]], slot = "scale.data", new.data = GetAssayData(object = object[["SCT"]], layer = "scale.data")[1:100, ])}) object <- GetResidual(object = object, features = rownames(x = object), verbose = FALSE) test_that("GetResidual works", { expect_equal(dim(GetAssayData(object = object[["SCT"]], layer = "scale.data")), c(220, 80)) @@ -444,9 +448,12 @@ test_that("GetResidual works", { expect_warning(GetResidual(object, features = "asd")) }) -object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) test_that("SCTransform v2 works as expected", { + skip_on_cran() + skip_if_not_installed("glmGamPoi") + object <- suppressWarnings(SCTransform(object = object, verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) + expect_true("SCT" %in% names(object)) expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[1]), 24.5813, tolerance = 1e-4) expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], layer = "scale.data"))[5]), 0) @@ -466,10 +473,15 @@ test_that("SCTransform v2 works as expected", { test_that("SCTransform is equivalent for BPcells ", { skip_on_cran() + skip_on_cran() + skip_if_not_installed("glmGamPoi") + library(Matrix) library(BPCells) mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) + object <- suppressWarnings(SCTransform(object = object, assay = "RNA", new.assay.name = "SCT", + verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) object <- suppressWarnings(SCTransform(object = object, assay = "RNAbp", new.assay.name = "SCTbp", verbose = FALSE, vst.flavor = "v2", seed.use = 1448145)) From 9f394304ada4199c9530b25c967680d506c74154 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 20:57:44 -0400 Subject: [PATCH 905/979] Documenation updates --- man/reexports.Rd | 4 ++-- src/RcppExports.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 1a465978d7cd3a7cb77c02d351c52427a4504fce Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 18 Oct 2023 21:02:00 -0400 Subject: [PATCH 906/979] Bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 075781daf..4dae2ff36 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9080 +Version: 4.9.9.9081 Date: 2023-10-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 8e49cce932c107e66586c03be86c55d677593c65 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 18 Oct 2023 11:54:48 -0400 Subject: [PATCH 907/979] add PseudobulkExpression.Seurat that both AverageExpression and AggregateExpression call --- R/utilities.R | 432 ++++++++++++++++++++----------------- man/AggregateExpression.Rd | 2 +- man/AverageExpression.Rd | 18 +- 3 files changed, 238 insertions(+), 214 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index a235a200a..c80d1cc29 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -368,23 +368,21 @@ AggregateExpression <- function( verbose = TRUE, ... ) { - return( - AverageExpression( - object = object, - method = 'aggregate', - assays = assays, - features = features, - return.seurat = return.seurat, - group.by = group.by, - add.ident = add.ident, - layer = "counts", - normalization.method = normalization.method, - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - ... - ) - ) + return(PseudobulkExpression( + object = object, + assays = assays, + features = features, + return.seurat = return.seurat, + group.by = group.by, + add.ident = add.ident, + layer = 'counts', + method = 'aggregate', + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + ... + )) } #' Averaged feature expression by identity class @@ -396,11 +394,11 @@ AggregateExpression <- function( #' so that averaging is done in non-log space. Otherwise, if layer is set to #' either 'counts' or 'scale.data', no exponentiation is performed prior to averaging. #' If \code{return.seurat = TRUE} and layer is not 'scale.data', averaged values -#' are placed in the 'counts' layer of the returned object and \code{\link{NormalizeData}} +#' are placed in the 'counts' layer of the returned object and 'log1p' #' is run on the averaged counts and placed in the 'data' layer \code{\link{ScaleData}} #' is then run on the default assay before returning the object. #' If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' layer contains -#' average counts and 'scale.data' is set to the averaged values. +#' average counts and 'scale.data' is set to the averaged values of 'scale.data'. #' #' @param object Seurat object #' @param assays Which assays to use. Default is all assays @@ -412,10 +410,6 @@ AggregateExpression <- function( #' @param layer Layer(s) to use; if multiple layers are given, assumed to follow #' the order of 'assays' (if specified) or object's assays #' @param slot (Deprecated). Slots(s) to use -#' @param normalization.method Method for normalization, see \code{\link{NormalizeData}} -#' @param scale.factor Scale factor for normalization, see \code{\link{NormalizeData}} -#' @param margin Margin to perform CLR normalization, see \code{\link{NormalizeData}} -#' @param method Method of collapsing expression values. Either 'average' or 'aggregate' #' @param verbose Print messages and show progress bar #' @param ... Arguments to be passed to methods such as \code{\link{CreateSeuratObject}} #' @@ -439,184 +433,22 @@ AverageExpression <- function( add.ident = NULL, layer = 'data', slot = deprecated(), - method = 'average', - normalization.method = "LogNormalize", - scale.factor = 10000, - margin = 1, verbose = TRUE, ... ) { - CheckDots(..., fxns = 'CreateSeuratObject') - if (!is.null(x = add.ident)) { - .Deprecated(msg = "'add.ident' is a deprecated argument. Please see documentation to see how to pass a vector to the 'group.by' argument to specify multiple grouping variables") - group.by <- c('ident', add.ident) - } - if (!(method %in% c('average', 'aggregate'))) { - stop("'method' must be either 'average' or 'aggregate'") - } - if (is_present(arg = slot)) { - f <- if (.IsFutureSeurat(version = '5.1.0')) { - deprecate_stop - } else if (.IsFutureSeurat(version = '5.0.0')) { - deprecate_warn - } else { - deprecate_soft - } - f( - when = '5.0.0', - what = 'AverageExpression(slot = )', - with = 'AverageExpression(layer = )' - ) - layer <- slot - } - - if (method =="average") { - inform(message = "As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.", - .frequency = "once", - .frequency_id = "AverageExpression") - } - - object.assays <- .FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) - assays <- assays %||% object.assays - if (!all(assays %in% object.assays)) { - assays <- assays[assays %in% object.assays] - if (length(x = assays) == 0) { - stop("None of the requested assays are present in the object") - } else { - warning("Requested assays that do not exist in object. Proceeding with existing assays only.") - } - } - if (length(x = layer) == 1) { - layer <- rep_len(x = layer, length.out = length(x = assays)) - } else if (length(x = layer) != length(x = assays)) { - stop("Number of layers provided does not match number of assays") - } - data <- FetchData(object = object, vars = rev(x = group.by)) - data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] - if (nrow(x = data) < ncol(x = object)) { - message("Removing cells with NA for 1 or more grouping variables") - object <- subset(x = object, cells = rownames(x = data)) - } - for (i in 1:ncol(x = data)) { - data[, i] <- as.factor(x = data[, i]) - } - num.levels <- sapply( - X = 1:ncol(x = data), - FUN = function(i) { - length(x = levels(x = data[, i])) - } - ) - if (any(num.levels == 1)) { - message(paste0("The following grouping variables have 1 value and will be ignored: ", - paste0(colnames(x = data)[which(num.levels <= 1)], collapse = ", "))) - group.by <- colnames(x = data)[which(num.levels > 1)] - data <- data[, which(num.levels > 1), drop = F] - } - category.matrix <- CreateCategoryMatrix(labels = data, method = method) - data.return <- list() - for (i in 1:length(x = assays)) { - if (inherits(x = features, what = "list")) { - features.i <- features[[i]] - } else { - features.i <- features - } - data.return[[assays[i]]] <- PseudobulkExpression( - object = object[[assays[i]]], - assay = assays[i], - category.matrix = category.matrix, - features = features.i, - layer = layer[i], - verbose = verbose, - ... - ) - } - if (return.seurat) { - op <- options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = FALSE) - on.exit(expr = options(op), add = TRUE) - if (layer[1] == 'scale.data') { - na.matrix <- as.matrix(x = data.return[[1]]) - na.matrix[1:length(x = na.matrix)] <- NA - #sum up counts to make seurat object - summed.counts <- PseudobulkExpression( - object = object[[assays[1]]], - assay = assays[1], - category.matrix = category.matrix, - features = features[[1]], - layer = "counts" - ) - toRet <- CreateSeuratObject( - counts = summed.counts, - project = if (method == "average") "Average" else "Aggregate", - assay = names(x = data.return)[1], - ... - ) - LayerData(object = toRet, - layer = "scale.data", - assay = names(x = data.return)[1]) <- data.return[[1]] - } else { - toRet <- CreateSeuratObject( - counts = data.return[[1]], - project = if (method == "average") "Average" else "Aggregate", - assay = names(x = data.return)[1], - ... - ) - LayerData(object = toRet, - layer = "data", - assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]]), - normalization.method = normalization.method, - scale.factor = scale.factor, - margin = margin, - block.size = block.size, - verbose = verbose) - } - #for multimodal data - if (length(x = data.return) > 1) { - for (i in 2:length(x = data.return)) { - if (layer[i] == 'scale.data') { - summed.counts <- PseudobulkExpression( - object = object[[assays[i]]], - assay = assays[i], - category.matrix = category.matrix, - features = features[[i]], - slot = "counts" - ) - toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = summed.counts) - LayerData(object = toRet, - layer = "scale.data", - assay = names(x = data.return)[i]) <- data.return[[i]] - } else { - toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) - LayerData(object = toRet, - layer = "data", - assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]]), - normalization.method = normalization.method, - scale.factor = scale.factor, - margin = margin, - block.size = block.size, - verbose = verbose) - } - } - } - if (DefaultAssay(object = object) %in% names(x = data.return)) { - DefaultAssay(object = toRet) <- DefaultAssay(object = object) - if (layer[which(DefaultAssay(object = object) %in% names(x = data.return))[1]] != 'scale.data') { - toRet <- ScaleData(object = toRet, verbose = verbose) - } - } - if ('ident' %in% group.by) { - first.cells <- sapply(X = 1:ncol(x = category.matrix), - FUN = function(x) { - return(category.matrix[,x, drop = FALSE ]@i[1] + 1) - } - ) - Idents(object = toRet, - cells = colnames(x = toRet) - ) <- Idents(object = object)[first.cells] - } - return(toRet) - } else { - return(data.return) - } + return(PseudobulkExpression( + object = object, + assays = assays, + features = features, + return.seurat = return.seurat, + group.by = group.by, + add.ident = add.ident, + layer = layer, + slot = slot, + method = 'average', + verbose = verbose, + ... + )) } #' Match the case of character vectors @@ -1521,6 +1353,210 @@ PseudobulkExpression.StdAssay <- function( return(data.return) } + +#' @method PseudobulkExpression Seurat +#' @importFrom SeuratObject .IsFutureSeurat +# +PseudobulkExpression.Seurat <- function( + object, + assays = NULL, + features = NULL, + return.seurat = FALSE, + group.by = 'ident', + add.ident = NULL, + layer = 'data', + slot = deprecated(), + method = 'average', + normalization.method = "LogNormalize", + scale.factor = 10000, + margin = 1, + verbose = TRUE, + ... +) { + CheckDots(..., fxns = 'CreateSeuratObject') + if (!is.null(x = add.ident)) { + .Deprecated(msg = "'add.ident' is a deprecated argument. Please see documentation to see how to pass a vector to the 'group.by' argument to specify multiple grouping variables") + group.by <- c('ident', add.ident) + } + if (!(method %in% c('average', 'aggregate'))) { + stop("'method' must be either 'average' or 'aggregate'") + } + if (is_present(arg = slot)) { + f <- if (.IsFutureSeurat(version = '5.1.0')) { + deprecate_stop + } else if (.IsFutureSeurat(version = '5.0.0')) { + deprecate_warn + } else { + deprecate_soft + } + f( + when = '5.0.0', + what = 'AverageExpression(slot = )', + with = 'AverageExpression(layer = )' + ) + layer <- slot + } + + if (method =="average") { + inform(message = "As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.", + .frequency = "once", + .frequency_id = "AverageExpression") + } + + object.assays <- .FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) + assays <- assays %||% object.assays + if (!all(assays %in% object.assays)) { + assays <- assays[assays %in% object.assays] + if (length(x = assays) == 0) { + stop("None of the requested assays are present in the object") + } else { + warning("Requested assays that do not exist in object. Proceeding with existing assays only.") + } + } + if (length(x = layer) == 1) { + layer <- rep_len(x = layer, length.out = length(x = assays)) + } else if (length(x = layer) != length(x = assays)) { + stop("Number of layers provided does not match number of assays") + } + data <- FetchData(object = object, vars = rev(x = group.by)) + data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] + if (nrow(x = data) < ncol(x = object)) { + message("Removing cells with NA for 1 or more grouping variables") + object <- subset(x = object, cells = rownames(x = data)) + } + for (i in 1:ncol(x = data)) { + data[, i] <- as.factor(x = data[, i]) + } + num.levels <- sapply( + X = 1:ncol(x = data), + FUN = function(i) { + length(x = levels(x = data[, i])) + } + ) + if (any(num.levels == 1)) { + message(paste0("The following grouping variables have 1 value and will be ignored: ", + paste0(colnames(x = data)[which(num.levels <= 1)], collapse = ", "))) + group.by <- colnames(x = data)[which(num.levels > 1)] + data <- data[, which(num.levels > 1), drop = F] + } + category.matrix <- CreateCategoryMatrix(labels = data, method = method) + data.return <- list() + for (i in 1:length(x = assays)) { + if (inherits(x = features, what = "list")) { + features.i <- features[[i]] + } else { + features.i <- features + } + data.return[[assays[i]]] <- PseudobulkExpression( + object = object[[assays[i]]], + assay = assays[i], + category.matrix = category.matrix, + features = features.i, + layer = layer[i], + verbose = verbose, + ... + ) + } + if (return.seurat) { + op <- options(Seurat.object.assay.version = "v5", Seurat.object.assay.calcn = FALSE) + on.exit(expr = options(op), add = TRUE) + if (layer[1] == 'scale.data') { + na.matrix <- as.matrix(x = data.return[[1]]) + na.matrix[1:length(x = na.matrix)] <- NA + #sum up counts to make seurat object + summed.counts <- PseudobulkExpression( + object = object[[assays[1]]], + assay = assays[1], + category.matrix = category.matrix, + features = features[[1]], + layer = "counts" + ) + toRet <- CreateSeuratObject( + counts = summed.counts, + project = if (method == "average") "Average" else "Aggregate", + assay = names(x = data.return)[1], + ... + ) + LayerData(object = toRet, + layer = "scale.data", + assay = names(x = data.return)[1]) <- data.return[[1]] + } else { + toRet <- CreateSeuratObject( + counts = data.return[[1]], + project = if (method == "average") "Average" else "Aggregate", + assay = names(x = data.return)[1], + ... + ) + if (method == "aggregate") { + LayerData(object = toRet, + layer = "data", + assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]]), + normalization.method = normalization.method, + verbose = verbose) + } + else { + LayerData(object = toRet, + layer = "data", + assay = names(x = data.return)[1]) <- log1p(x = as.matrix(x = data.return[[1]])) + } + } + #for multimodal data + if (length(x = data.return) > 1) { + for (i in 2:length(x = data.return)) { + if (layer[i] == 'scale.data') { + summed.counts <- PseudobulkExpression( + object = object[[assays[i]]], + assay = assays[i], + category.matrix = category.matrix, + features = features[[i]], + slot = "counts" + ) + toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = summed.counts) + LayerData(object = toRet, + layer = "scale.data", + assay = names(x = data.return)[i]) <- data.return[[i]] + } else { + toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) + if (method == "aggregate") { + LayerData(object = toRet, + layer = "data", + assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]]), + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, + block.size = block.size, + verbose = verbose) + } + else { + LayerData(object = toRet, + layer = "data", + assay = names(x = data.return)[i]) <- log1p(x = as.matrix(x = data.return[[i]])) + } + } + } + } + if (DefaultAssay(object = object) %in% names(x = data.return)) { + DefaultAssay(object = toRet) <- DefaultAssay(object = object) + if (layer[which(DefaultAssay(object = object) %in% names(x = data.return))[1]] != 'scale.data') { + toRet <- ScaleData(object = toRet, verbose = verbose) + } + } + if ('ident' %in% group.by) { + first.cells <- sapply(X = 1:ncol(x = category.matrix), + FUN = function(x) { + return(category.matrix[,x, drop = FALSE ]@i[1] + 1) + } + ) + Idents(object = toRet, + cells = colnames(x = toRet) + ) <- Idents(object = object)[first.cells] + } + return(toRet) + } else { + return(data.return) + } +} + #' Regroup idents based on meta.data info #' #' For cells in each ident, set a new identity based on the most common value diff --git a/man/AggregateExpression.Rd b/man/AggregateExpression.Rd index 72f219fbb..ddd728459 100644 --- a/man/AggregateExpression.Rd +++ b/man/AggregateExpression.Rd @@ -27,7 +27,7 @@ AggregateExpression( \item{return.seurat}{Whether to return the data as a Seurat object. Default is FALSE} -\item{group.by}{Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default. +\item{group.by}{Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default To use multiple categories, specify a vector, such as c('ident', 'replicate', 'celltype')} \item{add.ident}{(Deprecated). Place an additional label on each cell prior to pseudobulking} diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index bd9ecc177..531cd7ac5 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -13,10 +13,6 @@ AverageExpression( add.ident = NULL, layer = "data", slot = deprecated(), - method = "average", - normalization.method = "LogNormalize", - scale.factor = 10000, - margin = 1, verbose = TRUE, ... ) @@ -30,7 +26,7 @@ AverageExpression( \item{return.seurat}{Whether to return the data as a Seurat object. Default is FALSE} -\item{group.by}{Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default. +\item{group.by}{Category (or vector of categories) for grouping (e.g, ident, replicate, celltype); 'ident' by default To use multiple categories, specify a vector, such as c('ident', 'replicate', 'celltype')} \item{add.ident}{(Deprecated). Place an additional label on each cell prior to pseudobulking} @@ -40,14 +36,6 @@ the order of 'assays' (if specified) or object's assays} \item{slot}{(Deprecated). Slots(s) to use} -\item{method}{Method of collapsing expression values. Either 'average' or 'aggregate'} - -\item{normalization.method}{Method for normalization, see \code{\link{NormalizeData}}} - -\item{scale.factor}{Scale factor for normalization, see \code{\link{NormalizeData}}} - -\item{margin}{Margin to perform CLR normalization, see \code{\link{NormalizeData}}} - \item{verbose}{Print messages and show progress bar} \item{...}{Arguments to be passed to methods such as \code{\link{CreateSeuratObject}}} @@ -65,11 +53,11 @@ normalized and therefore feature values are exponentiated prior to averaging so that averaging is done in non-log space. Otherwise, if layer is set to either 'counts' or 'scale.data', no exponentiation is performed prior to averaging. If \code{return.seurat = TRUE} and layer is not 'scale.data', averaged values -are placed in the 'counts' layer of the returned object and \code{\link{NormalizeData}} +are placed in the 'counts' layer of the returned object and 'log1p' is run on the averaged counts and placed in the 'data' layer \code{\link{ScaleData}} is then run on the default assay before returning the object. If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' layer contains -average counts and 'scale.data' is set to the averaged values. +average counts and 'scale.data' is set to the averaged values of 'scale.data'. } \examples{ data("pbmc_small") From 3d38315228f4bbb1533e29d8b796fb0639641eec Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 18 Oct 2023 12:00:15 -0400 Subject: [PATCH 908/979] slot to layer fix --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index c80d1cc29..bf3d438f1 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1509,7 +1509,7 @@ PseudobulkExpression.Seurat <- function( assay = assays[i], category.matrix = category.matrix, features = features[[i]], - slot = "counts" + layer = "counts" ) toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = summed.counts) LayerData(object = toRet, From c389a613d913cdd1831e9b8c9b7e184fc1f372c6 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Wed, 18 Oct 2023 13:35:53 -0400 Subject: [PATCH 909/979] fix for query.assay in TransferData --- R/integration.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index a1139599d..799db0a0a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -3307,7 +3307,9 @@ TransferData <- function( anchors <- slot(object = anchorset, name = "anchors") reference.cells <- slot(object = anchorset, name = "reference.cells") query.cells <- slot(object = anchorset, name = "query.cells") - query.assay <- query.assay %||% DefaultAssay(query) + if (!is.null(query)) { + query.assay <- query.assay %||% DefaultAssay(query) + } label.transfer <- list() ValidateParams_TransferData( anchorset = anchorset, From 1387c05b9254bb69335d8cdc51a4a53f371c20b2 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 18 Oct 2023 14:42:40 -0400 Subject: [PATCH 910/979] fix naming of cells with AggregateExpression, add warning when replacing underscore with dashes --- R/utilities.R | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index bf3d438f1..5c2730fc3 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1396,13 +1396,13 @@ PseudobulkExpression.Seurat <- function( ) layer <- slot } - + if (method =="average") { inform(message = "As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.", .frequency = "once", .frequency_id = "AverageExpression") } - + object.assays <- .FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) assays <- assays %||% object.assays if (!all(assays %in% object.assays)) { @@ -2809,16 +2809,23 @@ CreateCategoryMatrix <- function( STATS = colsums, FUN = "/") } - colnames(x = category.matrix) <- gsub(pattern = '_', - replacement = '-', - x = colnames(x = category.matrix) - ) - colnames(x = category.matrix) <- sapply( + if (any(grepl(pattern = "_", x = colnames(x = category.matrix) ))) { + inform( + message = "Names of identity class contain underscores ('_'), replacing with dashes ('-')", + .frequency = "regularly", + .frequency_id = "CreateCategoryMatrix" + ) + colnames(x = category.matrix) <- gsub(pattern = '_', + replacement = '-', + x = colnames(x = category.matrix) + ) + } + colnames(x = category.matrix) <- unname(sapply( X = colnames(x = category.matrix), FUN = function(name) { name <- gsub(pattern = "data\\[, [1-9]*\\]", replacement = "", x = name) return(paste0(rev(x = unlist(x = strsplit(x = name, split = ":"))), collapse = "_")) - }) + })) rownames(category.matrix) <- cells.name return(category.matrix) } From 69b054b78f626ee86116a6b6a45f146e912c9b0c Mon Sep 17 00:00:00 2001 From: Longda Date: Wed, 18 Oct 2023 15:13:02 -0400 Subject: [PATCH 911/979] change +1 in FoldChange --- R/differential_expression.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 07218a9d0..d5d88d229 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -785,12 +785,14 @@ FindMarkers.SCTAssay <- function( ) # Default assumes the input is log1p(corrected counts) default.mean.fxn <- function(x) { - return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + # return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + return(log(x = (rowSums(x = expm1(x = x)) + 1)/NCOL(x), base = base)) } mean.fxn <- mean.fxn %||% switch( EXPR = slot, 'counts' = function(x) { - return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + # return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + return(log(x = (rowSums(x = x) + 1)/NCOL(x), base = base)) }, 'scale.data' = rowMeans, default.mean.fxn @@ -1120,6 +1122,7 @@ FoldChange.default <- function( #' when \code{slot} is \dQuote{\code{data}} #' #' @importFrom Matrix rowMeans +#' @importFrom Matrix rowSums #' @rdname FoldChange #' @concept differential_expression #' @export @@ -1140,11 +1143,13 @@ FoldChange.Assay <- function( data <- GetAssayData(object = object, slot = slot) # By default run as if LogNormalize is done log1pdata.mean.fxn <- function(x) { - return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + # return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + return(log(x = (rowSums(x = expm1(x = x)) + 1)/NCOL(x), base = base)) } scaledata.mean.fxn <- rowMeans counts.mean.fxn <- function(x) { - return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + # return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + return(log(x = (rowSums(x = x) + 1)/NCOL(x), base = base)) } if (!is.null(x = norm.method)) { # For anything apart from log normalization set to rowMeans @@ -1196,6 +1201,7 @@ FoldChange.Assay <- function( FoldChange.StdAssay <- FoldChange.Assay #' @importFrom Matrix rowMeans +#' @importFrom Matrix rowSums #' @rdname FoldChange #' @concept differential_expression #' @export @@ -1215,14 +1221,16 @@ FoldChange.SCTAssay <- function( pseudocount.use <- pseudocount.use %||% 1 data <- GetAssayData(object = object, slot = slot) default.mean.fxn <- function(x) { - return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + # return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) + return(log(x = (rowSums(x = expm1(x = x)) + 1)/NCOL(x), base = base)) } mean.fxn <- mean.fxn %||% switch( EXPR = slot, 'data' = default.mean.fxn, 'scale.data' = rowMeans, 'counts' = function(x) { - return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + # return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + return(log(x = (rowSums(x = x) + 1)/NCOL(x), base = base)) }, default.mean.fxn ) From 4eca75d4e929349662ee3360372cb6a5f03d6074 Mon Sep 17 00:00:00 2001 From: Longda Date: Wed, 18 Oct 2023 15:44:07 -0400 Subject: [PATCH 912/979] pseudocount.use back to 1 --- R/differential_expression.R | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index d5d88d229..9a5a0320b 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -476,7 +476,7 @@ FindConservedMarkers <- function( #' of the two groups, currently only used for poisson and negative binomial tests #' @param min.cells.group Minimum number of cells in one of the groups #' @param pseudocount.use Pseudocount to add to averaged expression values when -#' calculating logFC. 0.1 by default. +#' calculating logFC. 1 by default. #' @param fc.results data.frame from FoldChange #' @param densify Convert the sparse matrix to a dense form before running the DE test. This can provide speedups but might require higher memory; default is FALSE #' @@ -507,7 +507,7 @@ FindMarkers.default <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, fc.results = NULL, densify = FALSE, ... @@ -645,7 +645,7 @@ FindMarkers.Assay <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -734,7 +734,7 @@ FindMarkers.SCTAssay <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -786,13 +786,13 @@ FindMarkers.SCTAssay <- function( # Default assumes the input is log1p(corrected counts) default.mean.fxn <- function(x) { # return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) - return(log(x = (rowSums(x = expm1(x = x)) + 1)/NCOL(x), base = base)) + return(log(x = (rowSums(x = expm1(x = x)) + pseudocount.use)/NCOL(x), base = base)) } mean.fxn <- mean.fxn %||% switch( EXPR = slot, 'counts' = function(x) { # return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) - return(log(x = (rowSums(x = x) + 1)/NCOL(x), base = base)) + return(log(x = (rowSums(x = x) + pseudocount.use)/NCOL(x), base = base)) }, 'scale.data' = rowMeans, default.mean.fxn @@ -857,7 +857,7 @@ FindMarkers.DimReduc <- function( latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = rowMeans, fc.name = NULL, densify = FALSE, @@ -968,7 +968,7 @@ FindMarkers.Seurat <- function( reduction = NULL, features = NULL, logfc.threshold = 0.1, - pseudocount.use = 0.1, + pseudocount.use = 1, test.use = "wilcox", min.pct = 0.01, min.diff.pct = -Inf, @@ -1133,7 +1133,7 @@ FoldChange.Assay <- function( cells.2, features = NULL, slot = "data", - pseudocount.use = 0.1, + pseudocount.use = 1, fc.name = NULL, mean.fxn = NULL, base = 2, @@ -1144,12 +1144,12 @@ FoldChange.Assay <- function( # By default run as if LogNormalize is done log1pdata.mean.fxn <- function(x) { # return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) - return(log(x = (rowSums(x = expm1(x = x)) + 1)/NCOL(x), base = base)) + return(log(x = (rowSums(x = expm1(x = x)) + pseudocount.use)/NCOL(x), base = base)) } scaledata.mean.fxn <- rowMeans counts.mean.fxn <- function(x) { # return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) - return(log(x = (rowSums(x = x) + 1)/NCOL(x), base = base)) + return(log(x = (rowSums(x = x) + pseudocount.use)/NCOL(x), base = base)) } if (!is.null(x = norm.method)) { # For anything apart from log normalization set to rowMeans @@ -1222,7 +1222,7 @@ FoldChange.SCTAssay <- function( data <- GetAssayData(object = object, slot = slot) default.mean.fxn <- function(x) { # return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use, base = base)) - return(log(x = (rowSums(x = expm1(x = x)) + 1)/NCOL(x), base = base)) + return(log(x = (rowSums(x = expm1(x = x)) + pseudocount.use)/NCOL(x), base = base)) } mean.fxn <- mean.fxn %||% switch( EXPR = slot, @@ -1230,7 +1230,7 @@ FoldChange.SCTAssay <- function( 'scale.data' = rowMeans, 'counts' = function(x) { # return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) - return(log(x = (rowSums(x = x) + 1)/NCOL(x), base = base)) + return(log(x = (rowSums(x = x) + pseudocount.use)/NCOL(x), base = base)) }, default.mean.fxn ) @@ -1267,7 +1267,7 @@ FoldChange.DimReduc <- function( cells.2, features = NULL, slot = NULL, - pseudocount.use = 0.1, + pseudocount.use = 1, fc.name = NULL, mean.fxn = NULL, ... @@ -1319,7 +1319,7 @@ FoldChange.Seurat <- function( slot = 'data', reduction = NULL, features = NULL, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = NULL, base = 2, fc.name = NULL, From 0615e80f40e0a08710b3611111f77ba95af9b957 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 18 Oct 2023 15:53:05 -0400 Subject: [PATCH 913/979] fix tests --- tests/testthat/test_utilities.R | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test_utilities.R b/tests/testthat/test_utilities.R index c0946c599..37838ee25 100644 --- a/tests/testthat/test_utilities.R +++ b/tests/testthat/test_utilities.R @@ -80,23 +80,19 @@ test_that("AverageExpression with return.seurat", { expect_equal(unname(as.matrix(LayerData(avg.counts[["RNA"]], layer = "counts"))), unname(as.matrix(avg.counts.mat))) avg.data <- LayerData(avg.counts[["RNA"]], layer = "data") + #test that data returned is log1p of average counts expect_equivalent( - as.matrix(NormalizeData(avg.counts.mat)), + as.matrix(log1p(avg.counts.mat)), as.matrix(avg.data), tolerance = 1e-6 ) + #test that scale.data returned is scaled data avg.scale <- LayerData(avg.counts[["RNA"]], layer = "scale.data") expect_equal( - avg.scale['MS4A1', ], - c(a = -0.8141426, b = 1.1162108, c = -0.3020683), - tolerance = 1e-6 - ) - expect_equal( - avg.scale['SPON2', ], - c(a = 0.3387626, b = 0.7866155, c = -1.1253781), + avg.scale, + ScaleData(avg.counts)[['RNA']]$scale.data, tolerance = 1e-6 ) - # data avg.data <- AverageExpression(object, layer = "data", return.seurat = TRUE, verbose = FALSE) expect_s4_class(object = avg.data, "Seurat") @@ -116,7 +112,6 @@ test_that("AverageExpression with return.seurat", { c(a = 0.1213127, b = 0.9338096, c = -1.0551222), tolerance = 1e-6 ) - # scale.data object <- ScaleData(object = object, verbose = FALSE) avg.scale <- AverageExpression(object, layer = "scale.data", return.seurat = TRUE, verbose = FALSE) From 018efe943fe981490b7a7664e4f13ff08be5fe41 Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 18 Oct 2023 16:55:47 -0400 Subject: [PATCH 914/979] formatting fixes --- R/utilities.R | 95 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 35 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 5c2730fc3..8cf85edd1 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1397,10 +1397,12 @@ PseudobulkExpression.Seurat <- function( layer <- slot } - if (method =="average") { - inform(message = "As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.", - .frequency = "once", - .frequency_id = "AverageExpression") + if (method == "average") { + inform( + message = "As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.", + .frequency = "once", + .frequency_id = "AverageExpression" + ) } object.assays <- .FilterObjects(object = object, classes.keep = c('Assay', 'Assay5')) @@ -1421,7 +1423,7 @@ PseudobulkExpression.Seurat <- function( data <- FetchData(object = object, vars = rev(x = group.by)) data <- data[which(rowSums(x = is.na(x = data)) == 0), , drop = F] if (nrow(x = data) < ncol(x = object)) { - message("Removing cells with NA for 1 or more grouping variables") + inform("Removing cells with NA for 1 or more grouping variables") object <- subset(x = object, cells = rownames(x = data)) } for (i in 1:ncol(x = data)) { @@ -1434,8 +1436,12 @@ PseudobulkExpression.Seurat <- function( } ) if (any(num.levels == 1)) { - message(paste0("The following grouping variables have 1 value and will be ignored: ", - paste0(colnames(x = data)[which(num.levels <= 1)], collapse = ", "))) + message( + paste0( + "The following grouping variables have 1 value and will be ignored: ", + paste0(colnames(x = data)[which(num.levels <= 1)], collapse = ", ") + ) + ) group.by <- colnames(x = data)[which(num.levels > 1)] data <- data[, which(num.levels > 1), drop = F] } @@ -1477,9 +1483,11 @@ PseudobulkExpression.Seurat <- function( assay = names(x = data.return)[1], ... ) - LayerData(object = toRet, - layer = "scale.data", - assay = names(x = data.return)[1]) <- data.return[[1]] + LayerData( + object = toRet, + layer = "scale.data", + assay = names(x = data.return)[i] + ) <- data.return[[1]] } else { toRet <- CreateSeuratObject( counts = data.return[[1]], @@ -1488,16 +1496,21 @@ PseudobulkExpression.Seurat <- function( ... ) if (method == "aggregate") { - LayerData(object = toRet, - layer = "data", - assay = names(x = data.return)[1]) <- NormalizeData(as.matrix(x = data.return[[1]]), - normalization.method = normalization.method, - verbose = verbose) + LayerData( + object = toRet, + layer = "data", + assay = names(x = data.return)[1] + ) <- NormalizeData( + as.matrix(x = data.return[[1]]), + normalization.method = normalization.method, + verbose = verbose + ) } else { LayerData(object = toRet, layer = "data", - assay = names(x = data.return)[1]) <- log1p(x = as.matrix(x = data.return[[1]])) + assay = names(x = data.return)[1] + ) <- log1p(x = as.matrix(x = data.return[[1]])) } } #for multimodal data @@ -1512,25 +1525,36 @@ PseudobulkExpression.Seurat <- function( layer = "counts" ) toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = summed.counts) - LayerData(object = toRet, - layer = "scale.data", - assay = names(x = data.return)[i]) <- data.return[[i]] + LayerData( + object = toRet, + layer = "scale.data", + assay = names(x = data.return)[i] + ) <- data.return[[i]] } else { - toRet[[names(x = data.return)[i]]] <- CreateAssayObject(counts = data.return[[i]], check.matrix = FALSE) + toRet[[names(x = data.return)[i]]] <- CreateAssayObject( + counts = data.return[[i]], + check.matrix = FALSE + ) if (method == "aggregate") { - LayerData(object = toRet, - layer = "data", - assay = names(x = data.return)[i]) <- NormalizeData(as.matrix(x = data.return[[i]]), - normalization.method = normalization.method, - scale.factor = scale.factor, - margin = margin, - block.size = block.size, - verbose = verbose) + LayerData( + object = toRet, + layer = "data", + assay = names(x = data.return)[i] + ) <- NormalizeData( + as.matrix(x = data.return[[i]]), + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, + block.size = block.size, + verbose = verbose + ) } else { - LayerData(object = toRet, - layer = "data", - assay = names(x = data.return)[i]) <- log1p(x = as.matrix(x = data.return[[i]])) + LayerData( + object = toRet, + layer = "data", + assay = names(x = data.return)[i] + ) <- log1p(x = as.matrix(x = data.return[[i]])) } } } @@ -1542,10 +1566,11 @@ PseudobulkExpression.Seurat <- function( } } if ('ident' %in% group.by) { - first.cells <- sapply(X = 1:ncol(x = category.matrix), - FUN = function(x) { - return(category.matrix[,x, drop = FALSE ]@i[1] + 1) - } + first.cells <- sapply( + X = 1:ncol(x = category.matrix), + FUN = function(x) { + return(category.matrix[,x, drop = FALSE ]@i[1] + 1) + } ) Idents(object = toRet, cells = colnames(x = toRet) From 71aea2e3ffbd4ccd394a8da5d66795fb2181cc5c Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Wed, 18 Oct 2023 17:12:19 -0400 Subject: [PATCH 915/979] fix indents --- R/preprocessing.R | 12 ++++++---- R/utilities.R | 60 +++++++++++++++++++++++++---------------------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index d545ac9ca..794700702 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -249,11 +249,13 @@ HTODemux <- function( ) #average hto signals per cluster #work around so we don't average all the RNA levels which takes time - average.expression <- suppressWarnings(AverageExpression( - object = object, - assays = assay, - verbose = FALSE - )[[assay]]) + average.expression <- suppressWarnings( + AverageExpression( + object = object, + assays = assay, + verbose = FALSE + )[[assay]] + ) #checking for any cluster with all zero counts for any barcode if (sum(average.expression == 0) > 0) { stop("Cells with zero counts exist as a cluster.") diff --git a/R/utilities.R b/R/utilities.R index 8cf85edd1..e9d1b7b35 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -368,21 +368,23 @@ AggregateExpression <- function( verbose = TRUE, ... ) { - return(PseudobulkExpression( - object = object, - assays = assays, - features = features, - return.seurat = return.seurat, - group.by = group.by, - add.ident = add.ident, - layer = 'counts', - method = 'aggregate', - normalization.method = normalization.method, - scale.factor = scale.factor, - margin = margin, - verbose = verbose, - ... - )) + return( + PseudobulkExpression( + object = object, + assays = assays, + features = features, + return.seurat = return.seurat, + group.by = group.by, + add.ident = add.ident, + layer = 'counts', + method = 'aggregate', + normalization.method = normalization.method, + scale.factor = scale.factor, + margin = margin, + verbose = verbose, + ... + ) + ) } #' Averaged feature expression by identity class @@ -436,19 +438,21 @@ AverageExpression <- function( verbose = TRUE, ... ) { - return(PseudobulkExpression( - object = object, - assays = assays, - features = features, - return.seurat = return.seurat, - group.by = group.by, - add.ident = add.ident, - layer = layer, - slot = slot, - method = 'average', - verbose = verbose, - ... - )) + return( + PseudobulkExpression( + object = object, + assays = assays, + features = features, + return.seurat = return.seurat, + group.by = group.by, + add.ident = add.ident, + layer = layer, + slot = slot, + method = 'average', + verbose = verbose, + ... + ) + ) } #' Match the case of character vectors From 53b42c3d2e1eaf4b3749bb7947305194ea83ad44 Mon Sep 17 00:00:00 2001 From: Longda Date: Wed, 18 Oct 2023 17:16:57 -0400 Subject: [PATCH 916/979] update documentations --- man/FindMarkers.Rd | 12 ++++++------ man/FoldChange.Rd | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index 74061e26b..9f3c1fac1 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -30,7 +30,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, fc.results = NULL, densify = FALSE, ... @@ -53,7 +53,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -79,7 +79,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = NULL, fc.name = NULL, base = 2, @@ -104,7 +104,7 @@ FindMarkers(object, ...) latent.vars = NULL, min.cells.feature = 3, min.cells.group = 3, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = rowMeans, fc.name = NULL, densify = FALSE, @@ -122,7 +122,7 @@ FindMarkers(object, ...) reduction = NULL, features = NULL, logfc.threshold = 0.1, - pseudocount.use = 0.1, + pseudocount.use = 1, test.use = "wilcox", min.pct = 0.01, min.diff.pct = -Inf, @@ -230,7 +230,7 @@ of the two groups, currently only used for poisson and negative binomial tests} \item{min.cells.group}{Minimum number of cells in one of the groups} \item{pseudocount.use}{Pseudocount to add to averaged expression values when -calculating logFC. 0.1 by default.} +calculating logFC. 1 by default.} \item{fc.results}{data.frame from FoldChange} diff --git a/man/FoldChange.Rd b/man/FoldChange.Rd index ec56378c1..cbdff2579 100644 --- a/man/FoldChange.Rd +++ b/man/FoldChange.Rd @@ -19,7 +19,7 @@ FoldChange(object, ...) cells.2, features = NULL, slot = "data", - pseudocount.use = 0.1, + pseudocount.use = 1, fc.name = NULL, mean.fxn = NULL, base = 2, @@ -46,7 +46,7 @@ FoldChange(object, ...) cells.2, features = NULL, slot = NULL, - pseudocount.use = 0.1, + pseudocount.use = 1, fc.name = NULL, mean.fxn = NULL, ... @@ -62,7 +62,7 @@ FoldChange(object, ...) slot = "data", reduction = NULL, features = NULL, - pseudocount.use = 0.1, + pseudocount.use = 1, mean.fxn = NULL, base = 2, fc.name = NULL, From ea2eedea0fc8b435ee175345aa443a977061d1a1 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Wed, 18 Oct 2023 17:48:37 -0400 Subject: [PATCH 917/979] fixed TransferData arg handling in MapQuery --- R/integration.R | 18 +++++------------- R/zzz.R | 3 ++- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/R/integration.R b/R/integration.R index 3e13d7a07..33c9945fb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2286,19 +2286,11 @@ MapQuery <- function( integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE - query <- exec( - .fn = TransferData, - .args = c(list( - anchorset = anchorset, - reference = reference, - query = query, - refdata = refdata, - store.weights = TRUE, - only.weights = is.null(x = refdata), - verbose = verbose - ), transferdata.args - ) - ) + td.allarguments <- c(list(anchorset = anchorset, + reference = reference, query = query, refdata = refdata, + store.weights = TRUE, only.weights = is.null(x = refdata), + verbose = verbose), transferdata.args) + query <- exec("TransferData",!!!td.allarguments) if (inherits(x = transferdata.args$weight.reduction , "character") && transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { reuse.weights.matrix <- TRUE diff --git a/R/zzz.R b/R/zzz.R index eb28f0aa9..0063b2fa0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -2,7 +2,8 @@ #' @importFrom methods slot slot<- #' @importFrom lifecycle deprecated deprecate_soft deprecate_stop #' deprecate_warn is_present -#' @importFrom rlang abort +#' @importFrom rlang !!! +#' abort #' arg_match #' arg_match0 #' as_name From f87fcfd783675c0cb71e4d43bc7e6288ad9be205 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Wed, 18 Oct 2023 18:09:41 -0400 Subject: [PATCH 918/979] changed arg handling for other wrapped functions --- R/integration.R | 67 ++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/R/integration.R b/R/integration.R index 33c9945fb..69bebbcd2 100644 --- a/R/integration.R +++ b/R/integration.R @@ -2296,18 +2296,16 @@ MapQuery <- function( reuse.weights.matrix <- TRUE } if (anchor.reduction != "cca") { - query <- exec( - .fn = IntegrateEmbeddings, - .args = c(list( - anchorset = anchorset, - reference = reference, - query = query, - new.reduction.name = new.reduction.name, - reuse.weights.matrix = reuse.weights.matrix, - verbose = verbose + ie.allarguments <- c(list( + anchorset = anchorset, + reference = reference, + query = query, + new.reduction.name = new.reduction.name, + reuse.weights.matrix = reuse.weights.matrix, + verbose = verbose ), integrateembeddings.args - ) ) + query <- exec("IntegrateEmbeddings",!!!ie.allarguments) Misc( object = query[[new.reduction.name]], slot = 'ref.dims' @@ -2333,20 +2331,18 @@ MapQuery <- function( query.dims <- reference.dims } ref_nn.num <- Misc(object = reference[[reduction.model]], slot = "model")$n_neighbors - query <- exec( - .fn = ProjectUMAP, - .args = c(list( - query = query, - query.reduction = new.reduction.name, - query.dims = query.dims, - reference = reference, - reference.dims = reference.dims, - reference.reduction = reference.reduction, - reduction.model = reduction.model, - k.param = ref_nn.num - ), projectumap.args - ) + pu.allarguments <- c(list( + query = query, + query.reduction = new.reduction.name, + query.dims = query.dims, + reference = reference, + reference.dims = reference.dims, + reference.reduction = reference.reduction, + reduction.model = reduction.model, + k.param = ref_nn.num + ), projectumap.args ) + query <- exec("ProjectUMAP",!!!pu.allarguments) } return(query) } @@ -7826,22 +7822,19 @@ FastRPCAIntegration <- function( return(x) } ) - - anchor <- exec( - .fn = FindIntegrationAnchors, - .args = c(list( - object.list = object.list, - reference = reference, - anchor.features = anchor.features, - reduction = reduction, - normalization.method = normalization.method, - scale = scale, - k.anchor = k.anchor, - dims = dims, - verbose = verbose + fia.allarguments <- c(list( + object.list = object.list, + reference = reference, + anchor.features = anchor.features, + reduction = reduction, + normalization.method = normalization.method, + scale = scale, + k.anchor = k.anchor, + dims = dims, + verbose = verbose ), findintegrationanchors.args - ) ) + anchor <- exec("FindIntegrationAnchors",!!!fia.allarguments) object_merged <- merge(x = object.list[[1]], y = object.list[2:length(object.list)] From c4ed8a0c9664c27dd448388d26ce67bfa0d83934 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 18 Oct 2023 18:26:13 -0400 Subject: [PATCH 919/979] bracket fixes --- R/integration5.R | 18 +++++++++--------- R/preprocessing.R | 4 ++-- R/preprocessing5.R | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/integration5.R b/R/integration5.R index 98d464c30..42b37942d 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -224,23 +224,23 @@ CCAIntegration <- function( } else { object.list <- list() for (i in seq_along(along.with = layers)) { - if (inherits(x = object[[layers[i]]], what = "IterableMatrix")) { + if (inherits(x = object[layers[i]], what = "IterableMatrix")) { warning("Converting BPCells matrix to dgCMatrix for integration ", "as on-disk CCA Integration is not currently supported", call. = FALSE, immediate. = TRUE) - counts <- as(object = object[[layers[i]]][features, ], + counts <- as(object = object[layers[i]][features, ], Class = "dgCMatrix") } else { - counts <- object[[layers[i]]][features, ] + counts <- object[layers[i]][features, ] } object.list[[i]] <- CreateSeuratObject(counts = counts) - if (inherits(x = object[[scale.layer]], what = "IterableMatrix")) { - scale.data.layer <- as.matrix(object[[scale.layer]][features, + if (inherits(x = object[scale.layer], what = "IterableMatrix")) { + scale.data.layer <- as.matrix(object[scale.layer][features, Cells(object.list[[i]])]) object.list[[i]][["RNA"]]$scale.data <- scale.data.layer } else { - object.list[[i]][["RNA"]]$scale.data <- object[[scale.layer]][features, + object.list[[i]][["RNA"]]$scale.data <- object[scale.layer][features, Cells(object.list[[i]])] } object.list[[i]][['RNA']]$counts <- NULL @@ -370,7 +370,7 @@ RPCAIntegration <- function( assay <- assay %||% 'RNA' layers <- layers %||% Layers(object = object, search = 'data') #check that there enough cells present - ncells <- sapply(X = layers, FUN = function(x) {ncell <- dim(object[[x]])[2] + ncells <- sapply(X = layers, FUN = function(x) {ncell <- dim(object[x])[2] return(ncell) }) if (min(ncells) < max(dims)) { abort(message = "At least one layer has fewer cells than dimensions specified, please lower 'dims' accordingly.") @@ -390,7 +390,7 @@ RPCAIntegration <- function( } else { object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- suppressMessages(suppressWarnings(CreateSeuratObject(counts = object[[layers[i]]][features,]))) + object.list[[i]] <- suppressMessages(suppressWarnings(CreateSeuratObject(counts = object[layers[i]][features,]))) VariableFeatures(object = object.list[[i]]) <- features object.list[[i]] <- suppressWarnings(ScaleData(object = object.list[[i]], verbose = FALSE)) object.list[[i]] <- RunPCA(object = object.list[[i]], verbose = FALSE, npcs=max(dims)) @@ -493,7 +493,7 @@ JointPCAIntegration <- function( } else { object.list <- list() for (i in seq_along(along.with = layers)) { - object.list[[i]] <- CreateSeuratObject(counts = object[[layers[i]]][features.diet, ] ) + object.list[[i]] <- CreateSeuratObject(counts = object[layers[i]][features.diet, ] ) object.list[[i]][['RNA']]$counts <- NULL object.list[[i]][['joint.pca']] <- CreateDimReducObject( embeddings = Embeddings(object = orig)[Cells(object.list[[i]]),], diff --git a/R/preprocessing.R b/R/preprocessing.R index 37410d805..9a2875da6 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3817,7 +3817,7 @@ FindVariableFeatures.Assay <- function( verbose = verbose, ... ) - object[names(x = hvf.info)] <- hvf.info + object[[names(x = hvf.info)]] <- hvf.info hvf.info <- hvf.info[which(x = hvf.info[, 1, drop = TRUE] != 0), ] if (selection.method == "vst") { hvf.info <- hvf.info[order(hvf.info$vst.variance.standardized, decreasing = TRUE), , drop = FALSE] @@ -3848,7 +3848,7 @@ FindVariableFeatures.Assay <- function( no = 'mvp' ) vf.name <- paste0(vf.name, '.variable') - object[vf.name] <- rownames(x = object[]) %in% top.features + object[[vf.name]] <- rownames(x = object[[]]) %in% top.features return(object) } diff --git a/R/preprocessing5.R b/R/preprocessing5.R index a93aa2a12..d7bae2d12 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -159,8 +159,8 @@ FindVariableFeatures.StdAssay <- function( sep = '_' ) rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) - object[names(x = hvf.info)] <- NULL - object[names(x = hvf.info)] <- hvf.info + object[[names(x = hvf.info)]] <- NULL + object[[names(x = hvf.info)]] <- hvf.info } return(object) } From c4d056ed5e6d2d24875e1283af41d5dd9dcee94b Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 18 Oct 2023 18:30:18 -0400 Subject: [PATCH 920/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2c302e11e..fbec5a94f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9079 -Date: 2023-10-17 +Version: 4.9.9.9080 +Date: 2023-10-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 00cee77d3b158b06c10908a2877f64dbad19491f Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 18 Oct 2023 19:00:03 -0400 Subject: [PATCH 921/979] add authors :) --- DESCRIPTION | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index fbec5a94f..075781daf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,12 +8,15 @@ Authors@R: c( person(given = "Saket", family = "Choudhary", email = "schoudhary@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-5202-7633")), person(given = "Charlotte", family = "Darby", email = "cdarby@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0003-2195-5300")), person(given = "Jeff", family = "Farrell", email = "jfarrell@g.harvard.edu", role = "ctb"), + person(given = "Isabella", family = "Grabski", email = "igrabski@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-0616-5469")), person(given = "Christoph", family = "Hafemeister", email = "chafemeister@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-6365-8254")), person(given = "Yuhan", family = "Hao", email = "yhao@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-1810-0822")), person(given = "Austin", family = "Hartman", email = "ahartman@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-7278-1852")), person(given = "Paul", family = "Hoffman", email = "seurat@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7693-8957")), person(given = "Jaison", family = "Jain", email = "jjain@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-9478-5018")), + person(given = "Longda", family = "Jiang", email = "ljiang@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0003-4964-6497")), person(given = "Madeline", family = "Kowalski", email = "mkowalski@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-5655-7620")), + person(given = "Skylar", family = "Li", email = "sli@nygenome.org", role = "ctb"), person(given = "Gesmira", family = "Molla", email = 'gmolla@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0002-8628-5056')), person(given = "Efthymia", family = "Papalexi", email = "epapalexi@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-5898-694X")), person(given = "Patrick", family = "Roelli", email = "proelli@nygenome.org", role = "ctb"), From 5531dd359777b4b58b9eb7e56ef62a296ad0548b Mon Sep 17 00:00:00 2001 From: Gesmira Date: Wed, 18 Oct 2023 19:24:40 -0400 Subject: [PATCH 922/979] double bracker --- R/utilities.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 980a57a5f..3f0c04457 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1333,7 +1333,7 @@ PercentageFeatureSet <- function( layer <- layers[i] features.layer <- features %||% grep( pattern = pattern, - x = rownames(x = object[[assay]][[layer]]), + x = rownames(x = object[[assay]][layer]), value = TRUE) layer.data <- LayerData(object = object, assay = assay, @@ -1476,7 +1476,7 @@ PseudobulkExpression.StdAssay <- function( features.assay <- Reduce( f = intersect, x = c(list(features.to.avg), - lapply(X = layers.set, FUN = function(l) rownames(object[[l]])) + lapply(X = layers.set, FUN = function(l) rownames(object[l])) ) ) if (length(x = features.assay) == 0) { From e4e3d06fec290d69a9e056793723928361b5cb64 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 18 Oct 2023 20:01:51 -0400 Subject: [PATCH 923/979] parentheses :tired_face: --- R/utilities.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utilities.R b/R/utilities.R index 3ffaf994f..cc78cd37f 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -452,6 +452,7 @@ AverageExpression <- function( verbose = verbose, ... ) + ) } #' Match the case of character vectors From 3c1b026507607119a6229d092f2f54e38257a1d2 Mon Sep 17 00:00:00 2001 From: gesmira <59940281+Gesmira@users.noreply.github.com> Date: Wed, 18 Oct 2023 20:06:05 -0400 Subject: [PATCH 924/979] Update visualization.R --- R/visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 72af56ba0..a13ae5854 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -8170,7 +8170,7 @@ SingleDimPlot <- function( raster <- raster %||% (nrow(x = data) > 1e5) pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) - if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && raster = TRUE) { + if (!is.null(x = cells.highlight) && pt.size == AutoPointSize(data = data, raster = raster) && sizes.highlight != pt.size && isTRUE(x = raster)) { warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight'.") } From c1b958b50b361c2b1d86adf7c1650251854f0701 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 11:52:03 -0400 Subject: [PATCH 925/979] Fix generics for VariableFeatures.SCTAssay and .SCTModel --- R/objects.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/objects.R b/R/objects.R index 7e3229d9d..d35bdc834 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1937,7 +1937,7 @@ SCTResults.Seurat <- function(object, assay = "SCT", slot, model = NULL, ...) { #' @method VariableFeatures SCTModel #' @export #' -VariableFeatures.SCTModel <- function(object, selection.method = NULL, nfeatures = 3000, ...) { +VariableFeatures.SCTModel <- function(object, method = NULL, nfeatures = 3000, ...) { if (!is_scalar_integerish(x = nfeatures) || (!is_na(x = nfeatures < 1L) && nfeatures < 1L)) { abort(message = "'nfeatures' must be a single positive integer") } @@ -1957,7 +1957,7 @@ VariableFeatures.SCTModel <- function(object, selection.method = NULL, nfeatures #' VariableFeatures.SCTAssay <- function( object, - selection.method = NULL, + method = NULL, layer = NULL, nfeatures = NULL, simplify = TRUE, From 6d685cbcf2e818d3a2cb3c5e7402196a6b901e10 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 11:52:23 -0400 Subject: [PATCH 926/979] Documentation updates --- man/reexports.Rd | 4 ++-- src/RcppExports.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 0259360e1bb7fc6d8e6893d5e901fd966a3a91a8 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 11:52:33 -0400 Subject: [PATCH 927/979] bump version --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4dae2ff36..cc34c3d79 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9081 -Date: 2023-10-18 +Version: 4.9.9.9082 +Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -80,7 +80,7 @@ Imports: Rtsne, scales, scattermore (>= 1.2), - sctransform (>= 0.4.0), + sctransform (>= 0.4.1), shiny, spatstat.explore, spatstat.geom, From 889c26cbf1f0f2a71dd83de76ce5dd09e65d577f Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 14:03:03 -0400 Subject: [PATCH 928/979] Import rlang --- R/objects.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/objects.R b/R/objects.R index d35bdc834..857b62f42 100644 --- a/R/objects.R +++ b/R/objects.R @@ -3128,6 +3128,7 @@ UpdateKey <- function(key) { # # @return \code{object} with the latest slot definitions # +#' @importFrom rlang exec !!! UpdateSlots <- function(object) { object.list <- sapply( X = slotNames(x = object), @@ -3144,7 +3145,7 @@ UpdateSlots <- function(object) { ) object.list <- Filter(f = Negate(f = is.null), x = object.list) object.list <- c('Class' = class(x = object)[1], object.list) - object <- rlang::exec( + object <- exec( .fn = new, !!! object.list ) From b5c092476c8a9e70eb876b3d591b893813fa9c33 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 14:03:42 -0400 Subject: [PATCH 929/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index cc34c3d79..4dc79ebdd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9082 +Version: 4.9.9.9083 Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 6e50e53f68de7794d1846b3eeaacc89af88f057a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 14:06:01 -0400 Subject: [PATCH 930/979] Update NAMESPACE --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 193fc35e4..dc36d9b12 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -790,6 +790,7 @@ importFrom(purrr,imap) importFrom(reticulate,import) importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) +importFrom(rlang,"!!!") importFrom(rlang,"!!") importFrom(rlang,abort) importFrom(rlang,arg_match) From 975eba4bc0617f2570d80e5b2e9a854a5b50701b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 14:06:16 -0400 Subject: [PATCH 931/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4dc79ebdd..ed1545d7f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9083 +Version: 4.9.9.9084 Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From d913f53638f21eb8a22999e88c42766244f91ed4 Mon Sep 17 00:00:00 2001 From: Longda Date: Thu, 19 Oct 2023 15:25:56 -0400 Subject: [PATCH 932/979] update the tests in test_diff_expression --- tests/testthat/test_differential_expression.R | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 5899ef574..f7b26b020 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -26,24 +26,24 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_error(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = "test")) expect_equal(colnames(x = markers.0), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) expect_equal(markers.0[1, "p_val"], 9.572778e-13, tolerance = 1e-18) - expect_equal(markers.0[1, "avg_logFC"], -4.034691, tolerance = 1e-6) + expect_equal(markers.0[1, "avg_logFC"], -4.180029, tolerance = 1e-6) expect_equal(markers.0[1, "pct.1"], 0.083) expect_equal(markers.0[1, "pct.2"], 0.909) expect_equal(markers.0[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) - expect_equal(nrow(x = markers.0), 227) + expect_equal(nrow(x = markers.0), 228) expect_equal(rownames(markers.0)[1], "HLA-DPB1") expect_equal(colnames(x = markers.0.limma), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) expect_equal(markers.0.limma[1, "p_val"], 9.572778e-13, tolerance = 1e-18) - expect_equal(markers.0.limma[1, "avg_logFC"], -4.034691, tolerance = 1e-6) + expect_equal(markers.0.limma[1, "avg_logFC"], -4.180029, tolerance = 1e-6) expect_equal(markers.0.limma[1, "pct.1"], 0.083) expect_equal(markers.0.limma[1, "pct.2"], 0.909) expect_equal(markers.0.limma[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) - expect_equal(nrow(x = markers.0.limma), 227) + expect_equal(nrow(x = markers.0.limma), 228) expect_equal(rownames(markers.0.limma)[1], "HLA-DPB1") expect_equal(markers.01[1, "p_val"], 1.702818e-11, tolerance = 1e-16) - expect_equal(markers.01[1, "avg_logFC"], -2.539289, tolerance = 1e-6) + expect_equal(markers.01[1, "avg_logFC"], -2.638242, tolerance = 1e-6) expect_equal(markers.01[1, "pct.1"], 0.111) expect_equal(markers.01[1, "pct.2"], 1.00) expect_equal(markers.01[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) @@ -51,7 +51,7 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(rownames(x = markers.01)[1], "TYMP") expect_equal(markers.01.limma[1, "p_val"], 1.702818e-11, tolerance = 1e-16) - expect_equal(markers.01.limma[1, "avg_logFC"], -2.539289, tolerance = 1e-6) + expect_equal(markers.01.limma[1, "avg_logFC"], -2.638242, tolerance = 1e-6) expect_equal(markers.01.limma[1, "pct.1"], 0.111) expect_equal(markers.01.limma[1, "pct.2"], 1.00) expect_equal(markers.01.limma[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) @@ -60,36 +60,36 @@ test_that("Default settings work as expected with pseudocount = 1", { # CLR normalization expect_equal(results.clr[1, "p_val"], 1.209462e-11, tolerance = 1e-16) - expect_equal(results.clr[1, "avg_logFC"], -0.8290693, tolerance = 1e-6) + expect_equal(results.clr[1, "avg_logFC"], -2.946633, tolerance = 1e-6) expect_equal(results.clr[1, "pct.1"], 0.111) expect_equal(results.clr[1, "pct.2"], 0.96) expect_equal(results.clr[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) - expect_equal(nrow(x = results.clr), 167) + expect_equal(nrow(x = results.clr), 213) expect_equal(rownames(x = results.clr)[1], "S100A8") expect_equal(results.clr.limma[1, "p_val"], 1.209462e-11, tolerance = 1e-16) - expect_equal(results.clr.limma[1, "avg_logFC"], -0.8290693, tolerance = 1e-6) + expect_equal(results.clr.limma[1, "avg_logFC"], -2.946633, tolerance = 1e-6) expect_equal(results.clr.limma[1, "pct.1"], 0.111) expect_equal(results.clr.limma[1, "pct.2"], 0.96) expect_equal(results.clr.limma[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) - expect_equal(nrow(x = results.clr.limma), 167) + expect_equal(nrow(x = results.clr.limma), 213) expect_equal(rownames(x = results.clr.limma)[1], "S100A8") # SCT normalization expect_equal(results.sct[1, "p_val"], 6.225491e-11, tolerance = 1e-16) - expect_equal(results.sct[1, "avg_logFC"], -1.081321, tolerance = 1e-6) + expect_equal(results.sct[1, "avg_logFC"], -2.545867, tolerance = 1e-6) expect_equal(results.sct[1, "pct.1"], 0.111) expect_equal(results.sct[1, "pct.2"], 0.96) expect_equal(results.sct[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct), 195) + expect_equal(nrow(x = results.sct), 214) expect_equal(rownames(x = results.sct)[1], "TYMP") expect_equal(results.sct.limma[1, "p_val"], 6.225491e-11, tolerance = 1e-16) - expect_equal(results.sct.limma[1, "avg_logFC"], -1.081321, tolerance = 1e-6) + expect_equal(results.sct.limma[1, "avg_logFC"], -2.545867, tolerance = 1e-6) expect_equal(results.sct.limma[1, "pct.1"], 0.111) expect_equal(results.sct.limma[1, "pct.2"], 0.96) expect_equal(results.sct.limma[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct.limma), 195) + expect_equal(nrow(x = results.sct.limma), 214) expect_equal(rownames(x = results.sct.limma)[1], "TYMP") }) @@ -100,7 +100,7 @@ vargenes.results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = test_that("features parameter behaves correctly ", { expect_equal(nrow(x = tymp.results), 1) expect_equal(tymp.results[1, "p_val"], 3.227445e-07, tolerance = 1e-12) - expect_equal(tymp.results[1, "avg_logFC"], -2.093928, tolerance = 1e-6) + expect_equal(tymp.results[1, "avg_logFC"], -2.188179, tolerance = 1e-6) expect_equal(tymp.results[1, "pct.1"], 0.111) expect_equal(tymp.results[1, "pct.2"], 0.682) expect_equal(tymp.results[1, "p_val_adj"], 7.423123e-05, tolerance = 1e-10) @@ -108,7 +108,7 @@ test_that("features parameter behaves correctly ", { expect_equal(nrow(x = vargenes.results), 20) expect_equal(vargenes.results[20, "p_val"], 4.225151e-01, tolerance = 1e-6) - expect_equal(vargenes.results[20, "avg_logFC"], 1.5976958, tolerance = 1e-6) + expect_equal(vargenes.results[20, "avg_logFC"], 1.796863, tolerance = 1e-6) expect_equal(vargenes.results[20, "pct.1"], 0.139) expect_equal(vargenes.results[20, "pct.2"], 0.091) expect_equal(vargenes.results[20, "p_val_adj"], 1.000000e+00) @@ -120,7 +120,7 @@ results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = Cells(x = test_that("passing cell names works", { expect_equal(nrow(x = results), 216) expect_equal(results[1, "p_val"], 0.0001690882) - expect_equal(results[1, "avg_logFC"], -1.790824, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], -1.967123, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.075) expect_equal(results[1, "pct.2"], 0.450) expect_equal(results[1, "p_val_adj"], 0.03889028) @@ -132,11 +132,11 @@ results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 0.1, vst.flavor = "v1")) test_that("setting pseudocount.use works", { expect_equal(nrow(x = results), 222) - expect_equal(results[1, "avg_logFC"], -2.630395, tolerance = 1e-6) - expect_equal(nrow(x = results.clr), 212) - expect_equal(results.clr[1, "avg_logFC"], -2.317338, tolerance = 1e-6) - expect_equal(nrow(results.sct), 214) - expect_equal(results.sct[1, "avg_logFC"], -2.25392, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], -2.640848, tolerance = 1e-6) + expect_equal(nrow(x = results.clr), 214) + expect_equal(results.clr[1, "avg_logFC"], -3.322368, tolerance = 1e-6) + expect_equal(nrow(results.sct), 215) + expect_equal(results.sct[1, "avg_logFC"], -2.668866, tolerance = 1e-6) }) results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1, mean.fxn = rowMeans)) @@ -151,7 +151,7 @@ test_that("setting mean.fxn works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, logfc.threshold = 2, verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("logfc.threshold works", { - expect_equal(nrow(x = results), 118) + expect_equal(nrow(x = results), 139) expect_gte(min(abs(x = results$avg_logFC)), 2) }) @@ -192,7 +192,7 @@ results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident. test_that("max.cells.per.ident works", { expect_equal(nrow(x = results), 222) expect_equal(results[1, "p_val"], 3.428568e-08, tolerance = 1e-13) - expect_equal(results[1, "avg_logFC"], -2.539289, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], -2.638242, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.111) expect_equal(results[1, "pct.2"], 1) expect_equal(results[1, "p_val_adj"], 7.885706e-06) @@ -205,7 +205,7 @@ test_that("latent.vars works", { expect_warning(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, latent.vars= "groups", verbose = FALSE)) expect_equal(nrow(x = results), 222) expect_equal(results[1, "p_val"], 2.130202e-16, tolerance = 1e-21) - expect_equal(results[1, "avg_logFC"], -3.082150, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], -3.102866, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.417) expect_equal(results[1, "pct.2"], 1) expect_equal(results[1, "p_val_adj"], 4.899466e-14, tolerance = 1e-19) @@ -218,10 +218,10 @@ Idents(object = t2) <- "groups" results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("group.by works", { - expect_equal(nrow(x = results), 188) + expect_equal(nrow(x = results), 190) expect_equal(results, results2) expect_equal(results[1, "p_val"], 0.02870319) - expect_equal(results[1, "avg_logFC"], 0.8226720, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], 0.8473584, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.455) expect_equal(results[1, "pct.2"], 0.194) expect_equal(results[1, "p_val_adj"], 1) @@ -234,10 +234,10 @@ Idents(object = t2) <- "groups" results2 <- suppressWarnings(FindMarkers(object = t2, ident.1 = "g1", ident.2 = "g2", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("subset.ident works", { - expect_equal(nrow(x = results), 182) + expect_equal(nrow(x = results), 183) expect_equal(results, results2) expect_equal(results[1, "p_val"], 0.01293720) - expect_equal(results[1, "avg_logFC"], 1.799280, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], 1.912603, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.50) expect_equal(results[1, "pct.2"], 0.125) expect_equal(results[1, "p_val_adj"], 1) @@ -256,7 +256,7 @@ results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = test_that("bimod test works", { expect_equal(nrow(x = results), 222) expect_equal(results[1, "p_val"], 4.751376e-17, tolerance = 1e-22) - expect_equal(results[1, "avg_logFC"], -2.552769, tolerance = 1e-6) + expect_equal(results[1, "avg_logFC"], -2.57219, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.306) expect_equal(results[1, "pct.2"], 1.00) expect_equal(results[1, "p_val_adj"], 1.092816e-14, tolerance = 1e-19) @@ -280,7 +280,7 @@ results <- FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = test_that("t test works", { expect_equal(nrow(x = results), 222) expect_equal(results["CST3", "p_val"], 1.170112e-15, tolerance = 1e-20) - expect_equal(results["CST3", "avg_logFC"], -2.552769 , tolerance = 1e-6) + expect_equal(results["CST3", "avg_logFC"], -2.57219, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) expect_equal(results["CST3", "p_val_adj"], 2.691258e-13, tolerance = 1e-18) @@ -289,9 +289,9 @@ test_that("t test works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "negbinom", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("negbinom test works", { - expect_equal(nrow(x = results), 188) + expect_equal(nrow(x = results), 204) expect_equal(results["CST3", "p_val"], 1.354443e-17, tolerance = 1e-22) - expect_equal(results["CST3", "avg_logFC"], -2.353701, tolerance = 1e-6) + expect_equal(results["CST3", "avg_logFC"], -2.878123, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) expect_equal(results["CST3", "p_val_adj"], 3.115218e-15, tolerance = 1e-20) @@ -300,9 +300,9 @@ test_that("negbinom test works", { results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, test.use = "poisson", verbose = FALSE, base = exp(1), pseudocount.use = 1)) test_that("poisson test works", { - expect_equal(nrow(x = results), 188) + expect_equal(nrow(x = results), 204) expect_equal(results["CST3", "p_val"], 3.792196e-78, tolerance = 1e-83) - expect_equal(results["CST3", "avg_logFC"], -2.353701, tolerance = 1e-6) + expect_equal(results["CST3", "avg_logFC"], -2.878123, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) expect_equal(results["CST3", "p_val_adj"], 8.722050e-76, tolerance = 1e-81) @@ -313,7 +313,7 @@ results <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident. test_that("LR test works", { expect_equal(nrow(x = results), 222) expect_equal(results["CST3", "p_val"], 3.990707e-16, tolerance = 1e-21) - expect_equal(results["CST3", "avg_logFC"], -2.552769, tolerance = 1e-6) + expect_equal(results["CST3", "avg_logFC"], -2.57219, tolerance = 1e-6) expect_equal(results["CST3", "pct.1"], 0.306) expect_equal(results["CST3", "pct.2"], 1.00) expect_equal(results["CST3", "p_val_adj"], 9.178625e-14, tolerance = 1e-19) @@ -331,11 +331,11 @@ test_that("BPCells FindMarkers gives same results", { markers.bp <- suppressWarnings(FindMarkers(object = pbmc_small, assay = "RNAbp", ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1)) expect_equal(colnames(x = markers.bp), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) expect_equal(markers.bp[1, "p_val"], 9.572778e-13) - expect_equal(markers.bp[1, "avg_logFC"], -4.034691, tolerance = 1e-6) + expect_equal(markers.bp[1, "avg_logFC"], -4.180029, tolerance = 1e-6) expect_equal(markers.bp[1, "pct.1"], 0.083) expect_equal(markers.bp[1, "pct.2"], 0.909) expect_equal(markers.bp[1, "p_val_adj"], 2.201739e-10) - expect_equal(nrow(x = markers.bp), 227) + expect_equal(nrow(x = markers.bp), 228) expect_equal(rownames(markers.bp)[1], "HLA-DPB1") }) @@ -349,7 +349,7 @@ results.pseudo <- suppressMessages(suppressWarnings(FindAllMarkers(object = pbmc test_that("FindAllMarkers works as expected", { expect_equal(colnames(x = results), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) expect_equal(results[1, "p_val"], 9.572778e-13, tolerance = 1e-18) - expect_equal(results[1, "avg_log2FC"], -5.820829, tolerance = 1e-6) + expect_equal(results[1, "avg_log2FC"], -6.030507, tolerance = 1e-6) expect_equal(results[1, "pct.1"], 0.083) expect_equal(results[1, "pct.2"], 0.909) expect_equal(results[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) @@ -358,7 +358,7 @@ test_that("FindAllMarkers works as expected", { # CLR normalization expect_equal(results.clr[1, "p_val"], 1.338858e-12, tolerance = 1e-17) - expect_equal(results.clr[1, "avg_log2FC"], -1.079924, tolerance = 1e-6) + expect_equal(results.clr[1, "avg_log2FC"], -4.088546, tolerance = 1e-6) expect_equal(results.clr[1, "pct.1"], 0.083) expect_equal(results.clr[1, "pct.2"], 0.909) expect_equal(results.clr[1, "p_val_adj"], 3.079373e-10, tolerance = 1e-15) @@ -367,7 +367,7 @@ test_that("FindAllMarkers works as expected", { # SCT normalization expect_equal(results.sct[1, "p_val"], 4.25861e-12, tolerance = 1e-17) - expect_equal(results.sct[1, "avg_log2FC"], -2.70188, tolerance = 1e-6) + expect_equal(results.sct[1, "avg_log2FC"], -5.088014, tolerance = 1e-6) expect_equal(results.sct[1, "pct.1"], 0.167) expect_equal(results.sct[1, "pct.2"], 0.909) expect_equal(results.sct[1, "p_val_adj"], 9.368941e-10, tolerance = 1e-15) @@ -376,7 +376,7 @@ test_that("FindAllMarkers works as expected", { # pseudocount.use = 0.1 expect_equal(results.pseudo[1, "p_val"], 9.572778e-13, tolerance = 1e-18) - expect_equal(results.pseudo[1, "avg_log2FC"], -6.013818, tolerance = 1e-6) + expect_equal(results.pseudo[1, "avg_log2FC"], -6.036353, tolerance = 1e-6) expect_equal(results.pseudo[1, "pct.1"], 0.083) expect_equal(results.pseudo[1, "pct.2"], 0.909) expect_equal(results.pseudo[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) @@ -429,7 +429,7 @@ test_that("BPCells FindAllMarkers gives same results", { expect_equal(colnames(x = results.bp), c("p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj", "cluster", "gene")) expect_equal(results.bp[1, "p_val"], 9.572778e-13) - expect_equal(results.bp[1, "avg_log2FC"], -5.820829, tolerance = 1e-6) + expect_equal(results.bp[1, "avg_log2FC"], -6.030507, tolerance = 1e-6) expect_equal(results.bp[1, "pct.1"], 0.083) expect_equal(results.bp[1, "pct.2"], 0.909) expect_equal(results.bp[1, "p_val_adj"], 2.201739e-10) @@ -453,18 +453,18 @@ if (requireNamespace('metap', quietly = TRUE)) { test_that("FindConservedMarkers works", { expect_equal(colnames(x = markers), c(paste0("g2_", standard.names), paste0("g1_", standard.names), "max_pval", "minimump_p_val")) expect_equal(markers[1, "g2_p_val"], 4.983576e-05) - expect_equal(markers[1, "g2_avg_logFC"], -4.125279, tolerance = 1e-6) + expect_equal(markers[1, "g2_avg_logFC"], -4.364959, tolerance = 1e-6) # expect_equal(markers[1, "g2_pct.1"], 0.062) expect_equal(markers[1, "g2_pct.2"], 0.75) expect_equal(markers[1, "g2_p_val_adj"], 0.0114622238) expect_equal(markers[1, "g1_p_val"], 3.946643e-08, tolerance = 1e-13) - expect_equal(markers[1, "g1_avg_logFC"], -3.589384, tolerance = 1e-6) + expect_equal(markers[1, "g1_avg_logFC"], -3.69215, tolerance = 1e-6) expect_equal(markers[1, "g1_pct.1"], 0.10) expect_equal(markers[1, "g1_pct.2"], 0.958) expect_equal(markers[1, "g1_p_val_adj"], 9.077279e-06) expect_equal(markers[1, "max_pval"], 4.983576e-05) expect_equal(markers[1, "minimump_p_val"], 7.893286e-08, tolerance = 1e-13) - expect_equal(nrow(markers), 217) + expect_equal(nrow(markers), 219) expect_equal(rownames(markers)[1], "HLA-DRB1") expect_equal(markers[, "max_pval"], unname(obj = apply(X = markers, MARGIN = 1, FUN = function(x) max(x[c("g1_p_val", "g2_p_val")])))) }) @@ -485,11 +485,11 @@ if (requireNamespace('metap', quietly = TRUE)) { expect_warning(FindConservedMarkers(object = pbmc.test, ident.1 = 0, grouping.var = "groups", test.use = "t")) expect_equal(colnames(x = markers.missing), paste0("g2_", standard.names)) expect_equal(markers.missing[1, "g2_p_val"], 1.672911e-13, tolerance = 1e-18) - expect_equal(markers.missing[1, "g2_avg_logFC"], -4.527888, tolerance = 1e-6) + expect_equal(markers.missing[1, "g2_avg_logFC"], -4.796379, tolerance = 1e-6) # expect_equal(markers.missing[1, "g2_pct.1"], 0.062) expect_equal(markers.missing[1, "g2_pct.2"], 0.95) expect_equal(markers.missing[1, "g2_p_val_adj"], 3.847695e-11, tolerance = 1e-16) - expect_equal(nrow(markers.missing), 225) + expect_equal(nrow(markers.missing), 226) expect_equal(rownames(markers.missing)[1], "HLA-DPB1") }) } From 53f72b48ceb856653380a9c94a0ab686760b56a9 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 15:49:46 -0400 Subject: [PATCH 933/979] Update tests for fold change --- tests/testthat/test_differential_expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index f7b26b020..a1f40d12c 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -413,7 +413,7 @@ object <- suppressMessages(FindClusters(object, verbose = FALSE)) markers <- FindMarkers(object = object, ident.1="0", ident.2="1",pseudocount.use = 1, verbose=FALSE) test_that("FindMarkers recognizes log normalization", { expect_equal(markers[1, "p_val"], 1.598053e-14, tolerance = 1e-19) - expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) + expect_equal(markers[1, "avg_log2FC"], -2.634458, tolerance = 1e-6) }) From 6a7c97bd0e27a9374b8bb6d009bfd3477972a586 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Thu, 19 Oct 2023 16:00:59 -0400 Subject: [PATCH 934/979] fixing ranking of results in FindMarkers --- R/differential_expression.R | 2 +- src/RcppExports.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 735ed0241..6f4cfcf9e 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -610,7 +610,7 @@ FindMarkers.default <- function( if (test.use %in% DEmethods_nocorrect()) { de.results <- de.results[order(-de.results$power, -de.results[, 1]), ] } else { - de.results <- de.results[order(de.results$p_val, -de.results[, 1]), ] + de.results <- de.results[order(de.results$p_val, -abs(de.results[,colnames(fc.results)[1]])), ] de.results$p_val_adj = p.adjust( p = de.results$p_val, method = "bonferroni", diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From ca3d8bf515c69d3d2bf1df274d428c8d4089a068 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Thu, 19 Oct 2023 16:02:35 -0400 Subject: [PATCH 935/979] bumped version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ed1545d7f..1351b91cc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9084 +Version: 4.9.9.9085 Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 533d4c83d4936b16d94027cfe8ffd3b52fb1bf37 Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Thu, 19 Oct 2023 16:04:32 -0400 Subject: [PATCH 936/979] undoing change to RcppExports --- src/RcppExports.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From d0839d2358b81d1693e9c7dc2261a717c414e372 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 16:07:57 -0400 Subject: [PATCH 937/979] Update version --- DESCRIPTION | 2 +- man/reexports.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ed1545d7f..1351b91cc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9084 +Version: 4.9.9.9085 Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From b07a40f9738565ac275dd9efeadfa82362f33c36 Mon Sep 17 00:00:00 2001 From: Longda Date: Thu, 19 Oct 2023 18:29:07 -0400 Subject: [PATCH 938/979] change TopDEGenesMixscape back to minpct=0.1 --- R/mixscape.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mixscape.R b/R/mixscape.R index 69edf3e87..885c0a3f2 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -1320,7 +1320,8 @@ TopDEGenesMixscape <- function( assay = de.assay, test.use = test.use, logfc.threshold = logfc.threshold, - verbose = verbose + verbose = verbose, + min.pct = 0.1 ) de.genes <- de.genes[de.genes$p_val_adj < pval.cutoff, ] }, From fcbf16f66c3162e247f22ab2689dca053acd853c Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Thu, 19 Oct 2023 18:41:13 -0400 Subject: [PATCH 939/979] clarify documentation on DE methods --- R/differential_expression.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 6f4cfcf9e..c1b1fd4bd 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -421,9 +421,11 @@ FindConservedMarkers <- function( #' @param test.use Denotes which test to use. Available options are: #' \itemize{ #' \item{"wilcox"} : Identifies differentially expressed genes between two -#' groups of cells using a Wilcoxon Rank Sum test (default) +#' groups of cells using a Wilcoxon Rank Sum test (default); will use a fast +#' implementation by Presto if installed #' \item{"wilcox_limma"} : Identifies differentially expressed genes between two -#' groups of cells using the limma implementation of the Wilcoxon Rank Sum test +#' groups of cells using the limma implementation of the Wilcoxon Rank Sum test; +#' set this option to reproduce results from Seurat v4 #' \item{"bimod"} : Likelihood-ratio test for single cell gene expression, #' (McDavid et al., Bioinformatics, 2013) #' \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. From f88f3e322f00fe150183b4f11cf92ede8d07768f Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 19 Oct 2023 18:54:37 -0400 Subject: [PATCH 940/979] bump version --- DESCRIPTION | 2 +- src/RcppExports.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1351b91cc..01e3a9d79 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9085 +Version: 4.9.9.9086 Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 85345a2c340dce84b9127e4e4330ba6bc998589c Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Fri, 20 Oct 2023 15:57:33 -0400 Subject: [PATCH 941/979] AverageExpression docs minor fix --- R/utilities.R | 4 ++-- man/AggregateExpression.Rd | 2 +- man/AverageExpression.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 7379c47f6..6e5579c5b 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -326,7 +326,7 @@ AddModuleScore <- function( #' Returns summed counts ("pseudobulk") for each identity class. #' #' If \code{return.seurat = TRUE}, aggregated values are placed in the 'counts' -#' slot of the returned object. The data is then normalized by running \code{\link{NormalizeData}} +#' layer of the returned object. The data is then normalized by running \code{\link{NormalizeData}} #' on the aggregated counts. \code{\link{ScaleData}} is then run on the default assay #' before returning the object. #' @@ -399,7 +399,7 @@ AggregateExpression <- function( #' are placed in the 'counts' layer of the returned object and 'log1p' #' is run on the averaged counts and placed in the 'data' layer \code{\link{ScaleData}} #' is then run on the default assay before returning the object. -#' If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' layer contains +#' If \code{return.seurat = TRUE} and layer is 'scale.data', the 'counts' layer contains #' average counts and 'scale.data' is set to the averaged values of 'scale.data'. #' #' @param object Seurat object diff --git a/man/AggregateExpression.Rd b/man/AggregateExpression.Rd index ddd728459..71983b4de 100644 --- a/man/AggregateExpression.Rd +++ b/man/AggregateExpression.Rd @@ -51,7 +51,7 @@ Returns summed counts ("pseudobulk") for each identity class. } \details{ If \code{return.seurat = TRUE}, aggregated values are placed in the 'counts' -slot of the returned object. The data is then normalized by running \code{\link{NormalizeData}} +layer of the returned object. The data is then normalized by running \code{\link{NormalizeData}} on the aggregated counts. \code{\link{ScaleData}} is then run on the default assay before returning the object. } diff --git a/man/AverageExpression.Rd b/man/AverageExpression.Rd index 531cd7ac5..a40b3fe13 100644 --- a/man/AverageExpression.Rd +++ b/man/AverageExpression.Rd @@ -56,7 +56,7 @@ If \code{return.seurat = TRUE} and layer is not 'scale.data', averaged values are placed in the 'counts' layer of the returned object and 'log1p' is run on the averaged counts and placed in the 'data' layer \code{\link{ScaleData}} is then run on the default assay before returning the object. -If \code{return.seurat = TRUE} and slot is 'scale.data', the 'counts' layer contains +If \code{return.seurat = TRUE} and layer is 'scale.data', the 'counts' layer contains average counts and 'scale.data' is set to the averaged values of 'scale.data'. } \examples{ From f5b3820bf2640ecc2742a531cbba6c920de1eb93 Mon Sep 17 00:00:00 2001 From: Longda Date: Fri, 20 Oct 2023 17:06:00 -0400 Subject: [PATCH 942/979] Update mixscape.R There are 2 bugs in the DEenrichRPlot() function that I forgot to fix (https://github.com/satijalab/seurat-private/pull/780). It should not affect any test or result in the vignettes. It should be fine to merge. --- R/mixscape.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mixscape.R b/R/mixscape.R index 885c0a3f2..1d10c240c 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -209,8 +209,9 @@ DEenrichRPlot <- function( } if (isTRUE(x = balanced)) { - neg.markers <- all.markers[all.markers[, 2] < logfc.threshold & all.markers[, 1] < p.val.cutoff, , drop = FALSE] + neg.markers <- all.markers[all.markers[, 2] < -logfc.threshold & all.markers[, 1] < p.val.cutoff, , drop = FALSE] neg.markers.list <- rownames(x = neg.markers)[1:min(max.genes, nrow(x = neg.markers))] + Sys.sleep(1) neg.er <- enrichR::enrichr(genes = neg.markers.list, databases = enrich.database) neg.er <- do.call(what = cbind, args = neg.er) neg.er$log10pval <- -log10(x = neg.er[, paste(enrich.database, sep = ".", "P.value")]) From 146e458e27b14b43980b8f2c9405408421a215a4 Mon Sep 17 00:00:00 2001 From: Longda Date: Fri, 20 Oct 2023 17:13:31 -0400 Subject: [PATCH 943/979] Update visualization.R Fix a bug in ImageFeaturePlot() that I forgot to merge (https://github.com/satijalab/seurat-private/pull/781). --- R/visualization.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index be6f82447..2d1175688 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -2858,9 +2858,18 @@ ImageFeaturePlot <- function( names(x = pdata) <- pnames for (i in names(x = pdata)) { ul <- unlist(x = strsplit(x = i, split = '_')) - img <- paste(ul[1:length(ul)-1], collapse = '_') + # img <- paste(ul[1:length(ul)-1], collapse = '_') # Apply overlap - lyr <- ul[length(ul)] + # lyr <- ul[length(ul)] + if(length(ul) > 1) { + img <- paste(ul[1:length(ul)-1], collapse = '_') + lyr <- ul[length(ul)] + } else if (length(ul) == 1) { + img <- ul[1] + lyr <- "centroids" + } else { + stop("the length of ul is 0. please check.") + } if (is.na(x = lyr)) { lyr <- boundaries[[img]] } From 71d8296daecdb02dee016a942d6fed57ee20f8d8 Mon Sep 17 00:00:00 2001 From: Longda Date: Fri, 20 Oct 2023 18:46:02 -0400 Subject: [PATCH 944/979] update ImageDimPlot() --- R/visualization.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 2d1175688..d82d0a079 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -2398,7 +2398,8 @@ ImageDimPlot <- function( overlap = FALSE, axes = FALSE, combine = TRUE, - coord.fixed = TRUE + coord.fixed = TRUE, + flip_xy = TRUE ) { cells <- cells %||% Cells(x = object) # Determine FOV to use @@ -2563,6 +2564,10 @@ ImageDimPlot <- function( if (isTRUE(coord.fixed)) { p <- p + coord_fixed() } + if(!isTRUE(flip_xy) && isTRUE(coord.fixed)){ + xy_ratio = (max(pdata[[i]]$x) - min(pdata[[i]]$x)) / (max(pdata[[i]]$y) - min(pdata[[i]]$y)) + p = p + coord_flip() + theme(aspect.ratio = 1/xy_ratio) + } plots[[idx]] <- p idx <- idx + 1L } @@ -2862,8 +2867,8 @@ ImageFeaturePlot <- function( # Apply overlap # lyr <- ul[length(ul)] if(length(ul) > 1) { - img <- paste(ul[1:length(ul)-1], collapse = '_') - lyr <- ul[length(ul)] + img <- paste(ul[1:length(ul)-1], collapse = '_') + lyr <- ul[length(ul)] } else if (length(ul) == 1) { img <- ul[1] lyr <- "centroids" From 8adc9dd28ff0c6650df628e2200d3548f8f32674 Mon Sep 17 00:00:00 2001 From: Gesmira Date: Fri, 20 Oct 2023 20:26:41 -0400 Subject: [PATCH 945/979] fix variable features for the 1000000000th time --- R/preprocessing5.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index d7bae2d12..5911c1885 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -159,6 +159,8 @@ FindVariableFeatures.StdAssay <- function( sep = '_' ) rownames(x = hvf.info) <- Features(x = object, layer = layer[i]) + object[["var.features"]] <- NULL + object[["var.features.rank"]] <- NULL object[[names(x = hvf.info)]] <- NULL object[[names(x = hvf.info)]] <- hvf.info } From 634c4cb80e75f7b6b46bc2690c9f79868b71e212 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 21 Oct 2023 09:31:01 -0400 Subject: [PATCH 946/979] Documentation updates --- man/DEenrichRPlot.Rd | 6 ++++-- man/FindAllMarkers.Rd | 6 ++++-- man/FindMarkers.Rd | 6 ++++-- man/ImageDimPlot.Rd | 3 ++- man/MixscapeHeatmap.Rd | 6 ++++-- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/man/DEenrichRPlot.Rd b/man/DEenrichRPlot.Rd index 0f833ee02..031a2a922 100644 --- a/man/DEenrichRPlot.Rd +++ b/man/DEenrichRPlot.Rd @@ -42,9 +42,11 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{test.use}{Denotes which test to use. Available options are: \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two - groups of cells using a Wilcoxon Rank Sum test (default) + groups of cells using a Wilcoxon Rank Sum test (default); will use a fast + implementation by Presto if installed \item{"wilcox_limma"} : Identifies differentially expressed genes between two - groups of cells using the limma implementation of the Wilcoxon Rank Sum test + groups of cells using the limma implementation of the Wilcoxon Rank Sum test; + set this option to reproduce results from Seurat v4 \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. diff --git a/man/FindAllMarkers.Rd b/man/FindAllMarkers.Rd index 3fa616abb..7505aa3e8 100644 --- a/man/FindAllMarkers.Rd +++ b/man/FindAllMarkers.Rd @@ -44,9 +44,11 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{test.use}{Denotes which test to use. Available options are: \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two - groups of cells using a Wilcoxon Rank Sum test (default) + groups of cells using a Wilcoxon Rank Sum test (default); will use a fast + implementation by Presto if installed \item{"wilcox_limma"} : Identifies differentially expressed genes between two - groups of cells using the limma implementation of the Wilcoxon Rank Sum test + groups of cells using the limma implementation of the Wilcoxon Rank Sum test; + set this option to reproduce results from Seurat v4 \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index 9f3c1fac1..94665bdc1 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -165,9 +165,11 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{test.use}{Denotes which test to use. Available options are: \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two - groups of cells using a Wilcoxon Rank Sum test (default) + groups of cells using a Wilcoxon Rank Sum test (default); will use a fast + implementation by Presto if installed \item{"wilcox_limma"} : Identifies differentially expressed genes between two - groups of cells using the limma implementation of the Wilcoxon Rank Sum test + groups of cells using the limma implementation of the Wilcoxon Rank Sum test; + set this option to reproduce results from Seurat v4 \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. diff --git a/man/ImageDimPlot.Rd b/man/ImageDimPlot.Rd index 412ab0efb..652e38a00 100644 --- a/man/ImageDimPlot.Rd +++ b/man/ImageDimPlot.Rd @@ -28,7 +28,8 @@ ImageDimPlot( overlap = FALSE, axes = FALSE, combine = TRUE, - coord.fixed = TRUE + coord.fixed = TRUE, + flip_xy = TRUE ) } \arguments{ diff --git a/man/MixscapeHeatmap.Rd b/man/MixscapeHeatmap.Rd index a3662b707..5c98d74c0 100644 --- a/man/MixscapeHeatmap.Rd +++ b/man/MixscapeHeatmap.Rd @@ -47,9 +47,11 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.} \item{test.use}{Denotes which test to use. Available options are: \itemize{ \item{"wilcox"} : Identifies differentially expressed genes between two - groups of cells using a Wilcoxon Rank Sum test (default) + groups of cells using a Wilcoxon Rank Sum test (default); will use a fast + implementation by Presto if installed \item{"wilcox_limma"} : Identifies differentially expressed genes between two - groups of cells using the limma implementation of the Wilcoxon Rank Sum test + groups of cells using the limma implementation of the Wilcoxon Rank Sum test; + set this option to reproduce results from Seurat v4 \item{"bimod"} : Likelihood-ratio test for single cell gene expression, (McDavid et al., Bioinformatics, 2013) \item{"roc"} : Identifies 'markers' of gene expression using ROC analysis. From 2a2724d6256b79e0939ee5e369feb247f4a81620 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 21 Oct 2023 09:35:56 -0400 Subject: [PATCH 947/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 01e3a9d79..ffba2c070 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9086 -Date: 2023-10-19 +Version: 4.9.9.9087 +Date: 2023-10-21 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 9067e433473bffd73309cbc3aebffb370e7b1ba4 Mon Sep 17 00:00:00 2001 From: rsatija Date: Sat, 21 Oct 2023 12:30:32 -0400 Subject: [PATCH 948/979] Initial commit but needs an mvp fix --- R/integration.R | 2 ++ R/preprocessing5.R | 1 + 2 files changed, 3 insertions(+) diff --git a/R/integration.R b/R/integration.R index 6d664b362..2ca5d964a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -5231,6 +5231,7 @@ if (normalization.method == 'SCT') { feature.sd <- sqrt(x = RowVarSparse(mat = as.sparse(reference.data))) } feature.sd[is.na(x = feature.sd)] <- 1 + feature.sd[feature.sd==0] <- 1 } else { feature.sd <- rep(x = 1, nrow(x = reference.data)) } @@ -5328,6 +5329,7 @@ ProjectCellEmbeddings.IterableMatrix <- function( ) } feature.sd[is.na(x = feature.sd)] <- 1 + feature.sd[feature.sd==0] <- 1 } else { feature.sd <- rep(x = 1, nrow(x = reference.data)) } diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 5911c1885..2ecc3407f 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -164,6 +164,7 @@ FindVariableFeatures.StdAssay <- function( object[[names(x = hvf.info)]] <- NULL object[[names(x = hvf.info)]] <- hvf.info } + VariableFeatures(object) <- VariableFeatures(object, nfeatures=nfeatures,method = selection.method) return(object) } From d0875e907aa3978b07e3b86a51f6e013d8ad5d82 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 21 Oct 2023 19:26:08 -0400 Subject: [PATCH 949/979] Update integration to handle multiple layers in FindTransferAnchors --- R/integration.R | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/R/integration.R b/R/integration.R index 2ca5d964a..dd0a30a5a 100644 --- a/R/integration.R +++ b/R/integration.R @@ -961,10 +961,25 @@ FindTransferAnchors <- function( key = "ProjectPC_", assay = reference.assay ) - combined.ob <- suppressWarnings(expr = merge( - x = DietSeurat(object = reference, counts = FALSE), - y = DietSeurat(object = query, counts = FALSE), - )) + # combined.ob <- suppressWarnings(expr = merge( + # x = DietSeurat(object = reference, counts = FALSE), + # y = DietSeurat(object = query, counts = FALSE), + # )) + ref.diet <- DietSeurat(object = reference, counts = FALSE) + query.diet <- DietSeurat(object = query, counts = FALSE) + + counts.list <- list(reference = LayerData(ref.diet[[reference.assay]], layer = "data")) + query.data.list <- list() + for (i in Layers(object = query.diet[[reference.assay]], search = "data")) { + data.layer.name <- gsub(pattern = "data.", replacement = "", x = i) + counts.list[[data.layer.name]] <- LayerData(object = query[[reference.assay]], layer = i) + } + combined.ob <- CreateSeuratObject(counts = counts.list, assay = reference.assay) + for (i in Layers(object = combined.ob[[reference.assay]], search = "counts")){ + data.layer.name <- gsub(pattern = "counts.", replacement = "data.", x = i) # replace counts. to data. + layer.data <- LayerData(object = combined.ob, layer = i) + LayerData(object = combined.ob, layer = data.layer.name) <- layer.data # set layer data + } combined.ob[["pcaproject"]] <- combined.pca colnames(x = orig.loadings) <- paste0("ProjectPC_", 1:ncol(x = orig.loadings)) Loadings(object = combined.ob[["pcaproject"]]) <- orig.loadings[, dims] @@ -2290,9 +2305,9 @@ MapQuery <- function( integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE - td.allarguments <- c(list(anchorset = anchorset, - reference = reference, query = query, refdata = refdata, - store.weights = TRUE, only.weights = is.null(x = refdata), + td.allarguments <- c(list(anchorset = anchorset, + reference = reference, query = query, refdata = refdata, + store.weights = TRUE, only.weights = is.null(x = refdata), verbose = verbose), transferdata.args) query <- exec("TransferData",!!!td.allarguments) if (inherits(x = transferdata.args$weight.reduction , "character") && @@ -6107,7 +6122,7 @@ ValidateParams_TransferData <- function( query.cells, reference, query, - query.assay, + query.assay, refdata, weight.reduction, l2.norm, From def596a7d5b6c5f412b2d07631ff07e43d9ccc05 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 21 Oct 2023 19:26:59 -0400 Subject: [PATCH 950/979] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 01e3a9d79..c6b141049 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9086 +Version: 4.9.9.9087 Date: 2023-10-19 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From 0b85c4fc0abc2b752505c12dd5bb76f6b7722be7 Mon Sep 17 00:00:00 2001 From: rsatija Date: Sat, 21 Oct 2023 20:29:44 -0400 Subject: [PATCH 951/979] Key fix --- R/preprocessing5.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index 2ecc3407f..bcbc5127e 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -164,7 +164,7 @@ FindVariableFeatures.StdAssay <- function( object[[names(x = hvf.info)]] <- NULL object[[names(x = hvf.info)]] <- hvf.info } - VariableFeatures(object) <- VariableFeatures(object, nfeatures=nfeatures,method = selection.method) + VariableFeatures(object) <- VariableFeatures(object, nfeatures=nfeatures,method = key) return(object) } From 2589575107d93f2dfe782245a0503d79657cec0c Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 22 Oct 2023 17:37:44 -0400 Subject: [PATCH 952/979] Update author status --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ffba2c070..b157f82fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Authors@R: c( person(given = "Christoph", family = "Hafemeister", email = "chafemeister@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-6365-8254")), person(given = "Yuhan", family = "Hao", email = "yhao@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-1810-0822")), person(given = "Austin", family = "Hartman", email = "ahartman@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-7278-1852")), - person(given = "Paul", family = "Hoffman", email = "seurat@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7693-8957")), + person(given = "Paul", family = "Hoffman", email = "hoff0792@umn.edu", role = "ctb", comment = c(ORCID = "0000-0002-7693-8957")), person(given = "Jaison", family = "Jain", email = "jjain@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-9478-5018")), person(given = "Longda", family = "Jiang", email = "ljiang@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0003-4964-6497")), person(given = "Madeline", family = "Kowalski", email = "mkowalski@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-5655-7620")), @@ -20,7 +20,7 @@ Authors@R: c( person(given = "Gesmira", family = "Molla", email = 'gmolla@nygenome.org', role = 'ctb', comment = c(ORCID = '0000-0002-8628-5056')), person(given = "Efthymia", family = "Papalexi", email = "epapalexi@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-5898-694X")), person(given = "Patrick", family = "Roelli", email = "proelli@nygenome.org", role = "ctb"), - person(given = "Rahul", family = "Satija", email = "rsatija@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-9448-8833")), + person(given = "Rahul", family = "Satija", email = "seurat@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9448-8833")), person(given = "Karthik", family = "Shekhar", email = "kshekhar@berkeley.edu", role = "ctb"), person(given = "Avi", family = "Srivastava", email = "asrivastava@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-9798-2079")), person(given = "Tim", family = "Stuart", email = "tstuart@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-3044-0897")), From 72fc81efab411297d7cf3bea488eac469a8a250e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 22 Oct 2023 17:38:16 -0400 Subject: [PATCH 953/979] Import CheckFMargin from SeuratObject --- R/preprocessing5.R | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/R/preprocessing5.R b/R/preprocessing5.R index bcbc5127e..e9c7920c5 100644 --- a/R/preprocessing5.R +++ b/R/preprocessing5.R @@ -240,6 +240,7 @@ FindSpatiallyVariableFeatures.StdAssay <- function( #' @method LogNormalize default #' #' @param margin Margin to normalize over +#' @importFrom SeuratObject .CheckFmargin #' #' @export #' @@ -250,7 +251,7 @@ LogNormalize.default <- function( verbose = TRUE, ... ) { - margin <- SeuratObject:::.CheckFmargin(fmargin = margin) + margin <- .CheckFmargin(fmargin = margin) ncells <- dim(x = data)[margin] if (isTRUE(x = verbose)) { pb <- txtProgressBar(file = stderr(), style = 3) @@ -551,7 +552,7 @@ VST.IterableMatrix <- function( ... ) { nfeatures <- nrow(x = data) - hvf.info <- SeuratObject::EmptyDF(n = nfeatures) + hvf.info <- EmptyDF(n = nfeatures) hvf.stats <- BPCells::matrix_stats( matrix = data, row_stats = 'variance')$row_stats @@ -591,6 +592,7 @@ VST.IterableMatrix <- function( } #' @importFrom Matrix rowMeans +#' @importFrom SeuratObject EmptyDF #' #' @rdname VST #' @method VST dgCMatrix @@ -606,7 +608,7 @@ VST.dgCMatrix <- function( ... ) { nfeatures <- nrow(x = data) - hvf.info <- SeuratObject::EmptyDF(n = nfeatures) + hvf.info <- EmptyDF(n = nfeatures) # Calculate feature means hvf.info$mean <- Matrix::rowMeans(x = data) # Calculate feature variance @@ -776,6 +778,8 @@ DISP <- function( return(hvf.info) } +#' @importFrom SeuratObject .CheckFmargin +#' .FeatureVar <- function( data, mu, @@ -785,7 +789,7 @@ DISP <- function( clip = NULL, verbose = TRUE ) { - fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) + fmargin <- .CheckFmargin(fmargin = fmargin) ncells <- dim(x = data)[-fmargin] nfeatures <- dim(x = data)[fmargin] fvars <- vector(mode = 'numeric', length = nfeatures) @@ -886,6 +890,7 @@ DISP <- function( #' @param verbose Show progress updates #' #' @keywords internal +#' @importFrom SeuratObject .CheckFmargin #' #' @noRd #' @@ -898,7 +903,7 @@ DISP <- function( clip = NULL, verbose = TRUE ) { - fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) + fmargin <- .CheckFmargin(fmargin = fmargin) if (fmargin != .MARGIN(object = data)) { data <- t(x = data) fmargin <- .MARGIN(object = data) @@ -959,8 +964,9 @@ DISP <- function( return(fvars) } +#' @importFrom SeuratObject .CheckFmargin .SparseMean <- function(data, margin = 1L) { - margin <- SeuratObject:::.CheckFmargin(fmargin = margin) + margin <- .CheckFmargin(fmargin = margin) if (margin != .MARGIN(object = data)) { data <- t(x = data) margin <- .MARGIN(object = data) @@ -989,6 +995,7 @@ DISP <- function( #' root of the number of cells #' #' @importFrom Matrix rowMeans +#' @importFrom SeuratObject .CheckFmargin #' #' @keywords internal #' @@ -1003,7 +1010,7 @@ DISP <- function( verbose = TRUE, ... ) { - fmargin <- SeuratObject:::.CheckFmargin(fmargin = fmargin) + fmargin <- .CheckFmargin(fmargin = fmargin) nfeatures <- dim(x = data)[fmargin] # TODO: Support transposed matrices # nfeatures <- nrow(x = data) From f6965f1eb0ab5d8c1b4313ab7688770433b15c77 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 22 Oct 2023 17:38:42 -0400 Subject: [PATCH 954/979] Import EmptyDf from SeuratObject --- R/sketching.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/sketching.R b/R/sketching.R index 81238d8db..0eedae3e5 100644 --- a/R/sketching.R +++ b/R/sketching.R @@ -478,6 +478,8 @@ LeverageScore.default <- function( #' @param seed A positive integer. The seed for the random number generator, defaults to 123. #' @param verbose Print progress and diagnostic messages #' +#' @importFrom SeuratObject EmptyDF +#' #' @rdname LeverageScore #' @method LeverageScore StdAssay #' @@ -500,7 +502,7 @@ LeverageScore.StdAssay <- function( if (!is_quosure(x = method)) { method <- enquo(arg = method) } - scores <- SeuratObject::EmptyDF(n = ncol(x = object)) + scores <- EmptyDF(n = ncol(x = object)) row.names(x = scores) <- colnames(x = object) scores[, 1] <- NA_real_ for (i in seq_along(along.with = layer)) { From 25463df28d40d3f4462179da67954d48621b1223 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 22 Oct 2023 17:38:58 -0400 Subject: [PATCH 955/979] Add documentation for flip_xy --- NAMESPACE | 1 + R/visualization.R | 1 + man/ImageDimPlot.Rd | 2 ++ man/Seurat-package.Rd | 4 ++-- man/reexports.Rd | 4 ++-- src/RcppExports.cpp | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index dc36d9b12..000515261 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -521,6 +521,7 @@ importFrom(SeuratObject,"Project<-") importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,.CalcN) +importFrom(SeuratObject,.CheckFmargin) importFrom(SeuratObject,.FilterObjects) importFrom(SeuratObject,.IsFutureSeurat) importFrom(SeuratObject,.MARGIN) diff --git a/R/visualization.R b/R/visualization.R index d82d0a079..aacbaf8b8 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -2362,6 +2362,7 @@ PolyFeaturePlot <- function( #' \code{patchwork} ggplot object.If \code{FALSE}, #' return a list of ggplot objects #' @param coord.fixed Plot cartesian coordinates with fixed aspect ratio +#' @param flip_xy Flag to flip X and Y axes. Default is FALSE. #' #' @return If \code{combine = TRUE}, a \code{patchwork} #' ggplot object; otherwise, a list of ggplot objects diff --git a/man/ImageDimPlot.Rd b/man/ImageDimPlot.Rd index 652e38a00..3667c3ef4 100644 --- a/man/ImageDimPlot.Rd +++ b/man/ImageDimPlot.Rd @@ -96,6 +96,8 @@ given (first is lowest)} return a list of ggplot objects} \item{coord.fixed}{Plot cartesian coordinates with fixed aspect ratio} + +\item{flip_xy}{Flag to flip X and Y axes. Default is FALSE.} } \value{ If \code{combine = TRUE}, a \code{patchwork} diff --git a/man/Seurat-package.Rd b/man/Seurat-package.Rd index c8c0ca311..282f3568b 100644 --- a/man/Seurat-package.Rd +++ b/man/Seurat-package.Rd @@ -43,7 +43,7 @@ Useful links: } \author{ -\strong{Maintainer}: Paul Hoffman \email{seurat@nygenome.org} (\href{https://orcid.org/0000-0002-7693-8957}{ORCID}) +\strong{Maintainer}: Rahul Satija \email{seurat@nygenome.org} (\href{https://orcid.org/0000-0001-9448-8833}{ORCID}) Other contributors: \itemize{ @@ -55,6 +55,7 @@ Other contributors: \item Christoph Hafemeister \email{chafemeister@nygenome.org} (\href{https://orcid.org/0000-0001-6365-8254}{ORCID}) [contributor] \item Yuhan Hao \email{yhao@nygenome.org} (\href{https://orcid.org/0000-0002-1810-0822}{ORCID}) [contributor] \item Austin Hartman \email{ahartman@nygenome.org} (\href{https://orcid.org/0000-0001-7278-1852}{ORCID}) [contributor] + \item Paul Hoffman \email{hoff0792@umn.edu} (\href{https://orcid.org/0000-0002-7693-8957}{ORCID}) [contributor] \item Jaison Jain \email{jjain@nygenome.org} (\href{https://orcid.org/0000-0002-9478-5018}{ORCID}) [contributor] \item Longda Jiang \email{ljiang@nygenome.org} (\href{https://orcid.org/0000-0003-4964-6497}{ORCID}) [contributor] \item Madeline Kowalski \email{mkowalski@nygenome.org} (\href{https://orcid.org/0000-0002-5655-7620}{ORCID}) [contributor] @@ -62,7 +63,6 @@ Other contributors: \item Gesmira Molla \email{gmolla@nygenome.org} (\href{https://orcid.org/0000-0002-8628-5056}{ORCID}) [contributor] \item Efthymia Papalexi \email{epapalexi@nygenome.org} (\href{https://orcid.org/0000-0001-5898-694X}{ORCID}) [contributor] \item Patrick Roelli \email{proelli@nygenome.org} [contributor] - \item Rahul Satija \email{rsatija@nygenome.org} (\href{https://orcid.org/0000-0001-9448-8833}{ORCID}) [contributor] \item Karthik Shekhar \email{kshekhar@berkeley.edu} [contributor] \item Avi Srivastava \email{asrivastava@nygenome.org} (\href{https://orcid.org/0000-0001-9798-2079}{ORCID}) [contributor] \item Tim Stuart \email{tstuart@nygenome.org} (\href{https://orcid.org/0000-0002-3044-0897}{ORCID}) [contributor] diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 540e5c2d8..7a3302c6b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(void *); +RcppExport SEXP isnull(SEXP); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 49d51132a065becd115723ca7119a1ba86bca174 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sun, 22 Oct 2023 17:41:29 -0400 Subject: [PATCH 956/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b157f82fc..8b5231508 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9087 -Date: 2023-10-21 +Version: 4.9.9.9088 +Date: 2023-10-22 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 8fb633c86e5a6966bc3e12eee63f7409129e75fd Mon Sep 17 00:00:00 2001 From: Izzy Grabski Date: Mon, 23 Oct 2023 10:50:20 -0400 Subject: [PATCH 957/979] Changed default Mixscape test to wilcox --- R/mixscape.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mixscape.R b/R/mixscape.R index 1d10c240c..c8bd4566f 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -1302,7 +1302,7 @@ TopDEGenesMixscape <- function( ident.2 = NULL, labels = 'gene', de.assay = "RNA", - test.use = "LR", + test.use = "wilcox", pval.cutoff = 5e-2, logfc.threshold = 0.25, verbose = TRUE From b9bb23a8849972175b60b58101e4cfdc4ce90b5d Mon Sep 17 00:00:00 2001 From: Madeline Kowalski Date: Mon, 23 Oct 2023 11:04:47 -0400 Subject: [PATCH 958/979] update documentation for logfc.threshold=0.25 --- R/mixscape.R | 4 ++++ man/RunMixscape.Rd | 7 ++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/mixscape.R b/R/mixscape.R index c8bd4566f..2dadf2bd4 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -644,6 +644,10 @@ RunLDA.Seurat <- function( #' all are assigned NP. #' @param de.assay Assay to use when performing differential expression analysis. #' Usually RNA. +#' @param logfc.threshold Limit testing to genes which show, on average, +#' at least X-fold difference (log-scale) between the two groups of cells. +#' Default is 0.25 Increasing logfc.threshold speeds up the function, but can miss +#' weaker signals. #' @param iter.num Number of normalmixEM iterations to run if convergence does #' not occur. #' @param verbose Display messages diff --git a/man/RunMixscape.Rd b/man/RunMixscape.Rd index 2d2fe5d75..953d95d0c 100644 --- a/man/RunMixscape.Rd +++ b/man/RunMixscape.Rd @@ -47,9 +47,10 @@ all are assigned NP.} \item{de.assay}{Assay to use when performing differential expression analysis. Usually RNA.} -\item{logfc.threshold}{Limit testing to genes which show, on average, at least -X-fold difference (log-scale) between the two groups of cells. Default is 0.1 -Increasing logfc.threshold speeds up the function, but can miss weaker signals.} +\item{logfc.threshold}{Limit testing to genes which show, on average, +at least X-fold difference (log-scale) between the two groups of cells. +Default is 0.25 Increasing logfc.threshold speeds up the function, but can miss +weaker signals.} \item{iter.num}{Number of normalmixEM iterations to run if convergence does not occur.} From 25a8259dfaf1a3762fc1303ad8c3fdb09287042f Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 11:43:15 -0400 Subject: [PATCH 959/979] Doc updates for RunMixScape --- R/mixscape.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/mixscape.R b/R/mixscape.R index 2dadf2bd4..a5ea9529d 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -628,7 +628,6 @@ RunLDA.Seurat <- function( #' Function to identify perturbed and non-perturbed gRNA expressing cells that #' accounts for multiple treatments/conditions/chemical perturbations. #' -#' @inheritParams FindMarkers #' @importFrom ggplot2 geom_density position_dodge #' @param object An object of class Seurat. #' @param assay Assay to use for mixscape classification. @@ -1290,9 +1289,9 @@ ProjectVec <- function(v1, v2) { # @param ident.2 Non-targetting class or cells # @param labels metadata column with target gene classification. # @param de.assay Name of Assay DE is performed on. -# @param test.use Denotes which test to use. See all available tests on +# @param test.use Denotes which test to use. See all available tests on # FindMarkers documentation. -# @param pval.cut.off P-value cut-off for selection of significantly DE genes. +# @param pval.cutoff P-value cut-off for selection of significantly DE genes. # @param logfc.threshold Limit testing to genes which show, on average, at # least X-fold difference (log-scale) between the two groups of cells. Default # is 0.25 Increasing logfc.threshold speeds up the function, but can miss From 4ce432329cc7b941506a78b2086e9f56c302da3a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 11:43:25 -0400 Subject: [PATCH 960/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8b5231508..e0c3745bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.9.9.9088 -Date: 2023-10-22 +Version: 4.9.9.9089 +Date: 2023-10-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( From 038e98fc51369204ad4369e22337c176f1939dad Mon Sep 17 00:00:00 2001 From: zskylarli Date: Mon, 23 Oct 2023 11:55:15 -0400 Subject: [PATCH 961/979] minor fix for single bracket in FindSpatialVF --- DESCRIPTION | 2 +- R/preprocessing.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e0c3745bf..e95edff3c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9089 +Version: 4.9.9.9090 Date: 2023-10-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/R/preprocessing.R b/R/preprocessing.R index 85df73633..3e0dd7d89 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -4053,7 +4053,7 @@ FindSpatiallyVariableFeatures.Assay <- function( svf.info[[var.name]] <- FALSE svf.info[[var.name]][1:(min(nrow(x = svf.info), nfeatures))] <- TRUE svf.info[[var.name.rank]] <- 1:nrow(x = svf.info) - object[names(x = svf.info)] <- svf.info + object[[names(x = svf.info)]] <- svf.info return(object) } From 82b4f4f09efe622d47abde5875d0296de8b99028 Mon Sep 17 00:00:00 2001 From: rsatija Date: Mon, 23 Oct 2023 12:39:27 -0400 Subject: [PATCH 962/979] Fixed multilayer FTA w/anchor filtering --- R/integration.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index dd0a30a5a..a739ea7fb 100644 --- a/R/integration.R +++ b/R/integration.R @@ -980,8 +980,10 @@ FindTransferAnchors <- function( layer.data <- LayerData(object = combined.ob, layer = i) LayerData(object = combined.ob, layer = data.layer.name) <- layer.data # set layer data } - combined.ob[["pcaproject"]] <- combined.pca colnames(x = orig.loadings) <- paste0("ProjectPC_", 1:ncol(x = orig.loadings)) + + combined.ob[["pcaproject"]] <- combined.pca + Loadings(object = combined.ob[["pcaproject"]], projected = FALSE) <- orig.loadings[, dims] Loadings(object = combined.ob[["pcaproject"]]) <- orig.loadings[, dims] } # Use reciprocal PCA projection in anchor finding From b5fba3f2b52d154e0836855b1d23f4f5dcb4a5cb Mon Sep 17 00:00:00 2001 From: rsatija Date: Mon, 23 Oct 2023 13:01:06 -0400 Subject: [PATCH 963/979] Turn off anchor filtering by default --- R/integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/integration.R b/R/integration.R index a739ea7fb..16d759c69 100644 --- a/R/integration.R +++ b/R/integration.R @@ -761,7 +761,7 @@ FindTransferAnchors <- function( l2.norm = TRUE, dims = 1:30, k.anchor = 5, - k.filter = 200, + k.filter = NA, k.score = 30, max.features = 200, nn.method = "annoy", From 75463b000fc604f26778cb86e851ac822595a534 Mon Sep 17 00:00:00 2001 From: rsatija Date: Mon, 23 Oct 2023 13:10:58 -0400 Subject: [PATCH 964/979] Version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e95edff3c..31617efdd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9090 +Version: 4.9.9.9099 Date: 2023-10-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. From c961387f825bba2e9ce853454b9666058963be67 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 13:37:38 -0400 Subject: [PATCH 965/979] Skip limma tests on CRAN --- tests/testthat/test_differential_expression.R | 81 ++++++++++--------- tests/testthat/test_preprocessing.R | 3 + 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index a1f40d12c..8e19a52c1 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -14,11 +14,6 @@ markers.01 <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ide results.clr <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1)) results.sct <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1)) -markers.0.limma <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1,test.use='wilcox_limma')) -markers.01.limma <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1),pseudocount.use = 1,test.use='wilcox_limma')) -results.clr.limma <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,test.use='wilcox_limma')) -results.sct.limma <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,test.use='wilcox_limma')) - test_that("Default settings work as expected with pseudocount = 1", { expect_error(FindMarkers(object = pbmc_small)) @@ -33,15 +28,6 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(nrow(x = markers.0), 228) expect_equal(rownames(markers.0)[1], "HLA-DPB1") - expect_equal(colnames(x = markers.0.limma), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) - expect_equal(markers.0.limma[1, "p_val"], 9.572778e-13, tolerance = 1e-18) - expect_equal(markers.0.limma[1, "avg_logFC"], -4.180029, tolerance = 1e-6) - expect_equal(markers.0.limma[1, "pct.1"], 0.083) - expect_equal(markers.0.limma[1, "pct.2"], 0.909) - expect_equal(markers.0.limma[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) - expect_equal(nrow(x = markers.0.limma), 228) - expect_equal(rownames(markers.0.limma)[1], "HLA-DPB1") - expect_equal(markers.01[1, "p_val"], 1.702818e-11, tolerance = 1e-16) expect_equal(markers.01[1, "avg_logFC"], -2.638242, tolerance = 1e-6) expect_equal(markers.01[1, "pct.1"], 0.111) @@ -50,14 +36,6 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(nrow(x = markers.01), 222) expect_equal(rownames(x = markers.01)[1], "TYMP") - expect_equal(markers.01.limma[1, "p_val"], 1.702818e-11, tolerance = 1e-16) - expect_equal(markers.01.limma[1, "avg_logFC"], -2.638242, tolerance = 1e-6) - expect_equal(markers.01.limma[1, "pct.1"], 0.111) - expect_equal(markers.01.limma[1, "pct.2"], 1.00) - expect_equal(markers.01.limma[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) - expect_equal(nrow(x = markers.01.limma), 222) - expect_equal(rownames(x = markers.01.limma)[1], "TYMP") - # CLR normalization expect_equal(results.clr[1, "p_val"], 1.209462e-11, tolerance = 1e-16) expect_equal(results.clr[1, "avg_logFC"], -2.946633, tolerance = 1e-6) @@ -67,14 +45,6 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(nrow(x = results.clr), 213) expect_equal(rownames(x = results.clr)[1], "S100A8") - expect_equal(results.clr.limma[1, "p_val"], 1.209462e-11, tolerance = 1e-16) - expect_equal(results.clr.limma[1, "avg_logFC"], -2.946633, tolerance = 1e-6) - expect_equal(results.clr.limma[1, "pct.1"], 0.111) - expect_equal(results.clr.limma[1, "pct.2"], 0.96) - expect_equal(results.clr.limma[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) - expect_equal(nrow(x = results.clr.limma), 213) - expect_equal(rownames(x = results.clr.limma)[1], "S100A8") - # SCT normalization expect_equal(results.sct[1, "p_val"], 6.225491e-11, tolerance = 1e-16) expect_equal(results.sct[1, "avg_logFC"], -2.545867, tolerance = 1e-6) @@ -83,14 +53,6 @@ test_that("Default settings work as expected with pseudocount = 1", { expect_equal(results.sct[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) expect_equal(nrow(x = results.sct), 214) expect_equal(rownames(x = results.sct)[1], "TYMP") - - expect_equal(results.sct.limma[1, "p_val"], 6.225491e-11, tolerance = 1e-16) - expect_equal(results.sct.limma[1, "avg_logFC"], -2.545867, tolerance = 1e-6) - expect_equal(results.sct.limma[1, "pct.1"], 0.111) - expect_equal(results.sct.limma[1, "pct.2"], 0.96) - expect_equal(results.sct.limma[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) - expect_equal(nrow(x = results.sct.limma), 214) - expect_equal(rownames(x = results.sct.limma)[1], "TYMP") }) @@ -320,9 +282,51 @@ test_that("LR test works", { expect_equal(rownames(x = results)[1], "LYZ") }) +test_that("FindMarkers with wilcox_limma works", { + skip_on_cran() + skip_if_not_installed("limma") + markers.0.limma <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, verbose = FALSE, base = exp(1),pseudocount.use = 1,test.use='wilcox_limma')) + markers.01.limma <- suppressWarnings(FindMarkers(object = pbmc_small, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1),pseudocount.use = 1,test.use='wilcox_limma')) + results.clr.limma <- suppressWarnings(FindMarkers(object = clr.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,test.use='wilcox_limma')) + results.sct.limma <- suppressWarnings(FindMarkers(object = sct.obj, ident.1 = 0, ident.2 = 1, verbose = FALSE, base = exp(1), pseudocount.use = 1,test.use='wilcox_limma')) + + expect_equal(colnames(x = markers.0.limma), c("p_val", "avg_logFC", "pct.1", "pct.2", "p_val_adj")) + expect_equal(markers.0.limma[1, "p_val"], 9.572778e-13, tolerance = 1e-18) + expect_equal(markers.0.limma[1, "avg_logFC"], -4.180029, tolerance = 1e-6) + expect_equal(markers.0.limma[1, "pct.1"], 0.083) + expect_equal(markers.0.limma[1, "pct.2"], 0.909) + expect_equal(markers.0.limma[1, "p_val_adj"], 2.201739e-10, tolerance = 1e-15) + expect_equal(nrow(x = markers.0.limma), 228) + expect_equal(rownames(markers.0.limma)[1], "HLA-DPB1") + + expect_equal(markers.01.limma[1, "p_val"], 1.702818e-11, tolerance = 1e-16) + expect_equal(markers.01.limma[1, "avg_logFC"], -2.638242, tolerance = 1e-6) + expect_equal(markers.01.limma[1, "pct.1"], 0.111) + expect_equal(markers.01.limma[1, "pct.2"], 1.00) + expect_equal(markers.01.limma[1, "p_val_adj"], 3.916481e-09, tolerance = 1e-14) + expect_equal(nrow(x = markers.01.limma), 222) + expect_equal(rownames(x = markers.01.limma)[1], "TYMP") + + expect_equal(results.clr.limma[1, "p_val"], 1.209462e-11, tolerance = 1e-16) + expect_equal(results.clr.limma[1, "avg_logFC"], -2.946633, tolerance = 1e-6) + expect_equal(results.clr.limma[1, "pct.1"], 0.111) + expect_equal(results.clr.limma[1, "pct.2"], 0.96) + expect_equal(results.clr.limma[1, "p_val_adj"], 2.781762e-09, tolerance = 1e-14) + expect_equal(nrow(x = results.clr.limma), 213) + expect_equal(rownames(x = results.clr.limma)[1], "S100A8") + + expect_equal(results.sct.limma[1, "p_val"], 6.225491e-11, tolerance = 1e-16) + expect_equal(results.sct.limma[1, "avg_logFC"], -2.545867, tolerance = 1e-6) + expect_equal(results.sct.limma[1, "pct.1"], 0.111) + expect_equal(results.sct.limma[1, "pct.2"], 0.96) + expect_equal(results.sct.limma[1, "p_val_adj"], 1.369608e-08, tolerance = 1e-13) + expect_equal(nrow(x = results.sct.limma), 214) + expect_equal(rownames(x = results.sct.limma)[1], "TYMP") +}) test_that("BPCells FindMarkers gives same results", { skip_on_cran() + skip_if_not_installed("BPCells") library(BPCells) library(Matrix) mat_bpcells <- t(as(t(pbmc_small[['RNA']]$counts ), "IterableMatrix")) @@ -419,6 +423,7 @@ test_that("FindMarkers recognizes log normalization", { test_that("BPCells FindAllMarkers gives same results", { skip_on_cran() + skip_if_not_installed("BPCells") library(BPCells) library(Matrix) mat_bpcells <- t(as(t(pbmc_small[['RNA']]$counts ), "IterableMatrix")) diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R index e017263b0..274ea24ae 100644 --- a/tests/testthat/test_preprocessing.R +++ b/tests/testthat/test_preprocessing.R @@ -139,6 +139,7 @@ test_that("NormalizeData scales properly for BPcells", { skip_on_cran() library(Matrix) + skip_if_not_installed("BPCells") library(BPCells) mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) @@ -156,6 +157,7 @@ test_that("NormalizeData scales properly for BPcells", { test_that("LogNormalize normalizes properly for BPCells", { skip_on_cran() library(Matrix) + skip_if_not_installed("BPCells") library(BPCells) mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) @@ -477,6 +479,7 @@ test_that("SCTransform is equivalent for BPcells ", { skip_if_not_installed("glmGamPoi") library(Matrix) + skip_if_not_installed("BPCells") library(BPCells) mat_bpcells <- t(as(t(object[['RNA']]$counts ), "IterableMatrix")) object[['RNAbp']] <- CreateAssay5Object(counts = mat_bpcells) From a5cd94fc6e856017544b4a48b862295e4f0da761 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 13:37:55 -0400 Subject: [PATCH 966/979] bump version --- DESCRIPTION | 2 +- man/reexports.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e95edff3c..712989994 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9090 +Version: 4.9.9.9091 Date: 2023-10-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From 3bec18db00605824154be38ff99db99a1ce2cdcc Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 13:42:26 -0400 Subject: [PATCH 967/979] Update documentation --- man/FindTransferAnchors.Rd | 2 +- man/reexports.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 398a55a03..f0dfbbc60 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -21,7 +21,7 @@ FindTransferAnchors( l2.norm = TRUE, dims = 1:30, k.anchor = 5, - k.filter = 200, + k.filter = NA, k.score = 30, max.features = 200, nn.method = "annoy", diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} From 3cf6f2079c3e5f2a297e88d3351db849ebdb7c8d Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 13:54:32 -0400 Subject: [PATCH 968/979] bump version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 712989994..73fc6dd0c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.9.9.9091 +Version: 5.0.0 Date: 2023-10-23 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. @@ -34,7 +34,7 @@ Additional_repositories: https://satijalab.r-universe.dev, https://bnprks.r-univ Depends: R (>= 4.0.0), methods, - SeuratObject (>= 4.9.9.9091) + SeuratObject (>= 5.0.0) Imports: cluster, cowplot, From 5c9b0b1f7866d1a95aa70c7f9379afd1ea8c498a Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 16:25:40 -0400 Subject: [PATCH 969/979] Register PseudobulkExpression --- NAMESPACE | 1 + R/utilities.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 000515261..dcdd73c3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,6 +80,7 @@ S3method(ProjectUMAP,DimReduc) S3method(ProjectUMAP,Seurat) S3method(ProjectUMAP,default) S3method(PseudobulkExpression,Assay) +S3method(PseudobulkExpression,Seurat) S3method(PseudobulkExpression,StdAssay) S3method(Radius,STARmap) S3method(Radius,SlideSeq) diff --git a/R/utilities.R b/R/utilities.R index 6e5579c5b..83c78cda9 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1360,7 +1360,7 @@ PseudobulkExpression.StdAssay <- function( #' @method PseudobulkExpression Seurat #' @importFrom SeuratObject .IsFutureSeurat -# +#' @export PseudobulkExpression.Seurat <- function( object, assays = NULL, From 47ac692893d6050fc7832d269cf271b978f15d8c Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 23 Oct 2023 18:23:15 -0400 Subject: [PATCH 970/979] Fix `\itemize` issue; update docs --- R/dimensional_reduction.R | 16 ++++++++------ R/preprocessing.R | 43 ++++++++++++++++++++----------------- R/visualization.R | 35 +++++++++++++++++++++--------- man/FeaturePlot.Rd | 22 +++++++++++++------ man/FindVariableFeatures.Rd | 28 +++++++++++++----------- man/ImageFeaturePlot.Rd | 9 ++++---- man/NormalizeData.Rd | 15 +++++++------ man/PolyFeaturePlot.Rd | 9 ++++---- man/RunTSNE.Rd | 11 ++++++---- man/SpatialPlot.Rd | 13 ++++++++--- man/reexports.Rd | 4 ++-- 11 files changed, 124 insertions(+), 81 deletions(-) diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index e47bb4c93..298118440 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1104,14 +1104,16 @@ RunPCA.Seurat5 <- function( #' @param tsne.method Select the method to use to compute the tSNE. Available #' methods are: #' \itemize{ -#' \item{Rtsne: }{Use the Rtsne package Barnes-Hut implementation of tSNE (default)} -# \item{tsne: }{standard tsne - not recommended for large datasets} -#' \item{FIt-SNE: }{Use the FFT-accelerated Interpolation-based t-SNE. Based on -#' Kluger Lab code found here: https://github.com/KlugerLab/FIt-SNE} +#' \item \dQuote{\code{Rtsne}}: Use the Rtsne package Barnes-Hut +#' implementation of tSNE (default) +#' \item \dQuote{\code{FIt-SNE}}: Use the FFT-accelerated Interpolation-based +#' t-SNE. Based on Kluger Lab code found here: +#' \url{https://github.com/KlugerLab/FIt-SNE} #' } #' @param dim.embed The dimensional space of the resulting tSNE embedding #' (default is 2). For example, set to 3 for a 3d tSNE -#' @param reduction.key dimensional reduction key, specifies the string before the number for the dimension names. tSNE_ by default +#' @param reduction.key dimensional reduction key, specifies the string before +#' the number for the dimension names. \dQuote{\code{tSNE_}} by default #' #' @importFrom Rtsne Rtsne #' @@ -2453,8 +2455,8 @@ PrepDR5 <- function(object, features = NULL, layer = 'scale.data', verbose = TRU if(!isTRUE(all.equal(features, features.use))) { missing_features <- setdiff(features, features.use) if(length(missing_features) > 0) { - warning_message <- paste("The following features were not available: ", - paste(missing_features, collapse = ", "), + warning_message <- paste("The following features were not available: ", + paste(missing_features, collapse = ", "), ".", sep = "") warning(warning_message, immediate. = TRUE) } diff --git a/R/preprocessing.R b/R/preprocessing.R index 3e0dd7d89..c78a1507d 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3640,19 +3640,21 @@ SubsetByBarcodeInflections <- function(object) { #' @param selection.method How to choose top variable features. Choose one of : #' \itemize{ -#' \item{vst:}{ First, fits a line to the relationship of log(variance) and -#' log(mean) using local polynomial regression (loess). Then standardizes the -#' feature values using the observed mean and expected variance (given by the -#' fitted line). Feature variance is then calculated on the standardized values -#' after clipping to a maximum (see clip.max parameter).} -#' \item{mean.var.plot (mvp):}{ First, uses a function to calculate average -#' expression (mean.function) and dispersion (dispersion.function) for each -#' feature. Next, divides features into num.bin (deafult 20) bins based on -#' their average expression, and calculates z-scores for dispersion within -#' each bin. The purpose of this is to identify variable features while -#' controlling for the strong relationship between variability and average -#' expression.} -#' \item{dispersion (disp):}{ selects the genes with the highest dispersion values} +#' \item \dQuote{\code{vst}}: First, fits a line to the relationship of +#' log(variance) and log(mean) using local polynomial regression (loess). +#' Then standardizes the feature values using the observed mean and +#' expected variance (given by the fitted line). Feature variance is then +#' calculated on the standardized values +#' after clipping to a maximum (see clip.max parameter). +#' \item \dQuote{\code{mean.var.plot}} (mvp): First, uses a function to +#' calculate average expression (mean.function) and dispersion +#' (dispersion.function) for each feature. Next, divides features into +#' \code{num.bin} (deafult 20) bins based on their average expression, +#' and calculates z-scores for dispersion within each bin. The purpose of +#' this is to identify variable features while controlling for the +#' strong relationship between variability and average expression +#' \item \dQuote{\code{dispersion}} (disp): selects the genes with the +#' highest dispersion values #' } #' @param loess.span (vst method) Loess span parameter used when fitting the #' variance-mean relationship @@ -4153,13 +4155,14 @@ LogNormalize.V3Matrix <- function( #' #' @param normalization.method Method for normalization. #' \itemize{ -#' \item{LogNormalize: }{Feature counts for each cell are divided by the total -#' counts for that cell and multiplied by the scale.factor. This is then -#' natural-log transformed using log1p.} -#' \item{CLR: }{Applies a centered log ratio transformation} -#' \item{RC: }{Relative counts. Feature counts for each cell are divided by the total -#' counts for that cell and multiplied by the scale.factor. No log-transformation is applied. -#' For counts per million (CPM) set \code{scale.factor = 1e6}} +#' \item \dQuote{\code{LogNormalize}}: Feature counts for each cell are +#' divided by the total counts for that cell and multiplied by the +#' \code{scale.factor}. This is then natural-log transformed using \code{log1p} +#' \item \dQuote{\code{CLR}}: Applies a centered log ratio transformation +#' \item \dQuote{\code{RC}}: Relative counts. Feature counts for each cell +#' are divided by the total counts for that cell and multiplied by the +#' \code{scale.factor}. No log-transformation is applied. For counts per +#' million (CPM) set \code{scale.factor = 1e6} #' } #' @param scale.factor Sets the scale factor for cell-level normalization #' @param margin If performing CLR normalization, normalize across features (1) or cells (2) diff --git a/R/visualization.R b/R/visualization.R index aacbaf8b8..76605a25d 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -995,10 +995,11 @@ DimPlot <- function( #' cells expressing given feature are getting buried. #' @param features Vector of features to plot. Features can come from: #' \itemize{ -#' \item An \code{Assay} feature (e.g. a gene name - "MS4A1") -#' \item A column name from meta.data (e.g. mitochondrial percentage - "percent.mito") -#' \item A column name from a \code{DimReduc} object corresponding to the cell embedding values -#' (e.g. the PC 1 scores - "PC_1") +#' \item An \code{Assay} feature (e.g. a gene name - "MS4A1") +#' \item A column name from meta.data (e.g. mitochondrial percentage - +#' "percent.mito") +#' \item A column name from a \code{DimReduc} object corresponding to the +#' cell embedding values (e.g. the PC 1 scores - "PC_1") #' } #' @param cols The two colors to form the gradient over. Provide as string vector with #' the first color corresponding to low values, the second to high. Also accepts a Brewer @@ -1015,9 +1016,16 @@ DimPlot <- function( #' to split by cell identity' #' @param keep.scale How to handle the color scale across multiple plots. Options are: #' \itemize{ -#' \item{"feature" (default; by row/feature scaling):}{ The plots for each individual feature are scaled to the maximum expression of the feature across the conditions provided to 'split.by'.} -#' \item{"all" (universal scaling):}{ The plots for all features and conditions are scaled to the maximum expression value for the feature with the highest overall expression.} -#' \item{NULL (no scaling):}{ Each individual plot is scaled to the maximum expression value of the feature in the condition provided to 'split.by'. Be aware setting NULL will result in color scales that are not comparable between plots.} +#' \item \dQuote{feature} (default; by row/feature scaling): The plots for +#' each individual feature are scaled to the maximum expression of the +#' feature across the conditions provided to \code{split.by} +#' \item \dQuote{all} (universal scaling): The plots for all features and +#' conditions are scaled to the maximum expression value for the feature +#' with the highest overall expression +#' \item \code{all} (no scaling): Each individual plot is scaled to the +#' maximum expression value of the feature in the condition provided to +#' \code{split.by}. Be aware setting \code{NULL} will result in color +#' scales that are not comparable between plots #' } #' @param slot Which slot to pull expression data from? #' @param blend Scale and blend expression values to visualize coexpression of two features @@ -3835,9 +3843,16 @@ ISpatialFeaturePlot <- function( #' data, or scale.data) #' @param keep.scale How to handle the color scale across multiple plots. Options are: #' \itemize{ -#' \item{"feature" (default; by row/feature scaling):}{ The plots for each individual feature are scaled to the maximum expression of the feature across the conditions provided to 'split.by'.} -#' \item{"all" (universal scaling):}{ The plots for all features and conditions are scaled to the maximum expression value for the feature with the highest overall expression.} -#' \item{NULL (no scaling):}{ Each individual plot is scaled to the maximum expression value of the feature in the condition provided to 'split.by'. Be aware setting NULL will result in color scales that are not comparable between plots.} +#' \item \dQuote{feature} (default; by row/feature scaling): The plots for +#' each individual feature are scaled to the maximum expression of the +#' feature across the conditions provided to \code{split.by} +#' \item \dQuote{all} (universal scaling): The plots for all features and +#' conditions are scaled to the maximum expression value for the feature +#' with the highest overall expression +#' \item \code{NULL} (no scaling): Each individual plot is scaled to the +#' maximum expression value of the feature in the condition provided to +#' \code{split.by}; be aware setting \code{NULL} will result in color +#' scales that are not comparable between plots #' } #' @param min.cutoff,max.cutoff Vector of minimum and maximum cutoff #' values for each feature, may specify quantile in the form of 'q##' where '##' diff --git a/man/FeaturePlot.Rd b/man/FeaturePlot.Rd index bf09e5640..fa5c4293a 100644 --- a/man/FeaturePlot.Rd +++ b/man/FeaturePlot.Rd @@ -47,10 +47,11 @@ FeaturePlot( \item{features}{Vector of features to plot. Features can come from: \itemize{ - \item An \code{Assay} feature (e.g. a gene name - "MS4A1") - \item A column name from meta.data (e.g. mitochondrial percentage - "percent.mito") - \item A column name from a \code{DimReduc} object corresponding to the cell embedding values - (e.g. the PC 1 scores - "PC_1") + \item An \code{Assay} feature (e.g. a gene name - "MS4A1") + \item A column name from meta.data (e.g. mitochondrial percentage - + "percent.mito") + \item A column name from a \code{DimReduc} object corresponding to the + cell embedding values (e.g. the PC 1 scores - "PC_1") }} \item{dims}{Dimensions to plot, must be a two-length numeric vector specifying x- and y-dimensions} @@ -84,9 +85,16 @@ to split by cell identity'} \item{keep.scale}{How to handle the color scale across multiple plots. Options are: \itemize{ - \item{"feature" (default; by row/feature scaling):}{ The plots for each individual feature are scaled to the maximum expression of the feature across the conditions provided to 'split.by'.} - \item{"all" (universal scaling):}{ The plots for all features and conditions are scaled to the maximum expression value for the feature with the highest overall expression.} - \item{NULL (no scaling):}{ Each individual plot is scaled to the maximum expression value of the feature in the condition provided to 'split.by'. Be aware setting NULL will result in color scales that are not comparable between plots.} + \item \dQuote{feature} (default; by row/feature scaling): The plots for + each individual feature are scaled to the maximum expression of the + feature across the conditions provided to \code{split.by} + \item \dQuote{all} (universal scaling): The plots for all features and + conditions are scaled to the maximum expression value for the feature + with the highest overall expression + \item \code{all} (no scaling): Each individual plot is scaled to the + maximum expression value of the feature in the condition provided to + \code{split.by}. Be aware setting \code{NULL} will result in color + scales that are not comparable between plots }} \item{shape.by}{If NULL, all points are circles (default). You can specify any diff --git a/man/FindVariableFeatures.Rd b/man/FindVariableFeatures.Rd index 8ec4f4859..6944c5ace 100644 --- a/man/FindVariableFeatures.Rd +++ b/man/FindVariableFeatures.Rd @@ -66,19 +66,21 @@ FindVariableFeatures(object, ...) \item{selection.method}{How to choose top variable features. Choose one of : \itemize{ - \item{vst:}{ First, fits a line to the relationship of log(variance) and - log(mean) using local polynomial regression (loess). Then standardizes the - feature values using the observed mean and expected variance (given by the - fitted line). Feature variance is then calculated on the standardized values - after clipping to a maximum (see clip.max parameter).} - \item{mean.var.plot (mvp):}{ First, uses a function to calculate average - expression (mean.function) and dispersion (dispersion.function) for each - feature. Next, divides features into num.bin (deafult 20) bins based on - their average expression, and calculates z-scores for dispersion within - each bin. The purpose of this is to identify variable features while - controlling for the strong relationship between variability and average - expression.} - \item{dispersion (disp):}{ selects the genes with the highest dispersion values} + \item \dQuote{\code{vst}}: First, fits a line to the relationship of + log(variance) and log(mean) using local polynomial regression (loess). + Then standardizes the feature values using the observed mean and + expected variance (given by the fitted line). Feature variance is then + calculated on the standardized values + after clipping to a maximum (see clip.max parameter). + \item \dQuote{\code{mean.var.plot}} (mvp): First, uses a function to + calculate average expression (mean.function) and dispersion + (dispersion.function) for each feature. Next, divides features into + \code{num.bin} (deafult 20) bins based on their average expression, + and calculates z-scores for dispersion within each bin. The purpose of + this is to identify variable features while controlling for the + strong relationship between variability and average expression + \item \dQuote{\code{dispersion}} (disp): selects the genes with the + highest dispersion values }} \item{loess.span}{(vst method) Loess span parameter used when fitting the diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd index e40ab169a..c5dbd50e5 100644 --- a/man/ImageFeaturePlot.Rd +++ b/man/ImageFeaturePlot.Rd @@ -43,10 +43,11 @@ ImageFeaturePlot( \item{features}{Vector of features to plot. Features can come from: \itemize{ - \item An \code{Assay} feature (e.g. a gene name - "MS4A1") - \item A column name from meta.data (e.g. mitochondrial percentage - "percent.mito") - \item A column name from a \code{DimReduc} object corresponding to the cell embedding values - (e.g. the PC 1 scores - "PC_1") + \item An \code{Assay} feature (e.g. a gene name - "MS4A1") + \item A column name from meta.data (e.g. mitochondrial percentage - + "percent.mito") + \item A column name from a \code{DimReduc} object corresponding to the + cell embedding values (e.g. the PC 1 scores - "PC_1") }} \item{fov}{Name of FOV to plot} diff --git a/man/NormalizeData.Rd b/man/NormalizeData.Rd index 0d8b48e5b..1f6edd463 100644 --- a/man/NormalizeData.Rd +++ b/man/NormalizeData.Rd @@ -45,13 +45,14 @@ NormalizeData(object, ...) \item{normalization.method}{Method for normalization. \itemize{ - \item{LogNormalize: }{Feature counts for each cell are divided by the total - counts for that cell and multiplied by the scale.factor. This is then - natural-log transformed using log1p.} - \item{CLR: }{Applies a centered log ratio transformation} - \item{RC: }{Relative counts. Feature counts for each cell are divided by the total - counts for that cell and multiplied by the scale.factor. No log-transformation is applied. - For counts per million (CPM) set \code{scale.factor = 1e6}} + \item \dQuote{\code{LogNormalize}}: Feature counts for each cell are + divided by the total counts for that cell and multiplied by the + \code{scale.factor}. This is then natural-log transformed using \code{log1p} + \item \dQuote{\code{CLR}}: Applies a centered log ratio transformation + \item \dQuote{\code{RC}}: Relative counts. Feature counts for each cell + are divided by the total counts for that cell and multiplied by the + \code{scale.factor}. No log-transformation is applied. For counts per + million (CPM) set \code{scale.factor = 1e6} }} \item{scale.factor}{Sets the scale factor for cell-level normalization} diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index 59a75466d..6c822aa49 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -21,10 +21,11 @@ PolyFeaturePlot( \item{features}{Vector of features to plot. Features can come from: \itemize{ - \item An \code{Assay} feature (e.g. a gene name - "MS4A1") - \item A column name from meta.data (e.g. mitochondrial percentage - "percent.mito") - \item A column name from a \code{DimReduc} object corresponding to the cell embedding values - (e.g. the PC 1 scores - "PC_1") + \item An \code{Assay} feature (e.g. a gene name - "MS4A1") + \item A column name from meta.data (e.g. mitochondrial percentage - + "percent.mito") + \item A column name from a \code{DimReduc} object corresponding to the + cell embedding values (e.g. the PC 1 scores - "PC_1") }} \item{cells}{Vector of cells to plot (default is all cells)} diff --git a/man/RunTSNE.Rd b/man/RunTSNE.Rd index 82077c88d..edd94dbc1 100644 --- a/man/RunTSNE.Rd +++ b/man/RunTSNE.Rd @@ -68,15 +68,18 @@ RunTSNE(object, ...) \item{tsne.method}{Select the method to use to compute the tSNE. Available methods are: \itemize{ -\item{Rtsne: }{Use the Rtsne package Barnes-Hut implementation of tSNE (default)} -\item{FIt-SNE: }{Use the FFT-accelerated Interpolation-based t-SNE. Based on -Kluger Lab code found here: https://github.com/KlugerLab/FIt-SNE} + \item \dQuote{\code{Rtsne}}: Use the Rtsne package Barnes-Hut + implementation of tSNE (default) + \item \dQuote{\code{FIt-SNE}}: Use the FFT-accelerated Interpolation-based + t-SNE. Based on Kluger Lab code found here: + \url{https://github.com/KlugerLab/FIt-SNE} }} \item{dim.embed}{The dimensional space of the resulting tSNE embedding (default is 2). For example, set to 3 for a 3d tSNE} -\item{reduction.key}{dimensional reduction key, specifies the string before the number for the dimension names. tSNE_ by default} +\item{reduction.key}{dimensional reduction key, specifies the string before +the number for the dimension names. \dQuote{\code{tSNE_}} by default} \item{cells}{Which cells to analyze (default, all cells)} diff --git a/man/SpatialPlot.Rd b/man/SpatialPlot.Rd index 5bd9f8162..502c1a0ba 100644 --- a/man/SpatialPlot.Rd +++ b/man/SpatialPlot.Rd @@ -107,9 +107,16 @@ data, or scale.data)} \item{keep.scale}{How to handle the color scale across multiple plots. Options are: \itemize{ - \item{"feature" (default; by row/feature scaling):}{ The plots for each individual feature are scaled to the maximum expression of the feature across the conditions provided to 'split.by'.} - \item{"all" (universal scaling):}{ The plots for all features and conditions are scaled to the maximum expression value for the feature with the highest overall expression.} - \item{NULL (no scaling):}{ Each individual plot is scaled to the maximum expression value of the feature in the condition provided to 'split.by'. Be aware setting NULL will result in color scales that are not comparable between plots.} + \item \dQuote{feature} (default; by row/feature scaling): The plots for + each individual feature are scaled to the maximum expression of the + feature across the conditions provided to \code{split.by} + \item \dQuote{all} (universal scaling): The plots for all features and + conditions are scaled to the maximum expression value for the feature + with the highest overall expression + \item \code{NULL} (no scaling): Each individual plot is scaled to the + maximum expression value of the feature in the condition provided to + \code{split.by}; be aware setting \code{NULL} will result in color + scales that are not comparable between plots }} \item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff diff --git a/man/reexports.Rd b/man/reexports.Rd index 08ced67a3..fa7258d42 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{generics}{\code{\link[generics]{components}}} + + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} From f3b8781565eb0e8f40dd66bc85ad576b636b85fd Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 18:29:40 -0400 Subject: [PATCH 971/979] Fix for rd_methods --- NAMESPACE | 1 - R/integration5.R | 2 +- R/roxygen.R | 2 -- man/IntegrateLayers.Rd | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index dcdd73c3f..b2f259f45 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -176,7 +176,6 @@ export("Project<-") export("SCTResults<-") export("Tool<-") export("VariableFeatures<-") -export(.rd_methods) export(AddAzimuthResults) export(AddMetaData) export(AddModuleScore) diff --git a/R/integration5.R b/R/integration5.R index 42b37942d..b190267fe 100644 --- a/R/integration5.R +++ b/R/integration5.R @@ -549,7 +549,7 @@ attr(x = JointPCAIntegration, which = 'Seurat.method') <- 'integration' #' #' @section Integration Method Functions: #' The following integration method functions are available: -#' \Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")} +#' \Sexpr[stage=render,results=rd]{Seurat:::.rd_methods("integration")} #' #' @export #' diff --git a/R/roxygen.R b/R/roxygen.R index 41579e2b5..0fe3d0e49 100644 --- a/R/roxygen.R +++ b/R/roxygen.R @@ -5,8 +5,6 @@ NULL #' @importFrom utils lsf.str #' @importFrom rlang is_scalar_character #' -#' @export -#' .rd_methods <- function(method = 'integration') { methods <- sapply( X = grep(pattern = '^package:', x = search(), value = TRUE), diff --git a/man/IntegrateLayers.Rd b/man/IntegrateLayers.Rd index 0396b77ee..5ac814d34 100644 --- a/man/IntegrateLayers.Rd +++ b/man/IntegrateLayers.Rd @@ -41,7 +41,7 @@ Integrate Layers \section{Integration Method Functions}{ The following integration method functions are available: -\Sexpr[stage=render,results=rd]{Seurat::.rd_methods("integration")} +\Sexpr[stage=render,results=rd]{Seurat:::.rd_methods("integration")} } \seealso{ From 1b43c98a608160f81b3b8c6454a26e7fb97b4f94 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 23 Oct 2023 18:31:27 -0400 Subject: [PATCH 972/979] More doc fixes --- R/preprocessing.R | 10 ++++++---- man/FindVariableFeatures.Rd | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index c78a1507d..193444480 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3670,10 +3670,12 @@ SubsetByBarcodeInflections <- function(object) { #' @param binning.method Specifies how the bins should be computed. Available #' methods are: #' \itemize{ -#' \item{equal_width:}{ each bin is of equal width along the x-axis [default]} -#' \item{equal_frequency:}{ each bin contains an equal number of features (can -#' increase statistical power to detect overdispersed features at high -#' expression values, at the cost of reduced resolution along the x-axis)} +#' \item \dQuote{\code{equal_width}}: each bin is of equal width along the +#' x-axis (default) +#' \item \dQuote{\code{equal_frequency}}: each bin contains an equal number +#' of features (can increase statistical power to detect overdispersed +#' eatures at high expression values, at the cost of reduced resolution +#' along the x-axis) #' } #' @param verbose show progress bar for calculations #' diff --git a/man/FindVariableFeatures.Rd b/man/FindVariableFeatures.Rd index 6944c5ace..e4a3deac9 100644 --- a/man/FindVariableFeatures.Rd +++ b/man/FindVariableFeatures.Rd @@ -102,10 +102,12 @@ is 20)} \item{binning.method}{Specifies how the bins should be computed. Available methods are: \itemize{ - \item{equal_width:}{ each bin is of equal width along the x-axis [default]} - \item{equal_frequency:}{ each bin contains an equal number of features (can - increase statistical power to detect overdispersed features at high - expression values, at the cost of reduced resolution along the x-axis)} + \item \dQuote{\code{equal_width}}: each bin is of equal width along the + x-axis (default) + \item \dQuote{\code{equal_frequency}}: each bin contains an equal number + of features (can increase statistical power to detect overdispersed + eatures at high expression values, at the cost of reduced resolution + along the x-axis) }} \item{verbose}{show progress bar for calculations} From 59b9ec7cc64a563e61dacfef8bf7471224baae3e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Mon, 23 Oct 2023 18:34:49 -0400 Subject: [PATCH 973/979] redocument exports --- man/reexports.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/reexports.Rd b/man/reexports.Rd index fa7258d42..08ced67a3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -71,8 +71,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{generics}{\code{\link[generics]{components}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}} - \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject:NNIndex]{Index}}, \code{\link[SeuratObject:NNIndex]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{generics}{\code{\link[generics]{components}}} }} From 48ea7a739dffc513c8ef4415d14053227ff13c69 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 24 Oct 2023 16:41:08 -0400 Subject: [PATCH 974/979] Update CRAN comments --- cran-comments.md | 76 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/cran-comments.md b/cran-comments.md index 1bea36b47..85bffe000 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,16 +1,78 @@ -# Seurat v4.4.0 +# Seurat v5.0.0 ## Test environments -* local Ubuntu 20.04 install, R 4.3.1 -* win-builder (release, devel) +* local ubuntu 20.04 install, R 4.1.3 +* win-builder (oldrelease, release, devel) +* mac-builder (release) ## R CMD check results -There were no ERRORs or WARNINGs. + +There were no ERRORs or WARNINGs + +There were two NOTEs + +> * checking CRAN incoming feasibility ... NOTE +> Maintainer: 'Rahul Satija ' + +> New maintainer: +> Rahul Satija +> Old maintainer(s): +> Paul Hoffman + +> Suggests or Enhances not in mainstream repositories: +> BPCells, presto +> Availability using Additional_repositories specification: +> presto yes https://satijalab.r-universe.dev +> BPCells no ? +> ? ? https://bnprks.r-universe.dev +> Additional repositories with no packages: +> https://bnprks.r-universe.dev + +> Packages suggested but not available for checking: 'BPCells', 'presto' + +BPCells and presto are hosted on R-universe and used conditionally in Seurat. + ## Downstream dependencies -There no packages that depend on Seurat +The following reverse dependencies are impacted by this release of Seurat: + +- AnanseSeurat + - Failure in examples and test because of changes in how default objects are created in Seurat. + - Functionality impacted. The author was made aware of these changes: https://github.com/JGASmits/AnanseSeurat/issues/34 + +- CAMML + - Failure in example because of changes in how default objects are created in Seurat. + - Functionality impacted. The author was made aware of these changes over email + +- Canek + - Failure in tests because of changes in how default objects are created in Seurat. + - Functionality impacted. The author was made aware of these changes: https://github.com/MartinLoza/Canek/issues/20 + +- clustree: + - Failure in tests because of changes in data accessor methods in Seurat. + - Functionality impacted. The author was made aware of these changes: https://github.com/lazappi/clustree/issues/93 + - Author has incorporated changes + +- CSCDRNA + - Failure in tests because of changes in data accessor methods in Seurat. + - Functionality impacted. The author was made aware of these changes: https://github.com/empiricalbayes/CSCDRNA/issues/1 + +- scCustomize + - Failure in example because of changes in how default objects are created in Seurat. + - Functionality impacted. The author was made aware of these changes: https://github.com/samuel-marsh/scCustomize/issues/131 + +- SCpubr: + - Failure in example because of changes in how default objects are created in Seurat. + - Functionality impacted. The author was made aware of these changes: https://github.com/enblacar/SCpubr/issues/42 -There are forty-five packages that import Seurat: AnanseSeurat, APL, bbknnR, benchdamic, CAMML, CIDER, COTAN, CSCDRNA, Dino, DR.SC, DWLS, ggsector, gsdensity, infercnv, IRISFGM, mixhvg, Nebulosa, pipeComp, PRECAST, ProFAST, rPanglaoDB, scAnnotate, scBFA, scBubbletree, scCB2, scDataviz, scDiffCom, scFeatures, scGate, scMappR, scperturbR, scpoisson, SCRIP, scRNAseqApp, scRNAstat, scTreeViz, SignacX, singleCellTK, SoupX, Spaniel, SPECK, speckle, SpotClean, stJoincount, and STREAK; this update does not impact their functionality +- Signac + - Faulure in new tests because of SeuratObject changing the order of the results, but not the actual values. + - Functionality not impacted. The author was made aware of these changes over email and has made changes. -There are fifty-one packages that suggest Seurat: ASURAT, BayesSpace, BisqueRNA, Canek, cellpypes, CIARA, ClustAssess, clustifyr, clustifyrdatahub, clustree, combiroc, conos, countland, CRMetrics, decoupleR, DIscBIO, dittoSeq, dorothea, dyngen, EasyCellType, EpiMix, escape, fcoex, FEAST, fgsea, GeomxTools, grandR, harmony, M3Drop, MOFA2, monocle, muscData, progeny, RESET, rliger, SCORPIUS, SCpubr, scRepertoire, scTensor, Signac, SimBenchData, SimBu, spatialHeatmap, SPOTlight, TAPseq, TCGAbiolinks, tidybulk, treefit, tricycle, UCell, and VAM; this update does not impact their functionality. +- tidyseurat + - Faulure in new tests because of SeuratObject changing the order of the results, but not the actual values. + - Functionality not impacted. The author was made aware of these changes: https://github.com/stemangiola/tidyseurat/issues/74 +- VAM + - Failure in tests because of changes in data accessor methods in Seurat. + - Functionality impacted. The author was made aware of these changes over email From 6e59e6578e2d66698632d585d0e04df801a2babc Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Tue, 24 Oct 2023 16:45:02 -0400 Subject: [PATCH 975/979] Update CRAN comments --- cran-comments.md | 2 ++ src/RcppExports.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cran-comments.md b/cran-comments.md index 85bffe000..99ac0c99e 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -19,6 +19,8 @@ There were two NOTEs > Old maintainer(s): > Paul Hoffman +New maintainer is Rahul Satija, the email address has remained the same. + > Suggests or Enhances not in mainstream repositories: > BPCells, presto > Availability using Additional_repositories specification: diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7a3302c6b..540e5c2d8 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -402,7 +402,7 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP isnull(SEXP); +RcppExport SEXP isnull(void *); static const R_CallMethodDef CallEntries[] = { {"_Seurat_RunModularityClusteringCpp", (DL_FUNC) &_Seurat_RunModularityClusteringCpp, 9}, From 50bd53e727e75dd2424d96b4f29550de2b708362 Mon Sep 17 00:00:00 2001 From: zskylarli Date: Tue, 24 Oct 2023 18:16:32 -0400 Subject: [PATCH 976/979] updated changelog for v5 --- NEWS.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5d881c29f..8c7e3a863 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,20 +1,65 @@ -# Unreleased +# Seurat 5.0.0 (2023-10-24) + +## Added +- Added `BridgeCellsRepresentation` to construct a dictionary representation for each unimodal dataset. +- Added `BuildNicheAssay` to construct a new assay where each feature is a cell label. The values represent the sum of a particular cell label neighboring a given cell. +- Added `CalcDispersion` to calculate the dispersion of features. +- Added `CCAIntegration` to perform Seurat-CCA Integration. +- Added `CountSketch` to generate a CountSketch random matrix. +- Added `CreateCategoryMatrix` to create a one-hot matrix for a given label. +- Added `DISP` to find variable features based on dispersion. +- Added `FastRPCAIntegration` as a convenience wrapper function around the following three functions that are often run together when performing integration. +- Added `FetchResiduals_reference` as a temporary function to get residuals from the reference. +- Added `FetchResiduals` to call sctransform::get_residuals. +- Added `FetchResidualSCTModel` to calculate Pearson residuals of features not in the scale.data. +- Added `FindBridgeAnchor` to find bridge anchors between two unimodal datasets. +- Added `FindBridgeIntegrationAnchors` to find a set of anchors for integration between unimodal query and the other unimodal reference using a pre-computed BridgeReferenceSet. +- Added `FindBridgeTransferAnchors` to find a set of anchors for label transfer between unimodal query and the other unimodal reference using a pre-computed BridgeReferenceSet. +- Added `GaussianSketch` to perform Gaussian sketching. +- Added `HarmonyIntegration` to perform Harmony integration. +- Added `IntegrateLayers` to integrate layers in an assay object. +- Added `JointPCAIntegration` to perform Seurat-Joint PCA Integration. +- Added `LeverageScore` to compute the leverage scores for a given object. +- Added `LoadCurioSeeker` to load Curio Seeker data. +- Added `MVP` to find variable features based on mean.var.plot. +- Added `NNtoGraph` to convert the Neighbor class to an asymmetrical Graph class. +- Added `PrepareBridgeReference` to preprocess the multi-omic bridge and unimodal reference datasets into an extended reference. +- Added `ProjectCellEmbeddings` to project query data onto the reference dimensional reduction. +- Added `ProjectData` to project high-dimensional single-cell RNA expression data from a full dataset onto the lower-dimensional embedding of the sketch of the dataset. +- Added `ProjectDimReduc` to project query data to reference dimensional reduction. +- Added `ProjectIntegration` to integrate embeddings from the integrated sketched.assay. +- Added `PseudobulkExpression` to normalize the count data present in a given assay. +- Added `Read10X_probe_metadata` to read the probe metadata from a 10x Genomics probe barcode matrix file in HDF5 format. +- Added `RPCAIntegration` to perform Seurat-RPCA Integration. +- Added `RunGraphLaplacian` to run a graph Laplacian dimensionality reduction. +- Added `SelectIntegrationFeatures5` to select integration features for v5 assays. +- Added `SelectSCTIntegrationFeatures` to select SCT integration features. +- Added `SketchData` to use sketching methods to downsample high-dimensional single-cell RNA expression data for help with scalability for large datasets. +- Added `TransferSketchLabels` to transfer cell type labels from a sketched dataset to a full dataset based on the similarities in the lower-dimensional space. +- Added `UnSketchEmbeddings` to transfer embeddings from sketched cells to the full data. +- Added `VST` to apply a variance stabilizing transformation for selection of variable features. +- Added `writing-integration` to provide functionality that allows users to implement any integration method in Seurat. + +## Changes +- Changed `FindTransferAnchors` so that anchor filtering is not performed by default +- Changed `merge` so that layers will be added to a single Seurat object instead of combining raw count matrices + +# Seurat 4.4.0 (2023-09-27) ## Added - Added parallelization support with speed improvements for `PrepSCTFindMarkers` +- Fix bug in `LoadNanostring`([#7566](https://github.com/satijalab/seurat/pull/7566)) ## Changes - Fix bug in `as.Seurat.SingleCellExperiment()` ([#6692](https://github.com/satijalab/seurat/issues/6692)) - Support for Visium probe information introduced in Spaceranger 2.1 ([#7141](https://github.com/satijalab/seurat/pull/7141)) -- Add `LoadCurioSeeker` to load sequencing-based spatial datasets generated using the Curio Seeker ([#744](https://github.com/satijalab/seurat-private/pull/744)) -- Fix fold change calculation for assays ([#739](https://github.com/satijalab/seurat-private/pull/739)) +- Add `LoadCurioSeeker` to load sequencing-based spatial datasets generated using the Curio Seeker +- Fix fold change calculation for assays ([#7095](https://github.com/satijalab/seurat/issues/7095)) - Fix `pt.size` bug when rasterization is set to true ([#7379](https://github.com/satijalab/seurat/issues/7379)) - Fix `FoldChange` and `FindMarkers` to support all normalization approaches ([#7115](https://github.com/satijalab/seurat/pull/7115),[#7110](https://github.com/satijalab/seurat/issues/7110),[#7095](https://github.com/satijalab/seurat/issues/7095),[#6976](https://github.com/satijalab/seurat/issues/6976),[#6654](https://github.com/satijalab/seurat/issues/6654),[#6701](https://github.com/satijalab/seurat/issues/6701),[#6773](https://github.com/satijalab/seurat/issues/6773), [#7107](https://github.com/satijalab/seurat/issues/7107)) - Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565)) -- Fix bug in `ReadMtx()` to add back missing parameters -- Fix `SCTransform()` for V5 assays to retain gene attributes ([#7557](https://github.com/satijalab/seurat/issues/7557)) -- Fix `LeverageScore()` for objects with few features ([#7650](https://github.com/satijalab/seurat/issues/7650) - +- Fix for handling rasterization by default ([#7842](https://github.com/satijalab/seurat/pull/7842)) + # Seurat 4.3.0 (2022-11-18) ## Added From d408743c870a50f6e7cc077ba537d9220656254b Mon Sep 17 00:00:00 2001 From: zskylarli Date: Tue, 24 Oct 2023 18:27:48 -0400 Subject: [PATCH 977/979] grammar convention fix for changelog --- NEWS.md | 85 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8c7e3a863..542496416 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,53 +1,53 @@ # Seurat 5.0.0 (2023-10-24) ## Added -- Added `BridgeCellsRepresentation` to construct a dictionary representation for each unimodal dataset. -- Added `BuildNicheAssay` to construct a new assay where each feature is a cell label. The values represent the sum of a particular cell label neighboring a given cell. -- Added `CalcDispersion` to calculate the dispersion of features. -- Added `CCAIntegration` to perform Seurat-CCA Integration. -- Added `CountSketch` to generate a CountSketch random matrix. -- Added `CreateCategoryMatrix` to create a one-hot matrix for a given label. -- Added `DISP` to find variable features based on dispersion. -- Added `FastRPCAIntegration` as a convenience wrapper function around the following three functions that are often run together when performing integration. -- Added `FetchResiduals_reference` as a temporary function to get residuals from the reference. -- Added `FetchResiduals` to call sctransform::get_residuals. -- Added `FetchResidualSCTModel` to calculate Pearson residuals of features not in the scale.data. -- Added `FindBridgeAnchor` to find bridge anchors between two unimodal datasets. -- Added `FindBridgeIntegrationAnchors` to find a set of anchors for integration between unimodal query and the other unimodal reference using a pre-computed BridgeReferenceSet. -- Added `FindBridgeTransferAnchors` to find a set of anchors for label transfer between unimodal query and the other unimodal reference using a pre-computed BridgeReferenceSet. -- Added `GaussianSketch` to perform Gaussian sketching. -- Added `HarmonyIntegration` to perform Harmony integration. -- Added `IntegrateLayers` to integrate layers in an assay object. -- Added `JointPCAIntegration` to perform Seurat-Joint PCA Integration. -- Added `LeverageScore` to compute the leverage scores for a given object. -- Added `LoadCurioSeeker` to load Curio Seeker data. -- Added `MVP` to find variable features based on mean.var.plot. -- Added `NNtoGraph` to convert the Neighbor class to an asymmetrical Graph class. -- Added `PrepareBridgeReference` to preprocess the multi-omic bridge and unimodal reference datasets into an extended reference. -- Added `ProjectCellEmbeddings` to project query data onto the reference dimensional reduction. -- Added `ProjectData` to project high-dimensional single-cell RNA expression data from a full dataset onto the lower-dimensional embedding of the sketch of the dataset. -- Added `ProjectDimReduc` to project query data to reference dimensional reduction. -- Added `ProjectIntegration` to integrate embeddings from the integrated sketched.assay. -- Added `PseudobulkExpression` to normalize the count data present in a given assay. -- Added `Read10X_probe_metadata` to read the probe metadata from a 10x Genomics probe barcode matrix file in HDF5 format. -- Added `RPCAIntegration` to perform Seurat-RPCA Integration. -- Added `RunGraphLaplacian` to run a graph Laplacian dimensionality reduction. -- Added `SelectIntegrationFeatures5` to select integration features for v5 assays. -- Added `SelectSCTIntegrationFeatures` to select SCT integration features. -- Added `SketchData` to use sketching methods to downsample high-dimensional single-cell RNA expression data for help with scalability for large datasets. -- Added `TransferSketchLabels` to transfer cell type labels from a sketched dataset to a full dataset based on the similarities in the lower-dimensional space. -- Added `UnSketchEmbeddings` to transfer embeddings from sketched cells to the full data. -- Added `VST` to apply a variance stabilizing transformation for selection of variable features. -- Added `writing-integration` to provide functionality that allows users to implement any integration method in Seurat. +- Add `BridgeCellsRepresentation` to construct a dictionary representation for each unimodal dataset. +- Add `BuildNicheAssay` to construct a new assay where each feature is a cell label. The values represent the sum of a particular cell label neighboring a given cell. +- Add `CalcDispersion` to calculate the dispersion of features. +- Add `CCAIntegration` to perform Seurat-CCA Integration. +- Add `CountSketch` to generate a CountSketch random matrix. +- Add `CreateCategoryMatrix` to create a one-hot matrix for a given label. +- Add `DISP` to find variable features based on dispersion. +- Add `FastRPCAIntegration` as a convenience wrapper function around the following three functions that are often run together when performing integration. +- Add `FetchResiduals_reference` as a temporary function to get residuals from the reference. +- Add `FetchResiduals` to call sctransform::get_residuals. +- Add `FetchResidualSCTModel` to calculate Pearson residuals of features not in the scale.data. +- Add `FindBridgeAnchor` to find bridge anchors between two unimodal datasets. +- Add `FindBridgeIntegrationAnchors` to find a set of anchors for integration between unimodal query and the other unimodal reference using a pre-computed BridgeReferenceSet. +- Add `FindBridgeTransferAnchors` to find a set of anchors for label transfer between unimodal query and the other unimodal reference using a pre-computed BridgeReferenceSet. +- Add `GaussianSketch` to perform Gaussian sketching. +- Add `HarmonyIntegration` to perform Harmony integration. +- Add `IntegrateLayers` to integrate layers in an assay object. +- Add `JointPCAIntegration` to perform Seurat-Joint PCA Integration. +- Add `LeverageScore` to compute the leverage scores for a given object. +- Add `LoadCurioSeeker` to load Curio Seeker data. +- Add `MVP` to find variable features based on mean.var.plot. +- Add `NNtoGraph` to convert the Neighbor class to an asymmetrical Graph class. +- Add `PrepareBridgeReference` to preprocess the multi-omic bridge and unimodal reference datasets into an extended reference. +- Add `ProjectCellEmbeddings` to project query data onto the reference dimensional reduction. +- Add `ProjectData` to project high-dimensional single-cell RNA expression data from a full dataset onto the lower-dimensional embedding of the sketch of the dataset. +- Add `ProjectDimReduc` to project query data to reference dimensional reduction. +- Add `ProjectIntegration` to integrate embeddings from the integrated sketched.assay. +- Add `PseudobulkExpression` to normalize the count data present in a given assay. +- Add `Read10X_probe_metadata` to read the probe metadata from a 10x Genomics probe barcode matrix file in HDF5 format. +- Add `RPCAIntegration` to perform Seurat-RPCA Integration. +- Add `RunGraphLaplacian` to run a graph Laplacian dimensionality reduction. +- Add `SelectIntegrationFeatures5` to select integration features for v5 assays. +- Add `SelectSCTIntegrationFeatures` to select SCT integration features. +- Add `SketchData` to use sketching methods to downsample high-dimensional single-cell RNA expression data for help with scalability for large datasets. +- Add `TransferSketchLabels` to transfer cell type labels from a sketched dataset to a full dataset based on the similarities in the lower-dimensional space. +- Add `UnSketchEmbeddings` to transfer embeddings from sketched cells to the full data. +- Add `VST` to apply a variance stabilizing transformation for selection of variable features. +- Add `writing-integration` to provide functionality that allows users to implement any integration method in Seurat. ## Changes -- Changed `FindTransferAnchors` so that anchor filtering is not performed by default -- Changed `merge` so that layers will be added to a single Seurat object instead of combining raw count matrices +- Change `FindTransferAnchors` so that anchor filtering is not performed by default +- Change `merge` so that layers will be added to a single Seurat object instead of combining raw count matrices # Seurat 4.4.0 (2023-09-27) ## Added -- Added parallelization support with speed improvements for `PrepSCTFindMarkers` +- Add parallelization support with speed improvements for `PrepSCTFindMarkers` - Fix bug in `LoadNanostring`([#7566](https://github.com/satijalab/seurat/pull/7566)) ## Changes @@ -59,6 +59,9 @@ - Fix `FoldChange` and `FindMarkers` to support all normalization approaches ([#7115](https://github.com/satijalab/seurat/pull/7115),[#7110](https://github.com/satijalab/seurat/issues/7110),[#7095](https://github.com/satijalab/seurat/issues/7095),[#6976](https://github.com/satijalab/seurat/issues/6976),[#6654](https://github.com/satijalab/seurat/issues/6654),[#6701](https://github.com/satijalab/seurat/issues/6701),[#6773](https://github.com/satijalab/seurat/issues/6773), [#7107](https://github.com/satijalab/seurat/issues/7107)) - Fix for handling newer ParseBio formats in `ReadParseBio` ([#7565](https://github.com/satijalab/seurat/pull/7565)) - Fix for handling rasterization by default ([#7842](https://github.com/satijalab/seurat/pull/7842)) +- Fix bug in `ReadMtx()` to add back missing parameters +- Fix `SCTransform()` for V5 assays to retain gene attributes ([#7557](https://github.com/satijalab/seurat/issues/7557)) +- Fix `LeverageScore()` for objects with few features ([#7650](https://github.com/satijalab/seurat/issues/7650) # Seurat 4.3.0 (2022-11-18) From 827ac067d4e81dccfe69660135389c30f60ff0ca Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 25 Oct 2023 09:35:03 -0400 Subject: [PATCH 978/979] Update NEWS --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 542496416..28449acdc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# Seurat 5.0.0 (2023-10-24) +# Seurat 5.0.0 (2023-10-25) ## Added - Add `BridgeCellsRepresentation` to construct a dictionary representation for each unimodal dataset. @@ -38,11 +38,11 @@ - Add `TransferSketchLabels` to transfer cell type labels from a sketched dataset to a full dataset based on the similarities in the lower-dimensional space. - Add `UnSketchEmbeddings` to transfer embeddings from sketched cells to the full data. - Add `VST` to apply a variance stabilizing transformation for selection of variable features. -- Add `writing-integration` to provide functionality that allows users to implement any integration method in Seurat. ## Changes - Change `FindTransferAnchors` so that anchor filtering is not performed by default - Change `merge` so that layers will be added to a single Seurat object instead of combining raw count matrices +- Deprecate `slot` parameter in favor of `layers` in accessor and set methods # Seurat 4.4.0 (2023-09-27) From a4075e78d52a67e5f901459c6eb024bb8b3c9c44 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Wed, 25 Oct 2023 09:48:16 -0400 Subject: [PATCH 979/979] Update README --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7dd89c3a7..370cd40b7 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,14 @@ [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat) -# Seurat v5 beta +# Seurat v5 Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC. -We are excited to release an initial beta version of Seurat v5! This updates introduces new functionality for spatial, multimodal, and scalable single-cell analysis. +We are excited to release Seurat v5! This updates introduces new functionality for spatial, multimodal, and scalable single-cell analysis. Seurat v5 is backwards-compatible with previous versions, so that users will continue to be able to re-run existing workflows. -As v5 is still in beta, the CRAN installation install.packages("Seurat") will continue to install Seurat v4, but users can opt-in to test Seurat v5 by following the instructions in our [INSTALL PAGE](https://satijalab.org/seurat/articles/install). - Instructions, documentation, and tutorials can be found at: * https://satijalab.org/seurat
    + + + + + + + + +
    +

    load package

    + + + +
    library(Seurat)
    + + +
    Loading required package: SeuratObject
    +Loading required package: sp
    +
    +Attaching package: ‘SeuratObject’
    +
    +The following object is masked from ‘package:base’:
    +
    +    intersect
    +
    +Registered S3 method overwritten by 'data.table':
    +  method           from
    +  print.data.table     
    +Registered S3 method overwritten by 'htmlwidgets':
    +  method           from         
    +  print.htmlwidget tools:rstudio
    + + +
    library(BPCells)
    + + + +
    +
    +

    load matrix

    + + + +
    time0_loadMatrix <- system.time({ 
    +  #mat <- open_matrix_dir('/brahms/haoy/test/pbmc_150k_sparse/')
    +  #meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta_100K.rds')
    +  mat <- open_matrix_dir('/brahms/haoy/test/pbmc_ParseBio_sparse//')
    +  meta <- readRDS('/brahms/haoy/seurat5/S5_object/ParseBio_PBMC_meta.rds')
    +})
    + + + +
    +
    +

    sketch object

    + + + +
    options(Seurat.object.assay.version = "v5",   Seurat.object.assay.calcn = T)
    +time1_normalize <- system.time({
    +  object <- CreateSeuratObject(counts = mat, meta.data = meta)
    +  object <- NormalizeData(object)
    +})
    + + + +
    +
    +

    integrate sketched assay

    + + + +
    
    +time5_SketchIntegration  <- system.time({
    +  DefaultAssay(object) <- 'sketch'
    +  object <- FindVariableFeatures(object)
    +  features <- SelectIntegrationFeatures5(object)
    +  object <- ScaleData(object, features =  features)
    +  object <- RunPCA(object, features =  features)
    +  DefaultAssay(object) <- 'sketch'
    +  object <- IntegrateLayers(object, 
    +                            method = RPCAIntegration,
    +                            orig = 'pca',
    +                            new.reduction = 'integrated.rpca',
    +                            dims = 1:30, 
    +                            k.anchor = 20,
    +                            reference = which(Layers(object, search = 'data') == 'data.H_3060'))
    +})
    +object <- RunUMAP(object,  reduction = 'integrated.rpca', dims = 1:30, return.model = T)
    +plot.s1 <- DimPlot(object, group.by = 'sample', reduction = 'umap') + NoLegend()
    +plot.s2 <- DimPlot(object, group.by = 'celltype.weight', reduction = 'umap') + NoLegend()
    +
    +plot.s1 + plot.s2
    +
    + + + +
    +
    +

    proporgate embeddings to full data

    + + + +
    time6_UnSketch <- system.time({
    +  object <- IntegrateSketchEmbeddings(object = object,
    +                                       atoms = 'sketch',
    +                                       orig = 'RNA',
    +                                       reduction = 'integrated.rpca' ,
    +                                       layers = Layers(object = object[['RNA']], search = 'data'),
    +                                      features = features  )
    + 
    +})
    + 
    +object <- RunUMAP(object,  reduction = 'integrated.rpca.orig', dims = 1:30 , reduction.name = 'umap.orig', reduction.key = 'Uorig_')
    +
    + + + + + + +
    p1<- DimPlot(object, reduction = 'umap.orig', group.by = 'sample',alpha = 0.1) + NoLegend()
    +p2<- DimPlot(object, reduction = 'umap.orig', group.by = 'celltype.weight', label = T, alpha = 0.1) + NoLegend()
    +p1+p2
    + + + +
    +
    +

    computing time summary

    + + + +
    all_T  <- ls(pattern = 'time')
    +overall <- sum(sapply(all_T, function(x) round(get(x)['elapsed'], digits = 3)))/60
    +
    +
    +for (i in 1:length(all_T)) {
    +  time.i <- get(all_T[i])['elapsed']
    +  if (time.i > 60) {
    +     print(paste(all_T[i], round(time.i/60, digits = 1), 'mins'))
    +  } else {
    +     print(paste(all_T[i], round(time.i, digits = 1), 'secs'))
    +  }
    +}
    +print(paste('Total time ', round(overall, digits = 3), 'mins' ))
    + + + + +
    + +
    LS0tCnRpdGxlOiAiU2V1cmF0IDU6IExhcmdlIGRhdGFzZXQgYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIGxvYWQgcGFja2FnZQoKYGBge3IsIHdhcm5pbmc9RiwgbWVzc2FnZT1GfQpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShCUENlbGxzKQpgYGAKCiMjIGxvYWQgbWF0cml4CmBgYHtyLCB3YXJuaW5nPUYsIG1lc3NhZ2U9Rn0KdGltZTBfbG9hZE1hdHJpeCA8LSBzeXN0ZW0udGltZSh7IAogICNtYXQgPC0gb3Blbl9tYXRyaXhfZGlyKCcvYnJhaG1zL2hhb3kvdGVzdC9wYm1jXzE1MGtfc3BhcnNlLycpCiAgI21ldGEgPC0gcmVhZFJEUygnL2JyYWhtcy9oYW95L3NldXJhdDUvUzVfb2JqZWN0L1BhcnNlQmlvX1BCTUNfbWV0YV8xMDBLLnJkcycpCiAgbWF0IDwtIG9wZW5fbWF0cml4X2RpcignL2JyYWhtcy9oYW95L3Rlc3QvcGJtY19QYXJzZUJpb19zcGFyc2UvLycpCiAgbWV0YSA8LSByZWFkUkRTKCcvYnJhaG1zL2hhb3kvc2V1cmF0NS9TNV9vYmplY3QvUGFyc2VCaW9fUEJNQ19tZXRhLnJkcycpCn0pCmBgYAoKIyMgc2tldGNoIG9iamVjdApgYGB7cix3YXJuaW5nPUYsIG1lc3NhZ2U9Rn0Kb3B0aW9ucyhTZXVyYXQub2JqZWN0LmFzc2F5LnZlcnNpb24gPSAidjUiLCAgIFNldXJhdC5vYmplY3QuYXNzYXkuY2FsY24gPSBUKQp0aW1lMV9ub3JtYWxpemUgPC0gc3lzdGVtLnRpbWUoewogIG9iamVjdCA8LSBDcmVhdGVTZXVyYXRPYmplY3QoY291bnRzID0gbWF0LCBtZXRhLmRhdGEgPSBtZXRhKQogIG9iamVjdCA8LSBOb3JtYWxpemVEYXRhKG9iamVjdCkKfSkKIAp0aW1lMl9zcGxpdC5tYXQgIDwtIHN5c3RlbS50aW1lKHsKICBvcHRpb25zKFNldXJhdC5vYmplY3QuYXNzYXkuY2FsY24gPSBGQUxTRSkgCiAgb2JqZWN0W1snUk5BJ11dIDwtIHNwbGl0KG9iamVjdFtbJ1JOQSddXSwgZiA9ICBtZXRhJHNhbXBsZSkKfSkKCgp0aW1lM19GaW5kVmFyaWFibGUgIDwtIHN5c3RlbS50aW1lKHsKICBvYmplY3QgPC0gRmluZFZhcmlhYmxlRmVhdHVyZXMob2JqZWN0LCBsYXllciA9ICdjb3VudHMnKQp9KQoKdGltZTRfTGV2ZXJhZ2VTY29yZVNhbXBsaW5nICA8LSBzeXN0ZW0udGltZSh7CiAgb2JqZWN0IDwtIExldmVyYWdlU2NvcmUob2JqZWN0KQogIG9iamVjdCA8LSBMZXZlcmFnZVNjb3JlU2FtcGxpbmcob2JqZWN0ID0gb2JqZWN0LCBuY2VsbHMgPSA1MDAwLCBjYXN0ID0gJ2RnQ01hdHJpeCcpCn0pCmBgYAoKIyMgaW50ZWdyYXRlIHNrZXRjaGVkIGFzc2F5CmBgYHtyfQoKdGltZTVfU2tldGNoSW50ZWdyYXRpb24gIDwtIHN5c3RlbS50aW1lKHsKICBEZWZhdWx0QXNzYXkob2JqZWN0KSA8LSAnc2tldGNoJwogIG9iamVjdCA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhvYmplY3QpCiAgZmVhdHVyZXMgPC0gU2VsZWN0SW50ZWdyYXRpb25GZWF0dXJlczUob2JqZWN0KQogIG9iamVjdCA8LSBTY2FsZURhdGEob2JqZWN0LCBmZWF0dXJlcyA9ICBmZWF0dXJlcykKICBvYmplY3QgPC0gUnVuUENBKG9iamVjdCwgZmVhdHVyZXMgPSAgZmVhdHVyZXMpCiAgRGVmYXVsdEFzc2F5KG9iamVjdCkgPC0gJ3NrZXRjaCcKICBvYmplY3QgPC0gSW50ZWdyYXRlTGF5ZXJzKG9iamVjdCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSBSUENBSW50ZWdyYXRpb24sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBvcmlnID0gJ3BjYScsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBuZXcucmVkdWN0aW9uID0gJ2ludGVncmF0ZWQucnBjYScsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBkaW1zID0gMTozMCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBrLmFuY2hvciA9IDIwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVmZXJlbmNlID0gd2hpY2goTGF5ZXJzKG9iamVjdCwgc2VhcmNoID0gJ2RhdGEnKSA9PSAnZGF0YS5IXzMwNjAnKSkKfSkKb2JqZWN0IDwtIFJ1blVNQVAob2JqZWN0LCAgcmVkdWN0aW9uID0gJ2ludGVncmF0ZWQucnBjYScsIGRpbXMgPSAxOjMwLCByZXR1cm4ubW9kZWwgPSBUKQpwbG90LnMxIDwtIERpbVBsb3Qob2JqZWN0LCBncm91cC5ieSA9ICdzYW1wbGUnLCByZWR1Y3Rpb24gPSAndW1hcCcpICsgTm9MZWdlbmQoKQpwbG90LnMyIDwtIERpbVBsb3Qob2JqZWN0LCBncm91cC5ieSA9ICdjZWxsdHlwZS53ZWlnaHQnLCByZWR1Y3Rpb24gPSAndW1hcCcpICsgTm9MZWdlbmQoKQoKcGxvdC5zMSArIHBsb3QuczIKCmBgYAoKCiMjIHByb3BvcmdhdGUgZW1iZWRkaW5ncyB0byBmdWxsIGRhdGEKYGBge3J9CnRpbWU2X1VuU2tldGNoIDwtIHN5c3RlbS50aW1lKHsKICBvYmplY3QgPC0gSW50ZWdyYXRlU2tldGNoRW1iZWRkaW5ncyhvYmplY3QgPSBvYmplY3QsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGF0b21zID0gJ3NrZXRjaCcsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yaWcgPSAnUk5BJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVkdWN0aW9uID0gJ2ludGVncmF0ZWQucnBjYScgLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsYXllcnMgPSBMYXllcnMob2JqZWN0ID0gb2JqZWN0W1snUk5BJ11dLCBzZWFyY2ggPSAnZGF0YScpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZlYXR1cmVzID0gZmVhdHVyZXMgICkKIAp9KQogCm9iamVjdCA8LSBSdW5VTUFQKG9iamVjdCwgIHJlZHVjdGlvbiA9ICdpbnRlZ3JhdGVkLnJwY2Eub3JpZycsIGRpbXMgPSAxOjMwICwgcmVkdWN0aW9uLm5hbWUgPSAndW1hcC5vcmlnJywgcmVkdWN0aW9uLmtleSA9ICdVb3JpZ18nKQoKYGBgCgpgYGB7cn0KcDE8LSBEaW1QbG90KG9iamVjdCwgcmVkdWN0aW9uID0gJ3VtYXAub3JpZycsIGdyb3VwLmJ5ID0gJ3NhbXBsZScsYWxwaGEgPSAwLjEpICsgTm9MZWdlbmQoKQpwMjwtIERpbVBsb3Qob2JqZWN0LCByZWR1Y3Rpb24gPSAndW1hcC5vcmlnJywgZ3JvdXAuYnkgPSAnY2VsbHR5cGUud2VpZ2h0JywgbGFiZWwgPSBULCBhbHBoYSA9IDAuMSkgKyBOb0xlZ2VuZCgpCnAxK3AyCmBgYAoKIyMgY29tcHV0aW5nIHRpbWUgc3VtbWFyeQpgYGB7cn0KYWxsX1QgIDwtIGxzKHBhdHRlcm4gPSAndGltZScpCm92ZXJhbGwgPC0gc3VtKHNhcHBseShhbGxfVCwgZnVuY3Rpb24oeCkgcm91bmQoZ2V0KHgpWydlbGFwc2VkJ10sIGRpZ2l0cyA9IDMpKSkvNjAKCgpmb3IgKGkgaW4gMTpsZW5ndGgoYWxsX1QpKSB7CiAgdGltZS5pIDwtIGdldChhbGxfVFtpXSlbJ2VsYXBzZWQnXQogIGlmICh0aW1lLmkgPiA2MCkgewogICAgIHByaW50KHBhc3RlKGFsbF9UW2ldLCByb3VuZCh0aW1lLmkvNjAsIGRpZ2l0cyA9IDEpLCAnbWlucycpKQogIH0gZWxzZSB7CiAgICAgcHJpbnQocGFzdGUoYWxsX1RbaV0sIHJvdW5kKHRpbWUuaSwgZGlnaXRzID0gMSksICdzZWNzJykpCiAgfQp9CnByaW50KHBhc3RlKCdUb3RhbCB0aW1lICcsIHJvdW5kKG92ZXJhbGwsIGRpZ2l0cyA9IDMpLCAnbWlucycgKSkKYGBgCgo=
    + + + +