diff --git a/DESCRIPTION b/DESCRIPTION index 1f599474d..16535fd76 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.2.1 -Date: 2022-11-07 +Version: 4.3.0 +Date: 2022-11-18 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. Authors@R: c( @@ -10,8 +10,10 @@ Authors@R: c( person(given = "Jeff", family = "Farrell", email = "jfarrell@g.harvard.edu", role = "ctb"), person(given = "Christoph", family = "Hafemeister", email = "chafemeister@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-6365-8254")), person(given = "Yuhan", family = "Hao", email = "yhao@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-1810-0822")), + person(given = "Austin", family = "Hartman", email = "ahartman@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-7278-1852")), person(given = "Paul", family = "Hoffman", email = "seurat@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7693-8957")), person(given = "Jaison", family = "Jain", email = "jjain@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-9478-5018")), + person(given = "Madeline", family = "Kowalski", email = "mkowalski@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-5655-7620")), person(given = "Efthymia", family = "Papalexi", email = "epapalexi@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-5898-694X")), person(given = "Patrick", family = "Roelli", email = "proelli@nygenome.org", role = "ctb"), person(given = "Rahul", family = "Satija", email = "rsatija@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-9448-8833")), @@ -55,6 +57,7 @@ Imports: pbapply, plotly (>= 4.9.0), png, + progressr, RANN, RColorBrewer, Rcpp (>= 1.0.7), @@ -95,7 +98,7 @@ Collate: 'tree.R' 'utilities.R' 'zzz.R' -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.2 Encoding: UTF-8 Suggests: ape, @@ -120,4 +123,6 @@ Suggests: metap, enrichR, mixtools, - ggrastr + ggrastr, + data.table, + R.utils diff --git a/NAMESPACE b/NAMESPACE index 0aeabf3d8..4984dc2be 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -103,6 +103,9 @@ S3method(as.sparse,H5Group) S3method(dim,STARmap) S3method(dim,SlideSeq) S3method(dim,VisiumV1) +S3method(fortify,Centroids) +S3method(fortify,Molecules) +S3method(fortify,Segmentation) S3method(levels,SCTAssay) S3method(merge,SCTAssay) S3method(subset,AnchorSet) @@ -110,6 +113,8 @@ S3method(subset,SCTAssay) S3method(subset,STARmap) S3method(subset,SlideSeq) S3method(subset,VisiumV1) +export("%iff%") +export("%||%") export("DefaultAssay<-") export("Idents<-") export("Index<-") @@ -208,6 +213,8 @@ export(IFeaturePlot) export(ISpatialDimPlot) export(ISpatialFeaturePlot) export(Idents) +export(ImageDimPlot) +export(ImageFeaturePlot) export(Images) export(Index) export(Indices) @@ -226,8 +233,13 @@ export(LabelPoints) export(LinkedDimPlot) export(LinkedFeaturePlot) export(Load10X_Spatial) +export(LoadAkoya) export(LoadAnnoyIndex) +export(LoadHuBMAPCODEX) +export(LoadNanostring) export(LoadSTARmap) +export(LoadVizgen) +export(LoadXenium) export(Loadings) export(LocalStruct) export(LogNormalize) @@ -270,10 +282,15 @@ export(Radius) export(Read10X) export(Read10X_Image) export(Read10X_h5) +export(ReadAkoya) export(ReadMtx) +export(ReadNanostring) export(ReadParseBio) export(ReadSTARsolo) export(ReadSlideSeq) +export(ReadVitessce) +export(ReadVizgen) +export(ReadXenium) export(Reductions) export(RegroupIdents) export(RelativeCounts) @@ -314,6 +331,7 @@ export(SingleCorPlot) export(SingleDimPlot) export(SingleExIPlot) export(SingleImageMap) +export(SingleImagePlot) export(SingleRasterMap) export(SingleSpatialPlot) export(SpatialDimPlot) @@ -396,6 +414,10 @@ importFrom(RcppAnnoy,AnnoyEuclidean) importFrom(RcppAnnoy,AnnoyHamming) importFrom(RcppAnnoy,AnnoyManhattan) importFrom(Rtsne,Rtsne) +importFrom(SeuratObject,"%!NA%") +importFrom(SeuratObject,"%NA%") +importFrom(SeuratObject,"%iff%") +importFrom(SeuratObject,"%||%") importFrom(SeuratObject,"DefaultAssay<-") importFrom(SeuratObject,"Idents<-") importFrom(SeuratObject,"Index<-") @@ -408,16 +430,24 @@ importFrom(SeuratObject,"Tool<-") importFrom(SeuratObject,"VariableFeatures<-") importFrom(SeuratObject,AddMetaData) importFrom(SeuratObject,Assays) +importFrom(SeuratObject,AttachDeps) +importFrom(SeuratObject,Boundaries) importFrom(SeuratObject,Cells) importFrom(SeuratObject,CellsByIdentities) importFrom(SeuratObject,Command) importFrom(SeuratObject,CreateAssayObject) +importFrom(SeuratObject,CreateCentroids) importFrom(SeuratObject,CreateDimReducObject) +importFrom(SeuratObject,CreateFOV) +importFrom(SeuratObject,CreateSegmentation) importFrom(SeuratObject,CreateSeuratObject) importFrom(SeuratObject,DefaultAssay) +importFrom(SeuratObject,DefaultBoundary) importFrom(SeuratObject,DefaultDimReduc) +importFrom(SeuratObject,DefaultFOV) importFrom(SeuratObject,Distances) importFrom(SeuratObject,Embeddings) +importFrom(SeuratObject,Features) importFrom(SeuratObject,FetchData) importFrom(SeuratObject,GetAssayData) importFrom(SeuratObject,GetImage) @@ -430,10 +460,13 @@ importFrom(SeuratObject,Indices) importFrom(SeuratObject,IsGlobal) importFrom(SeuratObject,JS) importFrom(SeuratObject,Key) +importFrom(SeuratObject,Keys) importFrom(SeuratObject,Loadings) importFrom(SeuratObject,LogSeuratCommand) importFrom(SeuratObject,Misc) +importFrom(SeuratObject,Molecules) importFrom(SeuratObject,Neighbors) +importFrom(SeuratObject,Overlay) importFrom(SeuratObject,PackageCheck) importFrom(SeuratObject,Project) importFrom(SeuratObject,Radius) @@ -485,6 +518,7 @@ importFrom(ggplot2,element_rect) importFrom(ggplot2,element_text) importFrom(ggplot2,facet_grid) importFrom(ggplot2,facet_wrap) +importFrom(ggplot2,fortify) importFrom(ggplot2,geom_abline) importFrom(ggplot2,geom_bar) importFrom(ggplot2,geom_blank) @@ -519,6 +553,7 @@ importFrom(ggplot2,margin) importFrom(ggplot2,position_dodge) importFrom(ggplot2,position_jitterdodge) importFrom(ggplot2,scale_alpha) +importFrom(ggplot2,scale_alpha_manual) importFrom(ggplot2,scale_alpha_ordinal) importFrom(ggplot2,scale_color_brewer) importFrom(ggplot2,scale_color_distiller) @@ -597,6 +632,7 @@ importFrom(igraph,graph_from_adjacency_matrix) importFrom(igraph,plot.igraph) importFrom(irlba,irlba) importFrom(jsonlite,fromJSON) +importFrom(jsonlite,read_json) importFrom(leiden,leiden) importFrom(lmtest,lrtest) importFrom(matrixStats,rowAnyNAs) @@ -606,11 +642,13 @@ importFrom(matrixStats,rowSums2) importFrom(methods,"slot<-") importFrom(methods,.hasSlot) importFrom(methods,as) +importFrom(methods,getMethod) importFrom(methods,is) importFrom(methods,new) importFrom(methods,setAs) importFrom(methods,setClass) importFrom(methods,setClassUnion) +importFrom(methods,setGeneric) importFrom(methods,setMethod) importFrom(methods,setOldClass) importFrom(methods,setValidity) @@ -631,12 +669,15 @@ importFrom(plotly,layout) importFrom(plotly,plot_ly) importFrom(plotly,raster2uri) importFrom(png,readPNG) +importFrom(progressr,progressor) importFrom(reticulate,import) importFrom(reticulate,py_module_available) importFrom(reticulate,py_set_seed) importFrom(rlang,"!!") importFrom(rlang,as_label) importFrom(rlang,invoke) +importFrom(rlang,is_na) +importFrom(rlang,sym) importFrom(scales,brewer_pal) importFrom(scales,hue_pal) importFrom(scales,rescale) @@ -706,6 +747,7 @@ importFrom(stats,var) importFrom(stats,wilcox.test) importFrom(tibble,tibble) importFrom(tools,file_ext) +importFrom(tools,file_path_sans_ext) importFrom(utils,argsAnywhere) importFrom(utils,capture.output) importFrom(utils,file_test) diff --git a/NEWS.md b/NEWS.md index b9c21ce76..a2d554b69 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,12 @@ -# Seurat 4.2.1 (2022-11-07) +# Seurat 4.3.0 (2022-11-18) + +## Added +- Add support for imaging-based spatial datasets + +## Changes +- Fix bug in `FindMarkers()` when run post Integration/Transfer ([#6856](https://github.com/satijalab/seurat/issues/6586)) + +# Seurat 4.2.1 (2022-11-08) ## Changes - Replaced import from `spatstat.core` with `spatstat.explore` diff --git a/R/convenience.R b/R/convenience.R index aee4b254a..e2848908b 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -7,6 +7,208 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @param fov Name to store FOV as +#' @param assay Name to store expression matrix as +#' @inheritDotParams ReadAkoya +#' +#' @return \code{LoadAkoya}: A \code{\link[SeuratObject]{Seurat}} object +#' +#' @importFrom SeuratObject Cells CreateFOV CreateSeuratObject +#' +#' @export +#' +#' @rdname ReadAkoya +#' +LoadAkoya <- function( + filename, + type = c('inform', 'processor', 'qupath'), + fov, + assay = 'Akoya', + ... +) { + # read in matrix and centroids + data <- ReadAkoya(filename = filename, type = type) + # convert centroids into coords object + coords <- suppressWarnings(expr = CreateFOV( + coords = data$centroids, + type = 'centroids', + key = 'fov', + assay = assay + )) + colnames(x = data$metadata) <- suppressWarnings( + expr = make.names(names = colnames(x = data$metadata)) + ) + # build Seurat object from matrix + obj <- CreateSeuratObject( + counts = data$matrix, + assay = assay, + meta.data = data$metadata + ) + # make sure coords only contain cells in seurat object + coords <- subset(x = coords, cells = Cells(x = obj)) + suppressWarnings(expr = obj[[fov]] <- coords) # add image to seurat object + # Add additional assays + for (i in setdiff(x = names(x = data), y = c('matrix', 'centroids', 'metadata'))) { + suppressWarnings(expr = obj[[i]] <- CreateAssayObject(counts = data[[i]])) + } + return(obj) +} + +#' @inheritParams ReadAkoya +#' @param data.dir Path to a directory containing Vitessce cells +#' and clusters JSONs +#' +#' @return \code{LoadHuBMAPCODEX}: A \code{\link[SeuratObject]{Seurat}} object +#' +#' @importFrom SeuratObject Cells CreateFOV CreateSeuratObject +#' +#' @export +#' +#' @rdname ReadVitessce +#' +LoadHuBMAPCODEX <- function(data.dir, fov, assay = 'CODEX') { + data <- ReadVitessce( + counts = file.path(data.dir, "reg1_stitched_expressions.clusters.json"), + coords = file.path(data.dir, "reg1_stitched_expressions.cells.json"), + type = "segmentations" + ) + # Create spatial and Seurat objects + coords <- CreateFOV( + coords = data$segmentations, + molecules = data$molecules, + assay = assay + ) + obj <- CreateSeuratObject(counts = data$counts, assay = assay) + # make sure spatial coords only contain cells in seurat object + coords <- subset(x = coords, cells = Cells(x = obj)) + obj[[fov]] <- coords + return(obj) +} + +#' @inheritParams ReadAkoya +#' @param data.dir Path to folder containing Nanostring SMI outputs +#' +#' @return \code{LoadNanostring}: A \code{\link[SeuratObject]{Seurat}} object +#' +#' @importFrom SeuratObject Cells CreateCentroids CreateFOV +#' CreateSegmentation CreateSeuratObject +#' +#' @export +#' +#' @rdname ReadNanostring +#' +LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') { + data <- ReadNanostring( + data.dir = data.dir, + type = c("centroids", "segmentations") + ) + segs <- CreateSegmentation(data$segmentations) + cents <- CreateCentroids(data$centroids) + segmentations.data <- list( + "centroids" = cents, + "segmentation" = segs + ) + coords <- CreateFOV( + coords = segmentations.data, + type = c("segmentation", "centroids"), + molecules = data$pixels, + assay = assay + ) + obj <- CreateSeuratObject(counts = data$matrix, assay = assay) + + # subset both object and coords based on the cells shared by both + cells <- intersect( + Cells(x = coords, boundary = "segmentation"), + Cells(x = coords, boundary = "centroids") + ) + cells <- intersect(Cells(obj), cells) + coords <- subset(x = coords, cells = cells) + obj[[fov]] <- coords + return(obj) +} + +#' @return \code{LoadVizgen}: A \code{\link[SeuratObject]{Seurat}} object +#' +#' @importFrom SeuratObject Cells CreateCentroids CreateFOV +#' CreateSegmentation CreateSeuratObject +#' +#' @export +#' +#' @rdname ReadVizgen +#' +LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) { + data <- ReadVizgen( + data.dir = data.dir, + filter = "^Blank-", + type = c("centroids", "segmentations"), + z = z + ) + segs <- CreateSegmentation(data$segmentations) + cents <- CreateCentroids(data$centroids) + segmentations.data <- list( + "centroids" = cents, + "segmentation" = segs + ) + coords <- CreateFOV( + coords = segmentations.data, + type = c("segmentation", "centroids"), + molecules = data$microns, + assay = assay + ) + obj <- CreateSeuratObject(counts = data$transcripts, assay = assay) + # only consider the cells we have counts and a segmentation for + # Cells which don't have a segmentation are probably found in other z slices. + coords <- subset( + x = coords, + cells = intersect( + x = Cells(x = coords[["segmentation"]]), + y = Cells(x = obj) + ) + ) + # add coords to seurat object + obj[[fov]] <- coords + return(obj) +} + +#' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object +#' +#' @param data.dir Path to folder containing Nanostring SMI outputs +#' @param fov FOV name +#' @param assay Assay name +#' +#' @importFrom SeuratObject Cells CreateCentroids CreateFOV +#' CreateSegmentation CreateSeuratObject +#' +#' @export +#' +#' @rdname ReadXenium +#' +LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { + data <- ReadXenium( + data.dir = data.dir, + type = c("centroids", "segmentations"), + ) + + segmentations.data <- list( + "centroids" = CreateCentroids(data$centroids), + "segmentation" = CreateSegmentation(data$segmentations) + ) + coords <- CreateFOV( + coords = segmentations.data, + type = c("segmentation", "centroids"), + molecules = data$microns, + assay = assay + ) + + xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay) + xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]]) + xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]]) + xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]]) + + xenium.obj[[fov]] <- coords + return(xenium.obj) +} + #' @param ... Extra parameters passed to \code{DimHeatmap} #' #' @rdname DimHeatmap diff --git a/R/differential_expression.R b/R/differential_expression.R index 59fd1ccee..c96e3fea4 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -989,9 +989,16 @@ FindMarkers.Seurat <- function( command = norm.command, value = "normalization.method" ) - } else { + } else if (length(x = intersect(x = c("FindIntegrationAnchors", "FindTransferAnchors"), y = Command(object = object)))) { + command <- intersect(x = c("FindIntegrationAnchors", "FindTransferAnchors"), y = Command(object = object))[1] + Command( + object = object, + command = command, + value = "normalization.method" + ) + } else { NULL - } + } de.results <- FindMarkers( object = data.use, slot = slot, diff --git a/R/preprocessing.R b/R/preprocessing.R index f5cc18ce3..45baa4254 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1,4 +1,5 @@ #' @include generics.R +#' @importFrom progressr progressor #' NULL @@ -6,6 +7,11 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +globalVariables( + names = c('fov', 'cell_ID', 'qv'), + package = 'Seurat', + add = TRUE +) #' Calculate the Barcode Distribution Inflection #' #' This function calculates an adaptive inflection point ("knee") of the barcode distribution @@ -786,6 +792,7 @@ Read10X <- function( strip.suffix = FALSE ) { full.data <- list() + has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE) for (i in seq_along(along.with = data.dir)) { run <- data.dir[i] if (!dir.exists(paths = run)) { @@ -814,7 +821,12 @@ Read10X <- function( stop("Expression matrix file missing. Expecting ", basename(path = matrix.loc)) } data <- readMM(file = matrix.loc) - cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL) + if (has_dt) { + cell.barcodes <- as.data.frame(data.table::fread(barcode.loc, header = FALSE)) + } else { + cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL) + } + if (ncol(x = cell.barcodes) > 1) { cell.names <- cell.barcodes[, cell.column] } else { @@ -837,11 +849,17 @@ Read10X <- function( } else { colnames(x = data) <- paste0(names(x = data.dir)[i], "_", cell.names) } - feature.names <- read.delim( - file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), - header = FALSE, - stringsAsFactors = FALSE - ) + + if (has_dt) { + feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), header = FALSE)) + } else { + feature.names <- read.delim( + file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), + header = FALSE, + stringsAsFactors = FALSE + ) + } + if (any(is.na(x = feature.names[, gene.column]))) { warning( 'Some features names are NA. Replacing NA names with ID from the opposite column requested', @@ -1046,6 +1064,322 @@ Read10X_Image <- function(image.dir, image.name = "tissue_lowres_image.png", fil )) } +#' Read and Load Akoya CODEX data +#' +#' @param filename Path to matrix generated by upstream processing. +#' @param type Specify which type matrix is being provided. +#' \itemize{ +#' \item \dQuote{\code{processor}}: matrix generated by CODEX Processor +#' \item \dQuote{\code{inform}}: matrix generated by inForm +#' \item \dQuote{\code{qupath}}: matrix generated by QuPath +#' } +#' @param filter A pattern to filter features by; pass \code{NA} to +#' skip feature filtering +#' @param inform.quant When \code{type} is \dQuote{\code{inform}}, the +#' quantification level to read in +#' +#' @return \code{ReadAkoya}: A list with some combination of the following values +#' \itemize{ +#' \item \dQuote{\code{matrix}}: a +#' \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells +#' are columns and features are rows +#' \item \dQuote{\code{centroids}}: a data frame with cell centroid +#' coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{metadata}}: a data frame with cell-level meta data; +#' includes all columns in \code{filename} that aren't in +#' \dQuote{\code{matrix}} or \dQuote{\code{centroids}} +#' } +#' When \code{type} is \dQuote{\code{inform}}, additional expression matrices +#' are returned and named using their segmentation type (eg. +#' \dQuote{nucleus}, \dQuote{membrane}). The \dQuote{Entire Cell} segmentation +#' type is returned in the \dQuote{\code{matrix}} entry of the list +#' +#' @export +#' +#' @order 1 +#' +#' @concept preprocessing +#' +#' @template section-progressr +#' +#' @templateVar pkg data.table +#' @template note-reqdpkg +#' +ReadAkoya <- function( + filename, + type = c('inform', 'processor', 'qupath'), + filter = 'DAPI|Blank|Empty', + inform.quant = c('mean', 'total', 'min', 'max', 'std') +) { + if (!requireNamespace("data.table", quietly = TRUE)) { + stop("Please install 'data.table' for this function") + } + # Check arguments + if (!file.exists(filename)) { + stop(paste("Can't file file:", filename)) + } + type <- tolower(x = type[1L]) + type <- match.arg(arg = type) + # outs <- list(matrix = NULL, centroids = NULL) + ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) + p <- progressor() + # Preload matrix + p(message = "Preloading Akoya matrix", class = 'sticky', amount = 0) + sep <- switch(EXPR = type, 'inform' = '\t', ',') + mtx <- data.table::fread( + file = filename, + sep = sep, + data.table = FALSE, + verbose = FALSE + ) + # Assemble outputs + p( + message = paste0("Parsing matrix in '", type, "' format"), + class = 'sticky', + amount = 0 + ) + outs <- switch( + EXPR = type, + 'processor' = { + # Create centroids data frame + p( + message = 'Creating centroids coordinates', + class = 'sticky', + amount = 0 + ) + centroids <- data.frame( + x = mtx[['x:x']], + y = mtx[['y:y']], + cell = as.character(x = mtx[['cell_id:cell_id']]), + stringsAsFactors = FALSE + ) + rownames(x = mtx) <- as.character(x = mtx[['cell_id:cell_id']]) + # Create metadata data frame + p(message = 'Creating meta data', class = 'sticky', amount = 0) + md <- mtx[, !grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE] + colnames(x = md) <- vapply( + X = strsplit(x = colnames(x = md), split = ':'), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 2L + ) + # Create expression matrix + p(message = 'Creating expression matrix', class = 'sticky', amount = 0) + mtx <- mtx[, grepl(pattern = '^cyc', x = colnames(x = mtx)), drop = FALSE] + colnames(x = mtx) <- vapply( + X = strsplit(x = colnames(x = mtx), split = ':'), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 2L + ) + if (!is.na(x = filter)) { + p( + message = paste0("Filtering features with pattern '", filter, "'"), + class = 'sticky', + amount = 0 + ) + mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE] + } + mtx <- t(x = mtx) + if ((sum(mtx == 0) / length(x = mtx)) > ratio) { + p( + message = 'Converting expression to sparse matrix', + class = 'sticky', + amount = 0 + ) + mtx <- as.sparse(x = mtx) + } + list(matrix = mtx, centroids = centroids, metadata = md) + }, + 'inform' = { + inform.quant <- tolower(x = inform.quant[1L]) + inform.quant <- match.arg(arg = inform.quant) + expr.key <- c( + mean = 'Mean', + total = 'Total', + min = 'Min', + max = 'Max', + std = 'Std Dev' + )[inform.quant] + expr.pattern <- '\\(Normalized Counts, Total Weighting\\)' + rownames(x = mtx) <- mtx[['Cell ID']] + mtx <- mtx[, setdiff(x = colnames(x = mtx), y = 'Cell ID'), drop = FALSE] + # Create centroids + p( + message = 'Creating centroids coordinates', + class = 'sticky', + amount = 0 + ) + centroids <- data.frame( + x = mtx[['Cell X Position']], + y = mtx[['Cell Y Position']], + cell = rownames(x = mtx), + stringsAsFactors = FALSE + ) + # Create metadata + p(message = 'Creating meta data', class = 'sticky', amount = 0) + cols <- setdiff( + x = grep( + pattern = expr.pattern, + x = colnames(x = mtx), + value = TRUE, + invert = TRUE + ), + y = paste('Cell', c('X', 'Y'), 'Position') + ) + md <- mtx[, cols, drop = FALSE] + # Create expression matrices + exprs <- data.frame( + cols = grep( + pattern = paste(expr.key, expr.pattern), + x = colnames(x = mtx), + value = TRUE + ) + ) + exprs$feature <- vapply( + X = trimws(x = gsub( + pattern = paste(expr.key, expr.pattern), + replacement = '', + x = exprs$cols + )), + FUN = function(x) { + x <- unlist(x = strsplit(x = x, split = ' ')) + x <- x[length(x = x)] + return(gsub(pattern = '\\(|\\)', replacement = '', x = x)) + }, + FUN.VALUE = character(length = 1L) + ) + exprs$class <- tolower(x = vapply( + X = strsplit(x = exprs$cols, split = ' '), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 1L + )) + classes <- unique(x = exprs$class) + outs <- vector( + mode = 'list', + length = length(x = classes) + 2L + ) + names(x = outs) <- c( + 'matrix', + 'centroids', + 'metadata', + setdiff(x = classes, y = 'entire') + ) + outs$centroids <- centroids + outs$metadata <- md + # browser() + for (i in classes) { + p( + message = paste( + 'Creating', + switch(EXPR = i, 'entire' = 'entire cell', i), + 'expression matrix' + ), + class = 'sticky', + amount = 0 + ) + df <- exprs[exprs$class == i, , drop = FALSE] + expr <- mtx[, df$cols] + colnames(x = expr) <- df$feature + if (!is.na(x = filter)) { + p( + message = paste0("Filtering features with pattern '", filter, "'"), + class = 'sticky', + amount = 0 + ) + expr <- expr[, !grepl(pattern = filter, x = colnames(x = expr)), drop = FALSE] + } + expr <- t(x = expr) + if ((sum(expr == 0, na.rm = TRUE) / length(x = expr)) > ratio) { + p( + message = paste( + 'Converting', + switch(EXPR = i, 'entire' = 'entire cell', i), + 'expression to sparse matrix' + ), + class = 'sticky', + amount = 0 + ) + expr <- as.sparse(x = expr) + } + outs[[switch(EXPR = i, 'entire' = 'matrix', i)]] <- expr + } + outs + }, + 'qupath' = { + rownames(x = mtx) <- as.character(x = seq_len(length.out = nrow(x = mtx))) + # Create centroids + p( + message = 'Creating centroids coordinates', + class = 'sticky', + amount = 0 + ) + xpos <- sort( + x = grep(pattern = 'Centroid X', x = colnames(x = mtx), value = TRUE), + decreasing = TRUE + )[1L] + ypos <- sort( + x = grep(pattern = 'Centroid Y', x = colnames(x = mtx), value = TRUE), + decreasing = TRUE + )[1L] + centroids <- data.frame( + x = mtx[[xpos]], + y = mtx[[ypos]], + cell = rownames(x = mtx), + stringsAsFactors = FALSE + ) + # Create metadata + p(message = 'Creating meta data', class = 'sticky', amount = 0) + cols <- setdiff( + x = grep( + pattern = 'Cell: Mean', + x = colnames(x = mtx), + ignore.case = TRUE, + value = TRUE, + invert = TRUE + ), + y = c(xpos, ypos) + ) + md <- mtx[, cols, drop = FALSE] + # Create expression matrix + p(message = 'Creating expression matrix', class = 'sticky', amount = 0) + idx <- which(x = grepl( + pattern = 'Cell: Mean', + x = colnames(x = mtx), + ignore.case = TRUE + )) + mtx <- mtx[, idx, drop = FALSE] + colnames(x = mtx) <- vapply( + X = strsplit(x = colnames(x = mtx), split = ':'), + FUN = '[[', + FUN.VALUE = character(length = 1L), + 1L + ) + if (!is.na(x = filter)) { + p( + message = paste0("Filtering features with pattern '", filter, "'"), + class = 'sticky', + amount = 0 + ) + mtx <- mtx[, !grepl(pattern = filter, x = colnames(x = mtx)), drop = FALSE] + } + mtx <- t(x = mtx) + if ((sum(mtx == 0) / length(x = mtx)) > ratio) { + p( + message = 'Converting expression to sparse matrix', + class = 'sticky', + amount = 0 + ) + mtx <- as.sparse(x = mtx) + } + list(matrix = mtx, centroids = centroids, metadata = md) + }, + stop("Unknown matrix type: ", type) + ) + return(outs) +} + #' Load in data from remote or local mtx files #' #' Enables easy loading of sparse data matrices @@ -1259,6 +1593,529 @@ ReadMtx <- function( return(data) } +#' Read and Load Nanostring SMI data +#' +#' @param data.dir Directory containing all Nanostring SMI files with +#' default filenames +#' @param mtx.file Path to Nanostring cell x gene matrix CSV +#' @param metadata.file Contains metadata including cell center, area, +#' and stain intensities +#' @param molecules.file Path to molecules file +#' @param segmentations.file Path to segmentations CSV +#' @param type Type of cell spatial coordinate matrices to read; choose one +#' or more of: +#' \itemize{ +#' \item \dQuote{centroids}: cell centroids in pixel coordinate space +#' \item \dQuote{segmentations}: cell segmentations in pixel coordinate space +#' } +#' @param mol.type Type of molecule spatial coordinate matrices to read; +#' choose one or more of: +#' \itemize{ +#' \item \dQuote{pixels}: molecule coordinates in pixel space +#' } +#' @param metadata Type of available metadata to read; +#' choose zero or more of: +#' \itemize{ +#' \item \dQuote{Area}: number of pixels in cell segmentation +#' \item \dQuote{fov}: cell's fov +#' \item \dQuote{Mean.MembraneStain}: mean membrane stain intensity +#' \item \dQuote{Mean.DAPI}: mean DAPI stain intensity +#' \item \dQuote{Mean.G}: mean green channel stain intensity +#' \item \dQuote{Mean.Y}: mean yellow channel stain intensity +#' \item \dQuote{Mean.R}: mean red channel stain intensity +#' \item \dQuote{Max.MembraneStain}: max membrane stain intensity +#' \item \dQuote{Max.DAPI}: max DAPI stain intensity +#' \item \dQuote{Max.G}: max green channel stain intensity +#' \item \dQuote{Max.Y}: max yellow stain intensity +#' \item \dQuote{Max.R}: max red stain intensity +#' } +#' @param mols.filter Filter molecules that match provided string +#' @param genes.filter Filter genes from cell x gene matrix that match +#' provided string +#' @param fov.filter Only load in select FOVs. Nanostring SMI data contains +#' 30 total FOVs. +#' @param subset.counts.matrix If the counts matrix should be built from +#' molecule coordinates for a specific segmentation; One of: +#' \itemize{ +#' \item \dQuote{Nuclear}: nuclear segmentations +#' \item \dQuote{Cytoplasm}: cell cytoplasm segmentations +#' \item \dQuote{Membrane}: cell membrane segmentations +#' } +#' @param cell.mols.only If TRUE, only load molecules within a cell +#' +#' @return \code{ReadNanostring}: A list with some combination of the +#' following values: +#' \itemize{ +#' \item \dQuote{\code{matrix}}: a +#' \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells +#' are columns and features are rows +#' \item \dQuote{\code{centroids}}: a data frame with cell centroid +#' coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{pixels}}: a data frame with molecule pixel coordinates +#' in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} +#' } +#' +#' @importFrom future.apply future_lapply +#' +#' @export +#' +#' @order 1 +#' +#' @concept preprocessing +#' +#' @template section-progressr +#' @template section-future +#' +#' @templateVar pkg data.table +#' @template note-reqdpkg +#' +ReadNanostring <- function( + data.dir, + mtx.file = NULL, + metadata.file = NULL, + molecules.file = NULL, + segmentations.file = NULL, + type = 'centroids', + mol.type = 'pixels', + metadata = NULL, + mols.filter = NA_character_, + genes.filter = NA_character_, + fov.filter = NULL, + subset.counts.matrix = NULL, + cell.mols.only = TRUE +) { + if (!requireNamespace("data.table", quietly = TRUE)) { + stop("Please install 'data.table' for this function") + } + + # Argument checking + type <- match.arg( + arg = type, + choices = c('centroids', 'segmentations'), + several.ok = TRUE + ) + mol.type <- match.arg( + arg = mol.type, + choices = c('pixels'), + several.ok = TRUE + ) + if (!is.null(metadata)) { + metadata <- match.arg( + arg = metadata, + choices = c( + "Area", "fov", "Mean.MembraneStain", "Mean.DAPI", "Mean.G", + "Mean.Y", "Mean.R", "Max.MembraneStain", "Max.DAPI", "Max.G", + "Max.Y", "Max.R" + ), + several.ok = TRUE + ) + } + + use.dir <- all(vapply( + X = c(mtx.file, metadata.file, molecules.file), + FUN = function(x) { + return(is.null(x = x) || is.na(x = x)) + }, + FUN.VALUE = logical(length = 1L) + )) + + if (use.dir && !dir.exists(paths = data.dir)) { + stop("Cannot find Nanostring directory ", data.dir) + } + # Identify input files + files <- c( + matrix = mtx.file %||% '[_a-zA-Z0-9]*_exprMat_file.csv', + metadata.file = metadata.file %||% '[_a-zA-Z0-9]*_metadata_file.csv', + molecules.file = molecules.file %||% '[_a-zA-Z0-9]*_tx_file.csv', + segmentations.file = segmentations.file %||% '[_a-zA-Z0-9]*-polygons.csv' + ) + + files <- vapply( + X = files, + FUN = function(x) { + x <- as.character(x = x) + if (isTRUE(x = dirname(path = x) == '.')) { + fnames <- list.files( + path = data.dir, + pattern = x, + recursive = FALSE, + full.names = TRUE + ) + return(sort(x = fnames, decreasing = TRUE)[1L]) + } else { + return(x) + } + }, + FUN.VALUE = character(length = 1L), + USE.NAMES = TRUE + ) + files[!file.exists(files)] <- NA_character_ + + if (all(is.na(x = files))) { + stop("Cannot find Nanostring input files in ", data.dir) + } + # Checking for loading spatial coordinates + if (!is.na(x = files[['metadata.file']])) { + pprecoord <- progressor() + pprecoord( + message = "Preloading cell spatial coordinates", + class = 'sticky', + amount = 0 + ) + md <- data.table::fread( + file = files[['metadata.file']], + sep = ',', + data.table = FALSE, + verbose = FALSE + ) + + # filter metadata file by FOVs + if (!is.null(x = fov.filter)) { + md <- md[md$fov %in% fov.filter,] + } + pprecoord(type = 'finish') + } + if (!is.na(x = files[['segmentations.file']])) { + ppresegs <- progressor() + ppresegs( + message = "Preloading cell segmentation vertices", + class = 'sticky', + amount = 0 + ) + segs <- data.table::fread( + file = files[['segmentations.file']], + sep = ',', + data.table = FALSE, + verbose = FALSE + ) + + # filter metadata file by FOVs + if (!is.null(x = fov.filter)) { + segs <- segs[segs$fov %in% fov.filter,] + } + ppresegs(type = 'finish') + } + # Check for loading of molecule coordinates + if (!is.na(x = files[['molecules.file']])) { + ppremol <- progressor() + ppremol( + message = "Preloading molecule coordinates", + class = 'sticky', + amount = 0 + ) + mx <- data.table::fread( + file = files[['molecules.file']], + sep = ',', + verbose = FALSE + ) + + # filter molecules file by FOVs + if (!is.null(x = fov.filter)) { + mx <- mx[mx$fov %in% fov.filter,] + } + + # Molecules outside of a cell have a cell_ID of 0 + if (cell.mols.only) { + mx <- mx[mx$cell_ID != 0,] + } + + if (!is.na(x = mols.filter)) { + ppremol( + message = paste("Filtering molecules with pattern", mols.filter), + class = 'sticky', + amount = 0 + ) + mx <- mx[!grepl(pattern = mols.filter, x = mx$target), , drop = FALSE] + } + ppremol(type = 'finish') + mols <- rep_len(x = files[['molecules.file']], length.out = length(x = mol.type)) + names(x = mols) <- mol.type + files <- c(files, mols) + files <- files[setdiff(x = names(x = files), y = 'molecules.file')] + } + files <- files[!is.na(x = files)] + + outs <- list("matrix"=NULL, "pixels"=NULL, "centroids"=NULL) + if (!is.null(metadata)) { + outs <- append(outs, list("metadata" = NULL)) + } + if ("segmentations" %in% type) { + outs <- append(outs, list("segmentations" = NULL)) + } + + for (otype in names(x = outs)) { + outs[[otype]] <- switch( + EXPR = otype, + 'matrix' = { + ptx <- progressor() + ptx(message = 'Reading counts matrix', class = 'sticky', amount = 0) + if (!is.null(subset.counts.matrix)) { + tx <- build.cellcomp.matrix(mols.df=mx, class=subset.counts.matrix) + } else { + tx <- data.table::fread( + file = files[[otype]], + sep = ',', + data.table = FALSE, + verbose = FALSE + ) + # Combination of Cell ID (for non-zero cell_IDs) and FOV are assumed to be unique. Used to create barcodes / rownames. + bcs <- paste0(as.character(tx$cell_ID), "_", tx$fov) + rownames(x = tx) <- bcs + # remove all rows which represent counts of mols not assigned to a cell for each FOV + tx <- tx[!tx$cell_ID == 0,] + # filter fovs from counts matrix + if (!is.null(x = fov.filter)) { + tx <- tx[tx$fov %in% fov.filter,] + } + tx <- subset(tx, select = -c(fov, cell_ID)) + } + + tx <- as.data.frame(t(x = as.matrix(x = tx[, -1, drop = FALSE]))) + if (!is.na(x = genes.filter)) { + ptx( + message = paste("Filtering genes with pattern", genes.filter), + class = 'sticky', + amount = 0 + ) + tx <- tx[!grepl(pattern = genes.filter, x = rownames(x = tx)), , drop = FALSE] + } + # only keep cells with counts greater than 0 + tx <- tx[, which(colSums(tx) != 0)] + ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) + + if ((sum(tx == 0) / length(x = tx)) > ratio) { + ptx( + message = 'Converting counts to sparse matrix', + class = 'sticky', + amount = 0 + ) + tx <- as.sparse(x = tx) + } + + ptx(type = 'finish') + + tx + }, + 'centroids' = { + pcents <- progressor() + pcents( + message = 'Creating centroid coordinates', + class = 'sticky', + amount = 0 + ) + pcents(type = 'finish') + data.frame( + x = md$CenterX_global_px, + y = md$CenterY_global_px, + cell = paste0(as.character(md$cell_ID), "_", md$fov), + stringsAsFactors = FALSE + ) + }, + 'segmentations' = { + pcents <- progressor() + pcents( + message = 'Creating segmentation coordinates', + class = 'sticky', + amount = 0 + ) + pcents(type = 'finish') + data.frame( + x = segs$x_global_px, + y = segs$y_global_px, + cell = paste0(as.character(segs$cellID), "_", segs$fov), # cell_ID column in this file doesn't have an underscore + stringsAsFactors = FALSE + ) + }, + 'metadata' = { + pmeta <- progressor() + pmeta( + message = 'Loading metadata', + class = 'sticky', + amount = 0 + ) + pmeta(type = 'finish') + df <- md[,metadata] + df$cell <- paste0(as.character(md$cell_ID), "_", md$fov) + df + }, + 'pixels' = { + ppixels <- progressor() + ppixels( + message = 'Creating pixel-level molecule coordinates', + class = 'sticky', + amount = 0 + ) + df <- data.frame( + x = mx$x_global_px, + y = mx$y_global_px, + gene = mx$target, + stringsAsFactors = FALSE + ) + ppixels(type = 'finish') + df + }, + # 'microns' = { + # pmicrons <- progressor() + # pmicrons( + # message = "Creating micron-level molecule coordinates", + # class = 'sticky', + # amount = 0 + # ) + # df <- data.frame( + # x = mx$global_x, + # y = mx$global_y, + # gene = mx$gene, + # stringsAsFactors = FALSE + # ) + # pmicrons(type = 'finish') + # df + # }, + stop("Unknown Nanostring input type: ", outs[[otype]]) + ) + } + return(outs) +} + +#' Read and Load 10x Genomics Xenium in-situ data +#' +#' @param data.dir Directory containing all Xenium output files with +#' default filenames +#' @param outs Types of molecular outputs to read; choose one or more of: +#' \itemize{ +#' \item \dQuote{matrix}: the counts matrix +#' \item \dQuote{microns}: molecule coordinates +#' } +#' @param type Type of cell spatial coordinate matrices to read; choose one +#' or more of: +#' \itemize{ +#' \item \dQuote{centroids}: cell centroids in pixel coordinate space +#' \item \dQuote{segmentations}: cell segmentations in pixel coordinate space +#' } +#' @param mols.qv.threshold Remove transcript molecules with +#' a QV less than this threshold. QV >= 20 is the standard threshold +#' used to construct the cell x gene count matrix. +#' +#' @return \code{ReadXenium}: A list with some combination of the +#' following values: +#' \itemize{ +#' \item \dQuote{\code{matrix}}: a +#' \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells +#' are columns and features are rows +#' \item \dQuote{\code{centroids}}: a data frame with cell centroid +#' coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{pixels}}: a data frame with molecule pixel coordinates +#' in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} +#' } +#' +#' +#' @export +#' @concept preprocessing +#' +ReadXenium <- function( + data.dir, + outs = c("matrix", "microns"), + type = "centroids", + mols.qv.threshold = 20 +) { + # Argument checking + type <- match.arg( + arg = type, + choices = c("centroids", "segmentations"), + several.ok = TRUE + ) + + outs <- match.arg( + arg = outs, + choices = c("matrix", "microns"), + several.ok = TRUE + ) + + outs <- c(outs, type) + + has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE) + + data <- sapply(outs, function(otype) { + switch( + EXPR = otype, + 'matrix' = { + pmtx <- progressor() + pmtx(message = 'Reading counts matrix', class = 'sticky', amount = 0) + matrix <- suppressWarnings(Read10X(data.dir = file.path(data.dir, "cell_feature_matrix/"))) + pmtx(type = "finish") + matrix + }, + 'centroids' = { + pcents <- progressor() + pcents( + message = 'Loading cell centroids', + class = 'sticky', + amount = 0 + ) + if (has_dt) { + cell_info <- as.data.frame(data.table::fread(file.path(data.dir, "cells.csv.gz"))) + } else { + cell_info <- read.csv(file.path(data.dir, "cells.csv.gz")) + } + cell_centroid_df <- data.frame( + x = cell_info$x_centroid, + y = cell_info$y_centroid, + cell = cell_info$cell_id, + stringsAsFactors = FALSE + ) + pcents(type = 'finish') + cell_centroid_df + }, + 'segmentations' = { + psegs <- progressor() + psegs( + message = 'Loading cell segmentations', + class = 'sticky', + amount = 0 + ) + + # load cell boundaries + if (has_dt) { + cell_boundaries_df <- as.data.frame(data.table::fread(file.path(data.dir, "cell_boundaries.csv.gz"))) + } else { + cell_boundaries_df <- read.csv(file.path(data.dir, "cell_boundaries.csv.gz"), stringsAsFactors = FALSE) + } + names(cell_boundaries_df) <- c("cell", "x", "y") + psegs(type = "finish") + cell_boundaries_df + }, + 'microns' = { + pmicrons <- progressor() + pmicrons( + message = "Loading molecule coordinates", + class = 'sticky', + amount = 0 + ) + + # molecules + if (has_dt) { + tx_dt <- as.data.frame(data.table::fread(file.path(data.dir, "transcripts.csv.gz"))) + transcripts <- subset(tx_dt, qv >= mols.qv.threshold) + } else { + transcripts <- read.csv(file.path(data.dir, "transcripts.csv.gz")) + transcripts <- subset(transcripts, qv >= mols.qv.threshold) + } + + df <- + data.frame( + x = transcripts$x_location, + y = transcripts$y_location, + gene = transcripts$feature_name, + stringsAsFactors = FALSE + ) + pmicrons(type = 'finish') + df + }, + stop("Unknown Xenium input type: ", otype) + ) + }, USE.NAMES = TRUE) + return(data) +} + #' Load Slide-seq spatial data #' #' @param coord.file Path to csv file containing bead coordinate positions @@ -1290,6 +2147,649 @@ ReadSlideSeq <- function(coord.file, assay = 'Spatial') { return(slide.seq) } +#' Read Data From Vitessce +#' +#' Read in data from Vitessce-formatted JSON files +#' +#' @param counts Path or URL to a Vitessce-formatted JSON file with +#' expression data; should end in \dQuote{\code{.genes.json}} or +#' \dQuote{\code{.clusters.json}}; pass \code{NULL} to skip +#' @param coords Path or URL to a Vitessce-formatted JSON file with cell/spot +#' spatial coordinates; should end in \dQuote{\code{.cells.json}}; +#' pass \code{NULL} to skip +#' @param molecules Path or URL to a Vitessce-formatted JSON file with molecule +#' spatial coordinates; should end in \dQuote{\code{.molecules.json}}; +#' pass \code{NULL} to skip +#' @param type Type of cell/spot spatial coordinates to return, +#' choose one or more from: +#' \itemize{ +#' \item \dQuote{segmentations} cell/spot segmentations +#' \item \dQuote{centroids} cell/spot centroids +#' } +#' @param filter A character to filter molecules by, pass \code{NA} to skip +#' molecule filtering +#' +#' @return \code{ReadVitessce}: A list with some combination of the +#' following values: +#' \itemize{ +#' \item \dQuote{\code{counts}}: if \code{counts} is not \code{NULL}, an +#' expression matrix with cells as columns and features as rows +#' \item \dQuote{\code{centroids}}: if \code{coords} is not \code{NULL} and +#' \code{type} is contains\dQuote{centroids}, a data frame with cell centroids +#' in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{segmentations}}: if \code{coords} is not \code{NULL} and +#' \code{type} contains \dQuote{centroids}, a data frame with cell +#' segmentations in three columns: \dQuote{x}, \dQuote{y} and \dQuote{cell} +#' \item \dQuote{\code{molecules}}: if \code{molecules} is not \code{NULL}, a +#' data frame with molecule spatial coordinates in three columns: \dQuote{x}, +#' \dQuote{y}, and \dQuote{gene} +#' } +#' +#' @importFrom jsonlite read_json +#' @importFrom tools file_ext file_path_sans_ext +#' +#' @export +#' +#' @order 1 +#' +#' @concept preprocessing +#' +#' @template section-progressr +#' +#' @templateVar pkg jsonlite +#' @template note-reqdpkg +#' +#' @examples +#' \dontrun{ +#' coords <- ReadVitessce( +#' counts = +#' "https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/wang/wang.genes.json", +#' coords = +#' "https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/wang/wang.cells.json", +#' molecules = +#' "https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/wang/wang.molecules.json" +#' ) +#' names(coords) +#' coords$counts[1:10, 1:10] +#' head(coords$centroids) +#' head(coords$segmentations) +#' head(coords$molecules) +#' } +#' +ReadVitessce <- function( + counts = NULL, + coords = NULL, + molecules = NULL, + type = c('segmentations', 'centroids'), + filter = NA_character_ +) { + if (!requireNamespace('jsonlite', quietly = TRUE)) { + stop("Please install 'jsonlite' for this function") + } + type <- match.arg(arg = type, several.ok = TRUE) + nouts <- c( + counts %iff% 'counts', + coords %iff% type, + molecules %iff% 'molecules' + ) + outs <- vector(mode = 'list', length = length(x = nouts)) + names(x = outs) <- nouts + if (!is.null(x = coords)) { + ppreload <- progressor() + ppreload(message = "Preloading coordinates", class = 'sticky', amount = 0) + cells <- read_json(path = coords) + ppreload(type = 'finish') + } + for (i in nouts) { + outs[[i]] <- switch( + EXPR = i, + 'counts' = { + counts.type <- file_ext(x = basename(path = file_path_sans_ext( + x = counts + ))) + cts <- switch( + EXPR = counts.type, + 'clusters' = .ReadVitessceClusters(counts = counts), + 'genes' = .ReadVitessceGenes(counts = counts), + stop("Unknown Vitessce counts filetype: '", counts.type, "'") + ) + pcts <- progressor() + if (!is.na(x = filter)) { + pcts( + message = paste("Filtering genes with pattern", filter), + class = 'sticky', + amount = 0 + ) + cts <- cts[!grepl(pattern = filter, x = rownames(x = cts)), , drop = FALSE] + } + ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) + if ((sum(cts == 0) / length(x = cts)) > ratio) { + pcts( + message = 'Converting counts to sparse matrix', + class = 'sticky', + amount = 0 + ) + cts <- as.sparse(x = cts) + } + pcts(type = 'finish') + cts + }, + 'centroids' = { + pcents <- progressor(steps = length(x = cells)) + pcents(message = "Reading centroids", class = 'sticky', amount = 0) + centroids <- lapply( + X = names(x = cells), + FUN = function(x) { + cents <- cells[[x]]$xy + names(x = cents) <- c('x', 'y') + cents <- as.data.frame(x = cents) + cents$cell <- x + pcents() + return(cents) + } + ) + pcents(type = 'finish') + do.call(what = 'rbind', args = centroids) + }, + 'segmentations' = { + psegs <- progressor(steps = length(x = cells)) + psegs(message = "Reading segmentations", class = 'sticky', amount = 0) + segmentations <- lapply( + X = names(x = cells), + FUN = function(x) { + poly <- cells[[x]]$poly + poly <- lapply(X = poly, FUN = unlist) + poly <- as.data.frame(x = do.call(what = 'rbind', args = poly)) + colnames(x = poly) <- c('x', 'y') + poly$cell <- x + psegs() + return(poly) + } + ) + psegs(type = 'finish') + do.call(what = 'rbind', args = segmentations) + }, + 'molecules' = { + pmols1 <- progressor() + pmols1(message = "Reading molecules", class = 'sticky', amount = 0) + pmols1(type = 'finish') + mols <- read_json(path = molecules) + pmols2 <- progressor(steps = length(x = mols)) + mols <- lapply( + X = names(x = mols), + FUN = function(m) { + x <- mols[[m]] + x <- lapply(X = x, FUN = unlist) + x <- as.data.frame(x = do.call(what = 'rbind', args = x)) + colnames(x = x) <- c('x', 'y') + x$gene <- m + pmols2() + return(x) + } + ) + mols <- do.call(what = 'rbind', args = mols) + pmols2(type = 'finish') + if (!is.na(x = filter)) { + pmols3 <- progressor() + pmols3( + message = paste("Filtering molecules with pattern", filter), + class = 'sticky', + amount = 0 + ) + pmols3(type = 'finish') + mols <- mols[!grepl(pattern = filter, x = mols$gene), , drop = FALSE] + } + mols + }, + stop("Unknown data type: ", i) + ) + } + return(outs) +} + +#' Read and Load MERFISH Input from Vizgen +#' +#' Read and load in MERFISH data from Vizgen-formatted files +#' +#' @inheritParams ReadVitessce +#' @param data.dir Path to the directory with Vizgen MERFISH files; requires at +#' least one of the following files present: +#' \itemize{ +#' \item \dQuote{\code{cell_by_gene.csv}}: used for reading count matrix +#' \item \dQuote{\code{cell_metadata.csv}}: used for reading cell spatial +#' coordinate matrices +#' \item \dQuote{\code{detected_transcripts.csv}}: used for reading molecule +#' spatial coordinate matrices +#' } +#' @param transcripts Optional file path for counts matrix; pass \code{NA} to +#' suppress reading counts matrix +#' @param spatial Optional file path for spatial metadata; pass \code{NA} to +#' suppress reading spatial coordinates. If \code{spatial} is provided and +#' \code{type} is \dQuote{segmentations}, uses \code{dirname(spatial)} instead of +#' \code{data.dir} to find HDF5 files +#' @param molecules Optional file path for molecule coordinates file; pass +#' \code{NA} to suppress reading spatial molecule information +#' @param type Type of cell spatial coordinate matrices to read; choose one +#' or more of: +#' \itemize{ +#' \item \dQuote{segmentations}: cell segmentation vertices; requires +#' \href{https://cran.r-project.org/package=hdf5r}{\pkg{hdf5r}} to be +#' installed and requires a directory \dQuote{\code{cell_boundaries}} within +#' \code{data.dir}. Within \dQuote{\code{cell_boundaries}}, there must be +#' one or more HDF5 file named \dQuote{\code{feature_data_##.hdf5}} +#' \item \dQuote{centroids}: cell centroids in micron coordinate space +#' \item \dQuote{boxes}: cell box outlines in micron coordinate space +#' } +#' @param mol.type Type of molecule spatial coordinate matrices to read; +#' choose one or more of: +#' \itemize{ +#' \item \dQuote{pixels}: molecule coordinates in pixel space +#' \item \dQuote{microns}: molecule coordinates in micron space +#' } +#' @param metadata Type of available metadata to read; +#' choose zero or more of: +#' \itemize{ +#' \item \dQuote{volume}: estimated cell volume +#' \item \dQuote{fov}: cell's fov +#' } +#' @param z Z-index to load; must be between 0 and 6, inclusive +#' +#' @return \code{ReadVizgen}: A list with some combination of the +#' following values: +#' \itemize{ +#' \item \dQuote{\code{transcripts}}: a +#' \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells +#' are columns and features are rows +#' \item \dQuote{\code{segmentations}}: a data frame with cell polygon outlines in +#' three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{centroids}}: a data frame with cell centroid +#' coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{boxes}}: a data frame with cell box outlines in three +#' columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} +#' \item \dQuote{\code{microns}}: a data frame with molecule micron +#' coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} +#' \item \dQuote{\code{pixels}}: a data frame with molecule pixel coordinates +#' in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} +#' \item \dQuote{\code{metadata}}: a data frame with the cell-level metadata +#' requested by \code{metadata} +#' } +#' +#' @importFrom future.apply future_lapply +#' +#' @export +#' +#' @order 1 +#' +#' @concept preprocessing +#' +#' @template section-progressr +#' @template section-future +#' +#' @templateVar pkg data.table +#' @template note-reqdpkg +#' +ReadVizgen <- function( + data.dir, + transcripts = NULL, + spatial = NULL, + molecules = NULL, + type = 'segmentations', + mol.type = 'microns', + metadata = NULL, + filter = NA_character_, + z = 3L +) { + # TODO: handle multiple segmentations per z-plane + if (!requireNamespace("data.table", quietly = TRUE)) { + stop("Please install 'data.table' for this function") + } + # hdf5r is only used for loading polygon boundaries + # Not needed for all Vizgen input + hdf5 <- requireNamespace("hdf5r", quietly = TRUE) + # Argument checking + type <- match.arg( + arg = type, + choices = c('segmentations', 'centroids', 'boxes'), + several.ok = TRUE + ) + mol.type <- match.arg( + arg = mol.type, + choices = c('pixels', 'microns'), + several.ok = TRUE + ) + if (!is.null(x = metadata)) { + metadata <- match.arg( + arg = metadata, + choices = c("volume", "fov"), + several.ok = TRUE + ) + } + if (!z %in% seq.int(from = 0L, to = 6L)) { + stop("The z-index must be in the range [0, 6]") + } + use.dir <- all(vapply( + X = c(transcripts, spatial, molecules), + FUN = function(x) { + return(is.null(x = x) || is.na(x = x)) + }, + FUN.VALUE = logical(length = 1L) + )) + if (use.dir && !dir.exists(paths = data.dir)) { + stop("Cannot find Vizgen directory ", data.dir) + } + # Identify input files + files <- c( + transcripts = transcripts %||% 'cell_by_gene[_a-zA-Z0-9]*.csv', + spatial = spatial %||% 'cell_metadata[_a-zA-Z0-9]*.csv', + molecules = molecules %||% 'detected_transcripts[_a-zA-Z0-9]*.csv' + ) + files[is.na(x = files)] <- NA_character_ + h5dir <- file.path( + ifelse( + test = dirname(path = files['spatial']) == '.', + yes = data.dir, + no = dirname(path = files['spatial']) + ), + 'cell_boundaries' + ) + zidx <- paste0('zIndex_', z) + files <- vapply( + X = files, + FUN = function(x) { + x <- as.character(x = x) + if (isTRUE(x = dirname(path = x) == '.')) { + fnames <- list.files( + path = data.dir, + pattern = x, + recursive = FALSE, + full.names = TRUE + ) + return(sort(x = fnames, decreasing = TRUE)[1L]) + } else { + return(x) + } + }, + FUN.VALUE = character(length = 1L), + USE.NAMES = TRUE + ) + files[!file.exists(files)] <- NA_character_ + if (all(is.na(x = files))) { + stop("Cannot find Vizgen input files in ", data.dir) + } + # Checking for loading spatial coordinates + if (!is.na(x = files[['spatial']])) { + pprecoord <- progressor() + pprecoord( + message = "Preloading cell spatial coordinates", + class = 'sticky', + amount = 0 + ) + sp <- data.table::fread( + file = files[['spatial']], + sep = ',', + data.table = FALSE, + verbose = FALSE + # showProgress = progressr:::progressr_in_globalenv(action = 'query') + # showProgress = verbose + ) + pprecoord(type = 'finish') + rownames(x = sp) <- as.character(x = sp[, 1]) + sp <- sp[, -1, drop = FALSE] + # Check to see if we should load segmentations + if ('segmentations' %in% type) { + poly <- if (isFALSE(x = hdf5)) { + warning( + "Cannot find hdf5r; unable to load segmentation vertices", + immediate. = TRUE + ) + FALSE + } else if (!dir.exists(paths = h5dir)) { + warning("Cannot find cell boundary H5 files", immediate. = TRUE) + FALSE + } else { + TRUE + } + if (isFALSE(x = poly)) { + type <- setdiff(x = type, y = 'segmentations') + } + } + spatials <- rep_len(x = files[['spatial']], length.out = length(x = type)) + names(x = spatials) <- type + files <- c(files, spatials) + files <- files[setdiff(x = names(x = files), y = 'spatial')] + } else if (!is.null(x = metadata)) { + warning( + "metadata can only be loaded when spatial coordinates are loaded", + immediate. = TRUE + ) + metadata <- NULL + } + # Check for loading of molecule coordinates + if (!is.na(x = files[['molecules']])) { + ppremol <- progressor() + ppremol( + message = "Preloading molecule coordinates", + class = 'sticky', + amount = 0 + ) + mx <- data.table::fread( + file = files[['molecules']], + sep = ',', + verbose = FALSE + # showProgress = verbose + ) + mx <- mx[mx$global_z == z, , drop = FALSE] + if (!is.na(x = filter)) { + ppremol( + message = paste("Filtering molecules with pattern", filter), + class = 'sticky', + amount = 0 + ) + mx <- mx[!grepl(pattern = filter, x = mx$gene), , drop = FALSE] + } + ppremol(type = 'finish') + mols <- rep_len(x = files[['molecules']], length.out = length(x = mol.type)) + names(x = mols) <- mol.type + files <- c(files, mols) + files <- files[setdiff(x = names(x = files), y = 'molecules')] + } + files <- files[!is.na(x = files)] + # Read input data + outs <- vector(mode = 'list', length = length(x = files)) + names(x = outs) <- names(x = files) + if (!is.null(metadata)) { + outs <- c(outs, list(metadata = NULL)) + } + for (otype in names(x = outs)) { + outs[[otype]] <- switch( + EXPR = otype, + 'transcripts' = { + ptx <- progressor() + ptx(message = 'Reading counts matrix', class = 'sticky', amount = 0) + tx <- data.table::fread( + file = files[[otype]], + sep = ',', + data.table = FALSE, + verbose = FALSE + ) + rownames(x = tx) <- as.character(x = tx[, 1]) + tx <- t(x = as.matrix(x = tx[, -1, drop = FALSE])) + if (!is.na(x = filter)) { + ptx( + message = paste("Filtering genes with pattern", filter), + class = 'sticky', + amount = 0 + ) + tx <- tx[!grepl(pattern = filter, x = rownames(x = tx)), , drop = FALSE] + } + ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) + if ((sum(tx == 0) / length(x = tx)) > ratio) { + ptx( + message = 'Converting counts to sparse matrix', + class = 'sticky', + amount = 0 + ) + tx <- as.sparse(x = tx) + } + ptx(type = 'finish') + tx + }, + 'centroids' = { + pcents <- progressor() + pcents( + message = 'Creating centroid coordinates', + class = 'sticky', + amount = 0 + ) + pcents(type = 'finish') + data.frame( + x = sp$center_x, + y = sp$center_y, + cell = rownames(x = sp), + stringsAsFactors = FALSE + ) + }, + 'segmentations' = { + ppoly <- progressor(steps = length(x = unique(x = sp$fov))) + ppoly( + message = "Creating polygon coordinates", + class = 'sticky', + amount = 0 + ) + pg <- future_lapply( + X = unique(x = sp$fov), + FUN = function(f, ...) { + fname <- file.path(h5dir, paste0('feature_data_', f, '.hdf5')) + if (!file.exists(fname)) { + warning( + "Cannot find HDF5 file for field of view ", + f, + immediate. = TRUE + ) + return(NULL) + } + hfile <- hdf5r::H5File$new(filename = fname, mode = 'r') + on.exit(expr = hfile$close_all()) + cells <- rownames(x = subset(x = sp, subset = fov == f)) + df <- lapply( + X = cells, + FUN = function(x) { + return(tryCatch( + expr = { + cc <- hfile[['featuredata']][[x]][[zidx]][['p_0']][['coordinates']]$read() + cc <- as.data.frame(x = t(x = cc)) + colnames(x = cc) <- c('x', 'y') + cc$cell <- x + cc + }, + error = function(...) { + return(NULL) + } + )) + } + ) + ppoly() + return(do.call(what = 'rbind', args = df)) + } + ) + ppoly(type = 'finish') + pg <- do.call(what = 'rbind', args = pg) + npg <- length(x = unique(x = pg$cell)) + if (npg < nrow(x = sp)) { + warning( + nrow(x = sp) - npg, + " cells missing polygon information", + immediate. = TRUE + ) + } + pg + }, + 'boxes' = { + pbox <- progressor(steps = nrow(x = sp)) + pbox(message = "Creating box coordinates", class = 'sticky', amount = 0) + bx <- future_lapply( + X = rownames(x = sp), + FUN = function(cell) { + row <- sp[cell, ] + df <- expand.grid( + x = c(row$min_x, row$max_x), + y = c(row$min_y, row$max_y), + cell = cell, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + ) + df <- df[c(1, 3, 4, 2), , drop = FALSE] + pbox() + return(df) + } + ) + pbox(type = 'finish') + do.call(what = 'rbind', args = bx) + }, + 'metadata' = { + pmeta <- progressor() + pmeta( + message = 'Loading metadata', + class = 'sticky', + amount = 0 + ) + pmeta(type = 'finish') + sp[, metadata, drop = FALSE] + }, + 'pixels' = { + ppixels <- progressor() + ppixels( + message = 'Creating pixel-level molecule coordinates', + class = 'sticky', + amount = 0 + ) + df <- data.frame( + x = mx$x, + y = mx$y, + gene = mx$gene, + stringsAsFactors = FALSE + ) + # if (!is.na(x = filter)) { + # ppixels( + # message = paste("Filtering molecules with pattern", filter), + # class = 'sticky', + # amount = 0 + # ) + # df <- df[!grepl(pattern = filter, x = df$gene), , drop = FALSE] + # } + ppixels(type = 'finish') + df + }, + 'microns' = { + pmicrons <- progressor() + pmicrons( + message = "Creating micron-level molecule coordinates", + class = 'sticky', + amount = 0 + ) + df <- data.frame( + x = mx$global_x, + y = mx$global_y, + gene = mx$gene, + stringsAsFactors = FALSE + ) + # if (!is.na(x = filter)) { + # pmicrons( + # message = paste("Filtering molecules with pattern", filter), + # class = 'sticky', + # amount = 0 + # ) + # df <- df[!grepl(pattern = filter, x = df$gene), , drop = FALSE] + # } + pmicrons(type = 'finish') + df + }, + stop("Unknown MERFISH input type: ", type) + ) + } + return(outs) +} + #' Normalize raw data to fractions #' #' Normalize count data to relative counts per cell by dividing by the total @@ -2948,6 +4448,113 @@ ScaleData.Seurat <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Read Vitessce Expression Data +#' +#' @inheritParams ReadVitessce +#' +#' @return An expression matrix with cells as columns and features as rows +#' +#' @name vitessce-helpers +#' @rdname vitessce-helpers +#' +#' @importFrom jsonlite read_json +#' +#' @keywords internal +#' +#' @noRd +#' +.ReadVitessceGenes <- function(counts) { + p1 <- progressor() + p1( + message = "Reading counts in Vitessce genes format", + class = 'sticky', + amount = 0 + ) + p1(type = 'finish') + cts <- read_json(path = counts) + p2 <- progressor(steps = length(x = cts)) + cts <- lapply( + X = names(x = cts), + FUN = function(x) { + expr <- cts[[x]]$cells + expr <- as.matrix(x = expr) + colnames(x = expr) <- x + p2() + return(expr) + } + ) + p2(type = 'finish') + cts <- Reduce( + f = function(x, y) { + a <- merge(x = x, y = y, by = 0, all = TRUE) + rownames(x = a) <- a$Row.names + a$Row.names <- NULL + return(as.matrix(x = a)) + }, + x = cts + ) + cts[is.na(x = cts)] <- 0 + return(t(x = cts)) +} + +#' @name vitessce-helpers +#' @rdname vitessce-helpers +#' +#' @importFrom jsonlite read_json +#' +#' @keywords internal +#' +#' @noRd +#' +.ReadVitessceClusters <- function(counts) { + p1 <- progressor() + p1( + message = "Reading counts in Vitessce clusters format", + class = 'sticky', + amount = 0 + ) + p1(type = 'finish') + cts <- read_json(path = counts) + # p2 <- progressor(steps = length(x = cts)) + cells <- unlist(x = cts$cols) + features <- unlist(x = cts$rows) + cts <- lapply(X = cts[['matrix']], FUN = unlist) + cts <- t(x = as.data.frame(x = cts)) + dimnames(x = cts) <- list(features, cells) + return(cts) +} + + +#' @name nanostring-helpers +#' @rdname nanostring-helpers +#' +#' @return data frame containing counts for cells based on a single class of segmentation (eg Nuclear) +#' +#' @keywords internal +#' +#' @noRd +#' +build.cellcomp.matrix <- function(mols.df, class=NULL) { + if (!is.null(class)) { + if (!(class %in% c("Nuclear", "Membrane", "Cytoplasm"))) { + stop(paste("Cannot subset matrix based on segmentation:", class)) + } + mols.df <- mols.df[mols.df$CellComp == class,] # subset based on cell class + } + mols.df$bc <- paste0(as.character(mols.df$cell_ID), "_", as.character(mols.df$fov)) + ncol <- length(unique(mols.df$target)) + nrow <- length(unique(mols.df$bc)) # will mols.df already have a cell barcode column at this point + mtx <- matrix(data=rep(0, nrow*ncol), nrow=nrow, ncol=ncol) + colnames(mtx) <- unique(mols.df$target) + rownames(mtx) <- unique(mols.df$bc) + for (row in 1:nrow(mols.df)) { + mol <- mols.df[row, "target"] + bc <- mols.df[row, "bc"] + mtx[bc, mol] <- mtx[bc, mol] + 1 + } + return(as.data.frame(mtx)) +} + # Bin spatial regions into grid and average expression values # # @param dat Expression data diff --git a/R/reexports.R b/R/reexports.R index 0763d0a0b..4a12a6aa5 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -145,6 +145,16 @@ NULL # Functions and Generics #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom SeuratObject %||% +#' @export +#' +SeuratObject::`%||%` + +#' @importFrom SeuratObject %iff% +#' @export +#' +SeuratObject::`%iff%` + #' @importFrom SeuratObject AddMetaData #' @export #' diff --git a/R/utilities.R b/R/utilities.R index faac63d5a..b546bc5d0 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1607,40 +1607,86 @@ as.data.frame.Matrix <- function( # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Set a default value if an object is null -# -# @param lhs An object to set if it's null -# @param rhs The value to provide if x is null -# -# @return rhs if lhs is null, else lhs -# -# @author Hadley Wickham -# @references https://adv-r.hadley.nz/functions.html#missing-arguments -# -`%||%` <- function(lhs, rhs) { - if (!is.null(x = lhs)) { - return(lhs) - } else { - return(rhs) +.AsList <- function(x) { + x <- as.list(x = x) + return(sapply( + X = unique(x = names(x = x)), + FUN = function(i) { + return(unlist( + x = x[which(x = names(x = x) == i)], + recursive = FALSE, + use.names = FALSE + )) + }, + simplify = FALSE, + USE.NAMES = TRUE + )) +} + +#' @importFrom ggplot2 cut_number +#' +.Cut <- function(min, max, n) { + breaks <- levels(x = cut_number(x = c(min, max), n = n)) + breaks <- gsub(pattern = '.*,', replacement = '', x = breaks) + breaks <- gsub(pattern = ']$', replacement = '', x = breaks) + as.numeric(x = breaks) +} + +.FindE <- function(x) { + x <- as.character(x = x) + if (grepl(pattern = 'e', x = x)) { + return(as.integer(x = gsub(pattern = '.*e', replacement = '', x = x))) + } else if (grepl(pattern = '^0\\.', x = x)) { + x <- unlist(x = strsplit( + x = gsub(pattern = '.*\\.', replacement = '', x = x), + split = '' + )) + idx <- which(x = x != '0') + return(-idx) } + stop("Invalid format") } -# Set a default value if an object is NOT null -# -# @param lhs An object to set if it's NOT null -# @param rhs The value to provide if x is NOT null -# -# @return lhs if lhs is null, else rhs -# -# @author Hadley Wickham -# @references https://adv-r.hadley.nz/functions.html#missing-arguments -# -`%iff%` <- function(lhs, rhs) { - if (!is.null(x = lhs)) { - return(rhs) - } else { - return(lhs) +#' @importFrom SeuratObject Boundaries +#' +.BoundariesByImage <- function(object, fov, boundaries) { + if (!is.list(x = boundaries)) { + if (is.null(x = names(x = boundaries))) { + boundaries <- rep_len(x = list(boundaries), length.out = length(x = fov)) + names(x = boundaries) <- fov + } else { + boundaries <- .AsList(x = boundaries) + } + } + if (any(!nchar(x = names(x = boundaries)))) { + missing <- setdiff(x = fov, y = names(x = boundaries)) + idx <- which(x = !nchar(x = names(x = boundaries))) + boundaries <- c( + boundaries[intersect(x = names(x = boundaries), y = fov)], + rep_len(x = boundaries[idx], length.out = length(x = missing)) + ) + names(x = boundaries)[!nchar(x = names(x = boundaries))] <- missing + } + if (any(!fov %in% names(x = boundaries))) { + for (i in setdiff(x = fov, y = names(x = boundaries))) { + boundaries[[i]] <- Boundaries(object = object[[i]])[1L] + } + } + fov <- union(x = fov, y = names(x = boundaries)) + if (length(x = boundaries) != length(x = fov)) { + fov <- intersect(x = fov, y = names(x = boundaries)) + } + boundaries <- boundaries[fov] + for (i in fov) { + boundaries[[i]] <- Filter( + f = function(x) { + return(x %in% Boundaries(object = object[[i]]) || is_na(x = x)) + }, + x = boundaries[[i]] + ) } + boundaries <- Filter(f = length, x = boundaries) + return(boundaries) } # Generate chunk points diff --git a/R/visualization.R b/R/visualization.R index 20f9d3e9d..d73e84464 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1,9 +1,22 @@ #' @importFrom utils globalVariables -#' @importFrom ggplot2 ggproto GeomViolin +#' @importFrom ggplot2 fortify GeomViolin ggproto #' @importFrom SeuratObject DefaultDimReduc #' NULL +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Generics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' @importFrom methods setGeneric +#' +setGeneric( + name = '.PrepImageData', + def = function(data, cells, ...) { + standardGeneric(f = '.PrepImageData') + } +) + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Heatmaps #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -2132,7 +2145,8 @@ PolyDimPlot <- function( #' Polygon FeaturePlot #' -#' Plot cells as polygons, rather than single points. Color cells by any value accessible by \code{\link{FetchData}}. +#' Plot cells as polygons, rather than single points. Color cells by any value +#' accessible by \code{\link{FetchData}}. #' #' @inheritParams FeaturePlot #' @param poly.data Name of the polygon dataframe in the misc slot @@ -2243,6 +2257,778 @@ PolyFeaturePlot <- function( # Spatial Plots #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Spatial Cluster Plots +#' +#' Visualize clusters or other categorical groupings in a spatial context +#' +#' @inheritParams DimPlot +#' @inheritParams SingleImagePlot +#' @param object A \code{\link[SeuratObject]{Seurat}} object +#' @param fov Name of FOV to plot +#' @param boundaries A vector of segmentation boundaries per image to plot; +#' can be a character vector, a named character vector, or a named list. +#' Names should be the names of FOVs and values should be the names of +#' segmentation boundaries +#' @param molecules A vector of molecules to plot +#' @param nmols Max number of each molecule specified in `molecules` to plot +#' @param dark.background Set plot background to black +#' @param crop Crop the plots to area with cells only +#' @param overlap Overlay boundaries from a single image to create a single +#' plot; if \code{TRUE}, then boundaries are stacked in the order they're +#' given (first is lowest) +#' @param axes Keep axes and panel background +#' @param combine Combine plots into a single +#' \code{patchwork} ggplot object.If \code{FALSE}, +#' return a list of ggplot objects +#' @param coord.fixed Plot cartesian coordinates with fixed aspect ratio +#' +#' @return If \code{combine = TRUE}, a \code{patchwork} +#' ggplot object; otherwise, a list of ggplot objects +#' +#' @importFrom rlang !! is_na sym +#' @importFrom patchwork wrap_plots +#' @importFrom ggplot2 element_blank facet_wrap vars +#' @importFrom SeuratObject DefaultFOV Cells +#' DefaultBoundary FetchData Images Overlay +#' +#' @export +#' +ImageDimPlot <- function( + object, + fov = NULL, + boundaries = NULL, + group.by = NULL, + split.by = NULL, + cols = NULL, + shuffle.cols = FALSE, + size = 0.5, + molecules = NULL, + mols.size = 0.1, + mols.cols = NULL, + mols.alpha = 1.0, + nmols = 1000, + alpha = 1.0, + border.color = 'white', + border.size = NULL, + na.value = 'grey50', + dark.background = TRUE, + crop = FALSE, + cells = NULL, + overlap = FALSE, + axes = FALSE, + combine = TRUE, + coord.fixed = TRUE +) { + cells <- cells %||% Cells(x = object) + # Determine FOV to use + fov <- fov %||% DefaultFOV(object = object) + fov <- Filter( + f = function(x) { + return( + x %in% Images(object = object) && + inherits(x = object[[x]], what = 'FOV') + ) + }, + x = fov + ) + if (!length(x = fov)) { + stop("No compatible spatial coordinates present") + } + # Identify boundaries to use + boundaries <- boundaries %||% sapply( + X = fov, + FUN = function(x) { + return(DefaultBoundary(object = object[[x]])) + }, + simplify = FALSE, + USE.NAMES = TRUE + ) + boundaries <- .BoundariesByImage( + object = object, + fov = fov, + boundaries = boundaries + ) + fov <- names(x = boundaries) + overlap <- rep_len(x = overlap, length.out = length(x = fov)) + crop <- rep_len(x = crop, length.out = length(x = fov)) + names(x = crop) <- fov + # Prepare plotting data + group.by <- boundaries %!NA% group.by %||% 'ident' + vars <- c(group.by, split.by) + md <- if (!is_na(x = vars)) { + FetchData( + object = object, + vars = vars[!is.na(x = vars)], + cells = cells + ) + } else { + NULL + } + pnames <- unlist(x = lapply( + X = seq_along(along.with = fov), + FUN = function(i) { + return(if (isTRUE(x = overlap[i])) { + fov[i] + } else { + paste(fov[i], boundaries[[i]], sep = '_') + }) + } + )) + pdata <- vector(mode = 'list', length = length(x = pnames)) + names(x = pdata) <- pnames + for (i in names(x = pdata)) { + ul <- unlist(x = strsplit(x = i, split = '_')) + img <- paste(ul[1:length(ul)-1], collapse = '_') + # Apply overlap + lyr <- ul[length(ul)] + if (is.na(x = lyr)) { + lyr <- boundaries[[img]] + } + # TODO: Apply crop + pdata[[i]] <- lapply( + X = lyr, + FUN = function(l) { + if (l == 'NA') { + return(NA) + } + df <- fortify(model = object[[img]][[l]]) + df <- df[df$cell %in% cells, , drop = FALSE] + if (!is.null(x = md)) { + df <- merge(x = df, y = md, by.x = 'cell', by.y = 0, all.x = TRUE) + } + df$cell <- paste(l, df$cell, sep = '_') + df$boundary <- l + return(df) + } + ) + pdata[[i]] <- if (!is_na(x = pdata[[i]])) { + do.call(what = 'rbind', args = pdata[[i]]) + } else { + unlist(x = pdata[[i]]) + } + } + # Fetch molecule information + if (!is.null(x = molecules)) { + molecules <- .MolsByFOV( + object = object, + fov = fov, + molecules = molecules + ) + mdata <- vector(mode = 'list', length = length(x = fov)) + names(x = mdata) <- fov + for (img in names(x = mdata)) { + idata <- object[[img]] + if (!img %in% names(x = molecules)) { + mdata[[img]] <- NULL + next + } + if (isTRUE(x = crop[img])) { + idata <- Overlay(x = idata, y = idata) + } + imols <- gsub( + pattern = paste0('^', Key(object = idata)), + replacement = '', + x = molecules[[img]] + ) + mdata[[img]] <- FetchData( + object = idata, + vars = imols, + nmols = nmols + ) + } + } else { + mdata <- NULL + } + # Build the plots + plots <- vector( + mode = 'list', + length = length(x = pdata) * ifelse( + test = length(x = group.by), + yes = length(x = group.by), + no = 1L + ) + ) + idx <- 1L + for (group in group.by) { + for (i in seq_along(along.with = pdata)) { + img <- unlist(x = strsplit(x = names(x = pdata)[i], split = '_'))[1L] + p <- SingleImagePlot( + data = pdata[[i]], + col.by = pdata[[i]] %!NA% group, + molecules = mdata[[img]], + cols = cols, + shuffle.cols = shuffle.cols, + size = size, + alpha = alpha, + mols.size = mols.size, + mols.cols = mols.cols, + mols.alpha = mols.alpha, + border.color = border.color, + border.size = border.size, + na.value = na.value, + dark.background = dark.background + ) + if (!is.null(x = split.by)) { + p <- p + facet_wrap( + facets = vars(!!sym(x = split.by)) + ) + } + if (!isTRUE(x = axes)) { + p <- p + NoAxes(panel.background = element_blank()) + } + if (!anyDuplicated(x = pdata[[i]]$cell)) { + p <- p + guides(fill = guide_legend(override.aes = list(size=4L, alpha=1))) + } + if (isTRUE(coord.fixed)) { + p <- p + coord_fixed() + } + plots[[idx]] <- p + idx <- idx + 1L + } + } + if (isTRUE(x = combine)) { + plots <- wrap_plots(plots) + } + return(plots) +} + +#' Spatial Feature Plots +#' +#' Visualize expression in a spatial context +#' +#' @inheritParams FeaturePlot +#' @inheritParams ImageDimPlot +#' @param scale Set color scaling across multiple plots; choose from: +#' \itemize{ +#' \item \dQuote{\code{feature}}: Plots per-feature are scaled across splits +#' \item \dQuote{\code{all}}: Plots per-feature are scaled across all features +#' \item \dQuote{\code{none}}: Plots are not scaled; \strong{note}: setting +#' \code{scale} to \dQuote{\code{none}} will result in color scales that are +#' \emph{not} comparable between plots +#' } +#' Ignored if \code{blend = TRUE} +#' +#' @inherit ImageDimPlot return +#' +#' @importFrom patchwork wrap_plots +#' @importFrom cowplot theme_cowplot +#' @importFrom ggplot2 dup_axis element_blank element_text facet_wrap guides +#' labs margin vars scale_y_continuous theme +#' @importFrom SeuratObject DefaultFOV Cells DefaultBoundary +#' FetchData Images Overlay +#' +#' @export +#' +ImageFeaturePlot <- function( + object, + features, + fov = NULL, + boundaries = NULL, + cols = if (isTRUE(x = blend)) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + c("lightgrey", "firebrick1") + }, + size = 0.5, + min.cutoff = NA, + max.cutoff = NA, + split.by = NULL, + molecules = NULL, + mols.size = 0.1, + mols.cols = NULL, + nmols = 1000, + alpha = 1.0, + border.color = 'white', + border.size = NULL, + dark.background = TRUE, + blend = FALSE, + blend.threshold = 0.5, + crop = FALSE, + cells = NULL, + scale = c('feature', 'all', 'none'), + overlap = FALSE, + axes = FALSE, + combine = TRUE, + coord.fixed = TRUE +) { + cells <- cells %||% Cells(x = object) + scale <- scale[[1L]] + scale <- match.arg(arg = scale) + # Set a theme to remove right-hand Y axis lines + # Also sets right-hand Y axis text label formatting + no.right <- theme( + axis.line.y.right = element_blank(), + axis.ticks.y.right = element_blank(), + axis.text.y.right = element_blank(), + axis.title.y.right = element_text( + face = "bold", + size = 14, + margin = margin(r = 7) + ) + ) + # Determine fov to use + fov <- fov %||% DefaultFOV(object = object) + fov <- Filter( + f = function(x) { + return( + x %in% Images(object = object) && + inherits(x = object[[x]], what = 'FOV') + ) + }, + x = fov + ) + if (!length(x = fov)) { + stop("No compatible spatial coordinates present") + } + # Identify boundaries to use + boundaries <- boundaries %||% sapply( + X = fov, + FUN = function(x) { + return(DefaultBoundary(object = object[[x]])) + }, + simplify = FALSE, + USE.NAMES = TRUE + ) + boundaries <- .BoundariesByImage( + object = object, + fov = fov, + boundaries = boundaries + ) + fov <- names(x = boundaries) + # Check overlaps/crops + if (isTRUE(x = blend) || !is.null(x = split.by)) { + type <- ifelse(test = isTRUE(x = 'blend'), yes = 'Blended', no = 'Split') + if (length(x = fov) != 1L) { + fov <- fov[1L] + warning( + type, + ' image feature plots can only be done on a single image, using "', + fov, + '"', + call. = FALSE, + immediate. = TRUE + ) + } + if (any(!overlap) && length(x = boundaries[[fov]]) > 1L) { + warning( + type, + " image feature plots require overlapped segmentations", + call. = FALSE, + immediate. = TRUE + ) + } + overlap <- TRUE + } + overlap <- rep_len(x = overlap, length.out = length(x = fov)) + crop <- rep_len(x = crop, length.out = length(x = fov)) + names(x = crop) <- names(x = overlap) <- fov + # Checks for blending + if (isTRUE(x = blend)) { + if (length(x = features) != 2L) { + stop("Blended feature plots only works with two features") + } + default.colors <- eval(expr = formals(fun = ImageFeaturePlot)$cols) + cols <- switch( + EXPR = as.character(x = length(x = cols)), + '0' = { + warning("No colors provided, using default colors", immediate. = TRUE) + default.colors + }, + '1' = { + warning( + "Only one color provided, assuming specified is double-negative and augmenting with default colors", + immediate. = TRUE + ) + c(cols, default.colors[2:3]) + }, + '2' = { + warning( + "Only two colors provided, assuming specified are for features and augmenting with '", + default.colors[1], + "' for double-negatives", + immediate. = TRUE + ) + c(default.colors[1], cols) + }, + '3' = cols, + { + warning( + "More than three colors provided, using only first three", + immediate. = TRUE + ) + cols[1:3] + } + ) + } + # Get feature, splitting data + md <- FetchData( + object = object, + vars = c(features, split.by[1L]), + cells = cells + ) + split.by <- intersect(x = split.by, y = colnames(x = md)) + if (!length(x = split.by)) { + split.by <- NULL + } + imax <- ifelse( + test = is.null(x = split.by), + yes = ncol(x = md), + no = ncol(x = md) - length(x = split.by) + ) + features <- colnames(x = md)[1:imax] + # Determine cutoffs + min.cutoff <- mapply( + FUN = function(cutoff, feature) { + return(ifelse( + test = is.na(x = cutoff), + yes = min(md[[feature]]), + no = cutoff + )) + }, + cutoff = min.cutoff, + feature = features + ) + max.cutoff <- mapply( + FUN = function(cutoff, feature) { + return(ifelse( + test = is.na(x = cutoff), + yes = max(md[[feature]]), + no = cutoff + )) + }, + cutoff = max.cutoff, + feature = features + ) + check.lengths <- unique(x = vapply( + X = list(features, min.cutoff, max.cutoff), + FUN = length, + FUN.VALUE = numeric(length = 1) + )) + if (length(x = check.lengths) != 1) { + stop("There must be the same number of minimum and maximum cuttoffs as there are features") + } + brewer.gran <- ifelse( + test = length(x = cols) == 1, + yes = brewer.pal.info[cols, ]$maxcolors, + no = length(x = cols) + ) + # Apply cutoffs + for (i in seq_along(along.with = features)) { + f <- features[[i]] + data.feature <- md[[f]] + min.use <- SetQuantile(cutoff = min.cutoff[i], data = data.feature) + max.use <- SetQuantile(cutoff = max.cutoff[i], data = data.feature) + data.feature[data.feature < min.use] <- min.use + data.feature[data.feature > max.use] <- max.use + if (brewer.gran != 2) { + data.feature <- if (all(data.feature == 0)) { + rep_len(x = 0, length.out = length(x = data.feature)) + } else { + as.numeric(x = as.factor(x = cut( + x = as.numeric(x = data.feature), + breaks = brewer.gran + ))) + } + } + md[[f]] <- data.feature + } + # Figure out splits + if (is.null(x = split.by)) { + split.by <- RandomName() + md[[split.by]] <- factor(x = split.by) + } + if (!is.factor(x = md[[split.by]])) { + md[[split.by]] <- factor(x = md[[split.by]]) + } + # Apply blends + if (isTRUE(x = blend)) { + md <- lapply( + X = levels(x = md[[split.by]]), + FUN = function(x) { + df <- md[as.character(x = md[[split.by]]) == x, , drop = FALSE] + no.expression <- features[colMeans(x = df[, features]) == 0] + if (length(x = no.expression)) { + stop( + "The following features have no value: ", + paste(no.expression, collapse = ', ') + ) + } + return(cbind( + df[, split.by, drop = FALSE], + BlendExpression(data = df[, features]) + )) + } + ) + md <- do.call(what = 'rbind', args = md) + features <- setdiff(x = colnames(x = md), y = split.by) + } + # Prepare plotting data + pnames <- unlist(x = lapply( + X = seq_along(along.with = fov), + FUN = function(i) { + return(if (isTRUE(x = overlap[i])) { + fov[i] + } else { + paste(fov[i], boundaries[[i]], sep = '_') + }) + } + )) + pdata <- vector(mode = 'list', length = length(x = pnames)) + names(x = pdata) <- pnames + for (i in names(x = pdata)) { + ul <- unlist(x = strsplit(x = i, split = '_')) + img <- paste(ul[1:length(ul)-1], collapse = '_') + # Apply overlap + lyr <- ul[length(ul)] + if (is.na(x = lyr)) { + lyr <- boundaries[[img]] + } + pdata[[i]] <- lapply( + X = lyr, + FUN = function(l) { + df <- fortify(model = object[[img]][[l]]) + df <- df[df$cell %in% cells, , drop = FALSE] + if (!is.null(x = md)) { + df <- merge(x = df, y = md, by.x = 'cell', by.y = 0, all.x = TRUE) + } + df$cell <- paste(l, df$cell, sep = '_') + df$boundary <- l + return(df) + } + ) + pdata[[i]] <- if (!is_na(x = pdata[[i]])) { + do.call(what = 'rbind', args = pdata[[i]]) + } else { + unlist(x = pdata[[i]]) + } + } + # Fetch molecule information + if (!is.null(x = molecules)) { + molecules <- .MolsByFOV( + object = object, + fov = fov, + molecules = molecules + ) + mdata <- vector(mode = 'list', length = length(x = fov)) + names(x = mdata) <- fov + for (img in names(x = mdata)) { + idata <- object[[img]] + if (!img %in% names(x = molecules)) { + mdata[[img]] <- NULL + next + } + if (isTRUE(x = crop[img])) { + idata <- Overlay(x = idata, y = idata) + } + imols <- gsub( + pattern = paste0('^', Key(object = idata)), + replacement = '', + x = molecules[[img]] + ) + mdata[[img]] <- FetchData( + object = idata, + vars = imols, + nmols = nmols + ) + } + } else { + mdata <- NULL + } + # Set blended colors + if (isTRUE(x = blend)) { + ncol <- 4 + color.matrix <- BlendMatrix( + two.colors = cols[2:3], + col.threshold = blend.threshold, + negative.color = cols[1] + ) + cols <- cols[2:3] + colors <- list( + color.matrix[, 1], + color.matrix[1, ], + as.vector(x = color.matrix) + ) + blend.legend <- BlendMap(color.matrix = color.matrix) + } + limits <- switch( + EXPR = scale, + 'all' = range(unlist(x = md[, features])), + NULL + ) + # Build the plots + plots <- vector( + mode = 'list', + length = length(x = levels(x = md[[split.by]])) + ) + names(x = plots) <- levels(x = md[[split.by]]) + for (i in seq_along(along.with = levels(x = md[[split.by]]))) { + ident <- levels(x = md[[split.by]])[i] + plots[[ident]] <- vector(mode = 'list', length = length(x = pdata)) + names(x = plots[[ident]]) <- names(x = pdata) + if (isTRUE(x = blend)) { + blend.key <- suppressMessages( + expr = blend.legend + + scale_y_continuous( + sec.axis = dup_axis(name = ifelse( + test = length(x = levels(x = md[[split.by]])) > 1, + yes = ident, + no = '' + )), + expand = c(0, 0) + ) + + labs( + x = features[1L], + y = features[2L], + title = if (i == 1L) { + paste('Color threshold:', blend.threshold) + } else { + NULL + } + ) + + no.right + ) + } + for (j in seq_along(along.with = pdata)) { + key <- names(x = pdata)[j] + img <- unlist(x = strsplit(x = key, split = '_'))[1L] + plots[[ident]][[key]] <- vector( + mode = 'list', + length = length(x = features) + ifelse( + test = isTRUE(x = blend), + yes = 1L, + no = 0L + ) + ) + data.plot <- pdata[[j]][as.character(x = pdata[[j]][[split.by]]) == ident, , drop = FALSE] + for (y in seq_along(along.with = features)) { + feature <- features[y] + # Get blended colors + cols.use <- if (isTRUE(x = blend)) { + cc <- as.numeric(x = as.character(x = data.plot[, feature])) + 1 + colors[[y]][sort(unique(x = cc))] + } else { + NULL + } + colnames(data.plot) <- gsub("-", "_", colnames(data.plot)) + p <- SingleImagePlot( + data = data.plot, + col.by = gsub("-", "_", feature), + size = size, + col.factor = blend, + cols = cols.use, + molecules = mdata[[img]], + mols.size = mols.size, + mols.cols = mols.cols, + alpha = alpha, + border.color = border.color, + border.size = border.size, + dark.background = dark.background + ) + + CenterTitle() + labs(fill=feature) + # Remove fill guides for blended plots + if (isTRUE(x = blend)) { + p <- p + guides(fill = 'none') + } + if (isTRUE(coord.fixed)) { + p <- p + coord_fixed() + } + # Remove axes + if (!isTRUE(x = axes)) { + p <- p + NoAxes(panel.background = element_blank()) + } else if (isTRUE(x = blend) || length(x = levels(x = md[[split.by]])) > 1L) { + if (y != 1L) { + p <- p + theme( + axis.line.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + axis.title.y.left = element_blank() + ) + } + if (i != length(x = levels(x = md[[split.by]]))) { + p <- p + theme( + axis.line.x = element_blank(), + axis.ticks.x = element_blank(), + axis.text.x = element_blank(), + axis.title.x = element_blank() + ) + } + } + # Add colors for unblended plots + if (!isTRUE(x = blend)) { + if (length(x = cols) == 1L) { + p <- p + scale_fill_brewer(palette = cols) + } else { + cols.grad <- cols + fexp <- data.plot[data.plot[[split.by]] == ident, feature, drop = TRUE] + fexp <- unique(x = fexp) + if (length(x = fexp) == 1L) { + warning( + "All cells have the same value (", + fexp, + ") of ", + feature, + call. = FALSE, + immediate. = TRUE + ) + if (fexp == 0) { + cols.grad <- cols.grad[1L] + } + } + # Check if we're scaling the colorbar across splits + if (scale == 'feature') { + limits <- range(pdata[[j]][[feature]]) + } + p <- p + ggplot2::scale_fill_gradientn( + colors = cols.grad, + guide = 'colorbar', + limits = limits + ) + } + } + # Add some labels + p <- p + if (i == 1L) { + ggplot2::labs(title = feature) + } else { + ggplot2::labs(title = NULL) + } + plots[[ident]][[key]][[y]] <- p + } + if (isTRUE(x = blend)) { + plots[[ident]][[key]][[length(x = plots[[ident]][[key]])]] <- blend.key + } else if (length(x = levels(x = md[[split.by]])) > 1L) { + plots[[ident]][[key]][[y]] <- suppressMessages( + expr = plots[[ident]][[key]][[y]] + + scale_y_continuous(sec.axis = dup_axis(name = ident)) + + no.right + ) + } + } + plots[[ident]] <- unlist( + x = plots[[ident]], + recursive = FALSE, + use.names = FALSE + ) + } + plots <- unlist(x = plots, recursive = FALSE, use.names = FALSE) + if (isTRUE(x = combine)) { + if (isTRUE(x = blend) || length(x = levels(x = md[[split.by]])) > 1L) { + plots <- wrap_plots( + plots, + ncol = ifelse( + test = isTRUE(x = blend), + yes = 4L, + no = length(x = features) + ), + nrow = length(x = levels(x = md[[split.by]])), + guides = 'collect' + ) + } else { + plots <- wrap_plots(plots) + } + } + return(plots) +} + #' Visualize spatial and clustering (dimensional reduction) data in a linked, #' interactive framework #' @@ -2947,7 +3733,7 @@ ISpatialFeaturePlot <- function( #' default, ggplot2 assigns colors #' @param image.alpha Adjust the opacity of the background images. Set to 0 to #' remove. -#' @param crop Crop the plot in to focus on points plotted. Set to FALSE to show +#' @param crop Crop the plot in to focus on points plotted. Set to \code{FALSE} to show #' entire background image. #' @param slot If plotting a feature, which data slot to pull from (counts, #' data, or scale.data) @@ -3895,10 +4681,10 @@ PlotClusterTree <- function(object, direction = "downwards", ...) { #' @param balanced Return an equal number of genes with + and - scores. If #' FALSE (default), returns the top genes ranked by the scores absolute values #' @param ncol Number of columns to display -#' @param combine Combine plots into a single \code{\link[patchwork]{patchwork}ed} +#' @param combine Combine plots into a single \code{patchwork} #' ggplot object. If \code{FALSE}, return a list of ggplot objects #' -#' @return A \code{\link[patchwork]{patchwork}ed} ggplot object if +#' @return A \code{patchwork} ggplot object if #' \code{combine = TRUE}; otherwise, a list of ggplot objects #' #' @importFrom patchwork wrap_plots @@ -4472,7 +5258,7 @@ CustomPalette <- function( return(rgb(red = r, green = g, blue = b)) } -#' Discrete colour palettes from the pals package +#' Discrete colour palettes from pals #' #' These are included here because pals depends on a number of compiled #' packages, and this can lead to increases in run time for Travis, @@ -4485,8 +5271,9 @@ CustomPalette <- function( #' #' @param n Number of colours to be generated. #' @param palette Options are -#' "alphabet", "alphabet2", "glasbey", "polychrome", and "stepped". +#' "alphabet", "alphabet2", "glasbey", "polychrome", "stepped", and "parade". #' Can be omitted and the function will use the one based on the requested n. +#' @param shuffle Shuffle the colors in the selected palette. #' #' @return A vector of colors #' @@ -4498,7 +5285,7 @@ CustomPalette <- function( #' @export #' @concept visualization #' -DiscretePalette <- function(n, palette = NULL) { +DiscretePalette <- function(n, palette = NULL, shuffle = FALSE) { palettes <- list( alphabet = c( "#F0A0FF", "#0075DC", "#993F00", "#4C005C", "#191919", "#005C31", @@ -4535,8 +5322,26 @@ DiscretePalette <- function(n, palette = NULL) { "#CCAA7A", "#E6D2B8", "#54990F", "#78B33E", "#A3CC7A", "#CFE6B8", "#0F8299", "#3E9FB3", "#7ABECC", "#B8DEE6", "#3D0F99", "#653EB3", "#967ACC", "#C7B8E6", "#333333", "#666666", "#999999", "#CCCCCC" + ), + parade = c( + '#ff6969', '#9b37ff', '#cd3737', '#69cdff', '#ffff69', '#69cdcd', + '#9b379b', '#3737cd', '#ffff9b', '#cdff69', '#ff9b37', '#37ffff', + '#9b69ff', '#37cd69', '#ff3769', '#ff3737', '#37ff9b', '#cdcd37', + '#3769cd', '#37cdff', '#9b3737', '#ff699b', '#9b9bff', '#cd9b37', + '#69ff37', '#cd3769', '#cd69cd', '#cd6937', '#3737ff', '#cdcd69', + '#ff9b69', '#cd37cd', '#9bff37', '#cd379b', '#cd6969', '#69ff9b', + '#ff379b', '#9bff9b', '#6937ff', '#69cd37', '#cdff37', '#9bff69', + '#9b37cd', '#ff37ff', '#ff37cd', '#ffff37', '#37cd9b', '#379bff', + '#ffcd37', '#379b37', '#ff9bff', '#379b9b', '#69ffcd', '#379bcd', + '#ff69ff', '#ff9b9b', '#37ff69', '#ff6937', '#6969ff', '#699bff', + '#ffcd69', '#69ffff', '#37ff37', '#6937cd', '#37cd37', '#3769ff', + '#cd69ff', '#6969cd', '#9bcd37', '#69ff69', '#37cdcd', '#cd37ff', + '#37379b', '#37ffcd', '#69cd69', '#ff69cd', '#9bffff', '#9b9b37' ) ) + if (is.null(x = n)) { + return(names(x = palettes)) + } if (is.null(x = palette)) { if (n <= 26) { palette <- "alphabet" @@ -4550,7 +5355,11 @@ DiscretePalette <- function(n, palette = NULL) { if (n > length(x = palette.vec)) { warning("Not enough colours in specified palette") } - palette.vec[seq_len(length.out = n)] + if (isTRUE(shuffle)) { + palette.vec <- sample(palette.vec) + } + palette <- palette.vec[seq_len(length.out = n)] + return(palette) } #' @rdname CellSelector @@ -5321,10 +6130,145 @@ WhiteBackground <- function(...) { return(white.theme) } +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Fortify Methods +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#' Prepare Coordinates for Spatial Plots +#' +#' @inheritParams SeuratObject::GetTissueCoordinates +#' @param model A \code{\linkS4class{Segmentation}}, +#' \code{\linkS4class{Centroids}}, +#' or \code{\linkS4class{Molecules}} object +#' @param data Extra data to be used for annotating the cell segmentations; the +#' easiest way to pass data is a one-column +#' \code{\link[base:data.frame]{data frame}} with the values to color by and +#' the cell names are rownames +#' @param ... Arguments passed to other methods +#' +#' @name fortify-Spatial +#' @rdname fortify-Spatial +#' +#' @importFrom SeuratObject GetTissueCoordinates +#' +#' @keywords internal +#' +#' @method fortify Centroids +#' @export +#' +#' @aliases fortify +#' +fortify.Centroids <- function(model, data, ...) { + df <- GetTissueCoordinates(object = model, full = FALSE) + if (missing(x = data)) { + data <- NULL + } + data <- .PrepImageData(data = data, cells = lengths(x = model), ...) + df <- cbind(df, data) + return(df) +} + +#' @rdname fortify-Spatial +#' @method fortify Molecules +#' +#' @importFrom SeuratObject FetchData +#' +#' @export +#' +fortify.Molecules <- function( + model, + data, + nmols = NULL, + seed = NA_integer_, + ... +) { + return(FetchData(object = model, vars = data, nmols = nmols, seed = seed, ...)) +} + +#' @rdname fortify-Spatial +#' @method fortify Segmentation +#' @export +#' +fortify.Segmentation <- function(model, data, ...) { + df <- GetTissueCoordinates(object = model, full = TRUE) + if (missing(x = data)) { + data <- NULL + } + data <- .PrepImageData(data = data, cells = lengths(x = model), ...) + df <- cbind(df, data) + return(df) +} + #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom SeuratObject Features Key Keys Molecules +#' +.MolsByFOV <- function(object, fov, molecules) { + keys <- Key(object = object)[fov] + keyed.mols <- sapply( + X = names(x = keys), + FUN = function(img) { + if (is.null(x = Molecules(object = object[[img]]))) { + return(NULL) + } + key <- keys[img] + mols <- grep(pattern = paste0('^', key), x = molecules, value = TRUE) + names(x = mols) <- mols + mols <- gsub(pattern = paste0('^', key), replacement = '', x = mols) + keyed <- sapply( + X = SeuratObject::Keys(object = object[[img]]), + FUN = function(x) { + return(grep(pattern = paste0('^', x), x = mols, value = TRUE)) + } + ) + keyed <- unlist(x = keyed) + names(x = keyed) <- gsub( + pattern = '^.*\\.', + replacement = '', + x = names(x = keyed) + ) + missing <- mols[!mols %in% keyed] + missing <- missing[missing %in% Features(x = object[[img]])] + if (length(x = missing)) { + # TODO: replace with default molecules + default <- Molecules(object = object[[img]])[1L] + mn <- names(x = missing) + missing <- paste0( + SeuratObject::Key(object = object[[img]][[default]]), + missing + ) + names(x = missing) <- mn + } + return(c(missing, keyed)) + }, + simplify = FALSE, + USE.NAMES = TRUE + ) + found <- names(x = unlist(x = keyed.mols)) + found <- gsub(pattern = '^.*\\.', replacement = '', x = found) + missing <- setdiff(x = molecules, y = found) + names(x = missing) <- missing + for (img in fov) { + imissing <- missing + for (i in seq_along(along.with = imissing)) { + for (lkey in Keys(object = object[[img]])) { + imissing[[i]] <- gsub( + pattern = paste0('^', lkey), + replacement = '', + x = imissing[[i]] + ) + } + } + imissing <- names(x = imissing[imissing %in% Features(x = object[[img]])]) + keyed.mols[[img]] <- c(keyed.mols[[img]], imissing) + } + keyed.mols <- Filter(f = length, x = keyed.mols) + keyed.mols <- sapply(X = keyed.mols, FUN = unname, simplify = FALSE) + return(keyed.mols) +} + # Calculate bandwidth for use in ggplot2-based smooth scatter plots # # Inspired by MASS::bandwidth.nrd and graphics:::.smoothScatterCalcDensity @@ -7469,6 +8413,246 @@ SingleImageMap <- function(data, order = NULL, title = NULL) { title(main = title) } +#' Single Spatial Plot +#' +#' @param data A data frame with at least the following columns: +#' \itemize{ +#' \item \dQuote{\code{x}}: Spatial-resolved \emph{x} coordinates, will be +#' plotted on the \emph{y}-axis +#' \item \dQuote{\code{y}}: Spatially-resolved \emph{y} coordinates, will be +#' plotted on the \emph{x}-axis +#' \item \dQuote{\code{cell}}: Cell name +#' \item \dQuote{\code{boundary}}: Segmentation boundary label; when plotting +#' multiple segmentation layers, the order of boundary transparency is set by +#' factor levels for this column +#' } +#' Can pass \code{NA} to \code{data} suppress segmentation visualization +#' @param col.by Name of column in \code{data} to color cell segmentations by; +#' pass \code{NA} to suppress coloring +#' @param col.factor Are the colors a factor or discrete? +#' @param cols Colors for cell segmentations; can be one of the +#' following: +#' \itemize{ +#' \item \code{NULL} for default ggplot2 colors +#' \item A numeric value or name of a +#' \link[RColorBrewer:RColorBrewer]{color brewer palette} +#' \item Name of a palette for \code{\link{DiscretePalette}} +#' \item A vector of colors equal to the length of unique levels of +#' \code{data$col.by} +#' } +#' @param shuffle.cols Randomly shuffle colors when a palette or +#' vector of colors is provided to \code{cols} +#' @param size Point size for cells when plotting centroids +#' @param molecules A data frame with spatially-resolved molecule coordinates; +#' should have the following columns: +#' \itemize{ +#' \item \dQuote{\code{x}}: Spatial-resolved \emph{x} coordinates, will be +#' plotted on the \emph{y}-axis +#' \item \dQuote{\code{y}}: Spatially-resolved \emph{y} coordinates, will be +#' plotted on the \emph{x}-axis +#' \item \dQuote{\code{molecule}}: Molecule name +#' } +#' @param mols.size Point size for molecules +#' @param mols.cols A vector of color for molecules. The "Set1" palette from +#' RColorBrewer is used by default. +#' @param mols.alpha Alpha value for molecules, should be between 0 and 1 +#' @param alpha Alpha value, should be between 0 and 1; when plotting multiple +#' boundaries, \code{alpha} is equivalent to max alpha +#' @param border.color Color of cell segmentation border; pass \code{NA} +#' to suppress borders for segmentation-based plots +#' @param border.size Thickness of cell segmentation borders; pass \code{NA} +#' to suppress borders for centroid-based plots +#' @param na.value Color value for \code{NA} segmentations when +#' using custom scale +#' @param ... Ignored +#' +#' @return A ggplot object +#' +#' @importFrom rlang is_na +#' @importFrom SeuratObject %NA% %!NA% +#' @importFrom RColorBrewer brewer.pal.info +#' @importFrom ggplot2 aes_string geom_point geom_polygon ggplot guides +#' guide_legend scale_alpha_manual scale_color_manual scale_fill_brewer +#' scale_fill_manual +#' +#' @keywords internal +#' +#' @export +#' +SingleImagePlot <- function( + data, + col.by = NA, + col.factor = TRUE, + cols = NULL, + shuffle.cols = FALSE, + size = 0.1, + molecules = NULL, + mols.size = 0.1, + mols.cols = NULL, + mols.alpha = 1.0, + alpha = molecules %iff% 0.3 %||% 0.6, + border.color = 'white', + border.size = NULL, + na.value = 'grey50', + dark.background = TRUE, + ... +) { + # Check input data + if (!is_na(x = data)) { + if (!all(c('x', 'y', 'cell', 'boundary') %in% colnames(x = data))) { + stop("Invalid data coordinates") + } + if (!is_na(x = col.by)) { + if (!col.by %in% colnames(x = data)) { + warning( + "Cannot find 'col.by' ('", + col.by, + "') in data coordinates", + immediate. = TRUE + ) + col.by <- NA + } else if (isTRUE(x = col.factor) && !is.factor(x = data[[col.by]])) { + data[[col.by]] <- factor( + x = data[[col.by]], + levels = unique(x = data[[col.by]]) + ) + } else if (isFALSE(x = col.factor) && is.factor(x = data[[col.by]])) { + data[[col.by]] <- as.vector(x = data[[col.by]]) + } + } + if (is_na(x = col.by) && !is.null(x = cols)) { + col.by <- RandomName(length = 7L) + data[[col.by]] <- TRUE + } + if (!is.factor(x = data$boundary)) { + data$boundary <- factor( + x = data$boundary, + levels = unique(x = data$boundary) + ) + } + # Determine alphas + if (is.na(x = alpha)) { + alpha <- 1L + } + alpha.min <- ifelse( + test = alpha < 1L, + yes = 1 * (10 ^ .FindE(x = alpha)), + no = 0.1 + ) + if (alpha.min == alpha) { + alpha.min <- 1 * (10 ^ (.FindE(x = alpha) - 1)) + } + alphas <- .Cut( + min = alpha.min, + max = alpha, + n = length(x = levels(x = data$boundary)) + ) + } + # Assemble plot + plot <- ggplot( + data = data %NA% NULL, + mapping = aes_string( + x = 'y', + y = 'x', + alpha = 'boundary', + fill = col.by %NA% NULL + ) + ) + if (!is_na(x = data)) { + plot <- plot + + scale_alpha_manual(values = alphas) + + if (anyDuplicated(x = data$cell)) { + if (is.null(border.size)) { + border.size <- 0.3 + } + geom_polygon( + mapping = aes_string(group = 'cell'), + color = border.color, + size = border.size + ) + } else { + # Default to no borders when plotting centroids + if (is.null(border.size)) { + border.size <- 0.0 + } + geom_point( + shape = 21, + color = border.color, + stroke = border.size, + size = size + ) + } + if (!is.null(x = cols)) { + plot <- plot + if (is.numeric(x = cols) || cols[1L] %in% rownames(x = brewer.pal.info)) { + palette <- brewer.pal(n = length(x = levels(x = data[[col.by]])), cols) + if (length(palette) < length(x = levels(x = data[[col.by]]))) { + num.blank <- length(x = levels(x = data[[col.by]])) - length(palette) + palette <- c(palette, rep(na.value, num.blank)) + } + if (isTRUE(shuffle.cols)) { + palette <- sample(palette) + } + scale_fill_manual(values = palette, na.value = na.value) + } else if (cols[1] %in% DiscretePalette(n = NULL)) { + scale_fill_manual( + values = DiscretePalette( + n = length(x = levels(x = data[[col.by]])), + palette = cols, + shuffle = shuffle.cols + ), + na.value = na.value + ) + } else { + if (isTRUE(shuffle.cols)) { + cols <- sample(cols) + } + scale_fill_manual(values = cols, na.value = na.value) + } + } + if (length(x = levels(x = data$boundary)) == 1L) { + plot <- plot + guides(alpha = 'none') + } + # Adjust guides + if (isTRUE(x = col.factor) && length(x = levels(x = data[[col.by]])) <= 1L) { + plot <- plot + guides(fill = 'none') + } + } + # Add molecule sets + if (is.data.frame(x = molecules)) { + if (all(c('x', 'y', 'molecule') %in% colnames(x = molecules))) { + if (!is.factor(x = molecules$molecule)) { + molecules$molecule <- factor( + x = molecules$molecule, + levels = unique(x = molecules$molecule) + ) + } + plot <- plot + geom_point( + mapping = aes_string(fill = NULL, alpha = NULL, color = "molecule"), + data = molecules, + size = mols.size, + alpha = mols.alpha, + show.legend = c(color = TRUE, fill = FALSE, alpha = FALSE) + ) + + guides(color = guide_legend(override.aes = list(size = 3L))) + + scale_color_manual( + name = 'Molecules', + values = mols.cols %||% brewer.pal( + n = length(x = levels(x = molecules$molecule)), + name = "Set1" + ), + guide = guide_legend() + ) + } else { + warning("Invalid molecule coordinates", immediate. = TRUE) + } + } + + if (isTRUE(dark.background)) { + plot <- plot + DarkTheme() + } + return(plot) +} + # A single polygon plot # # @param data Data to plot @@ -7751,3 +8935,98 @@ Transform <- function(data, xlim = c(-Inf, Inf), ylim = c(-Inf, Inf)) { colnames(x = data) <- df.names return(data) } + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# S4 Methods +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +setMethod( + f = '.PrepImageData', + signature = c(data = 'data.frame', cells = 'rle'), + definition = function(data, cells, ...) { + data <- sapply( + X = colnames(x = data), + FUN = function(x) { + j <- data[[x]] + names(x = j) <- rownames(x = data) + return(.PrepImageData(data = j, cells = cells, name = x)) + }, + simplify = FALSE, + USE.NAMES = TRUE + ) + return(do.call(what = 'cbind', args = data)) + } +) + +#' @importFrom methods getMethod +setMethod( + f = '.PrepImageData', + signature = c(data = 'factor', cells = 'rle'), + definition = function(data, cells, name, ...) { + f <- getMethod( + f = '.PrepImageData', + signature = c(data = 'vector', cells = 'rle') + ) + return(f(data = data, cells = cells, name = name, ...)) + } +) + +setMethod( + f = '.PrepImageData', + signature = c(data = 'list', cells = 'rle'), + definition = function(data, cells, ...) { + .NotYetImplemented() + } +) + +setMethod( + f = '.PrepImageData', + signature = c(data = 'NULL', cells = 'rle'), + definition = function(data, cells, ...) { + return(SeuratObject::EmptyDF(n = sum(cells$lengths))) + } +) + +setMethod( + f = '.PrepImageData', + signature = c(data = 'vector', cells = 'rle'), + definition = function(data, cells, name, ...) { + name <- as.character(x = name) + if (name %in% c('x', 'y', 'cell')) { + stop("'name' cannot be 'x', 'y', or 'cell'", call. = FALSE) + } + cnames <- cells$values + if (is.null(x = names(x = data))) { + mlen <- min(sapply(X = list(data, cnames), FUN = length)) + names(x = data)[1:mlen] <- cnames[1:mlen] + } + if (anyDuplicated(x = names(x = data))) { + dup <- duplicated(x = names(x = data)) + warning( + sum(dup), + ifelse(test = sum(dup) == 1, yes = ' cell', no = ' cells'), + ' duplicated, using only the first occurance', + call. = FALSE, + immediate. = TRUE + ) + } + if (length(x = data) < length(x = cnames)) { + mcells <- setdiff(x = cnames, y = names(x = data)) + warning( + "Missing data for some cells, filling with NA", + call. = FALSE, + immediate. = TRUE + ) + data[mcells] <- NA + } else if (length(x = data) > length(x = cnames)) { + warning( + "More cells provided than present", + call. = FALSE, + immediate. = TRUE + ) + } + data <- data.frame(rep.int(x = data[cnames], times = cells$lengths)) + colnames(x = data) <- name + return(data) + } +) diff --git a/R/zzz.R b/R/zzz.R index 30a582801..49e317374 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,7 @@ +#' @importFrom progressr progressor +#' +NULL + #' @section Package options: #' #' Seurat uses the following [options()] to configure behaviour: @@ -27,6 +31,10 @@ #' "_PACKAGE" +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Options +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + seurat_default_options <- list( Seurat.memsafe = FALSE, Seurat.warn.umap.uwot = TRUE, @@ -36,22 +44,24 @@ seurat_default_options <- list( Seurat.warn.vlnplot.split = TRUE ) -AttachDeps <- function(deps) { - for (d in deps) { - if (!paste0('package:', d) %in% search()) { - packageStartupMessage("Attaching ", d) - attachNamespace(ns = d) - } - } -} +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Hooks +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' @importFrom SeuratObject AttachDeps +#' .onAttach <- function(libname, pkgname) { - AttachDeps(deps = 'SeuratObject') + AttachDeps(deps = c('SeuratObject')) + return(invisible(x = NULL)) } .onLoad <- function(libname, pkgname) { - op <- options() - toset <- !(names(x = seurat_default_options) %in% names(x = op)) - if (any(toset)) options(seurat_default_options[toset]) - invisible(x = NULL) + toset <- setdiff( + x = names(x = seurat_default_options), + y = names(x = options()) + ) + if (length(x = toset)) { + options(seurat_default_options[toset]) + } + return(invisible(x = NULL)) } diff --git a/README.md b/README.md index 9fa45122e..427ab3a20 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![CRAN Version](https://www.r-pkg.org/badges/version/Seurat)](https://cran.r-project.org/package=Seurat) [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/Seurat)](https://cran.r-project.org/package=Seurat) -# Seurat v4.2.1 +# Seurat v4.3.0 Seurat is an R toolkit for single cell genomics, developed and maintained by the Satija Lab at NYGC. diff --git a/man/DiscretePalette.Rd b/man/DiscretePalette.Rd index fae0c2a7f..8bffab81f 100644 --- a/man/DiscretePalette.Rd +++ b/man/DiscretePalette.Rd @@ -2,16 +2,18 @@ % Please edit documentation in R/visualization.R \name{DiscretePalette} \alias{DiscretePalette} -\title{Discrete colour palettes from the pals package} +\title{Discrete colour palettes from pals} \usage{ -DiscretePalette(n, palette = NULL) +DiscretePalette(n, palette = NULL, shuffle = FALSE) } \arguments{ \item{n}{Number of colours to be generated.} \item{palette}{Options are -"alphabet", "alphabet2", "glasbey", "polychrome", and "stepped". +"alphabet", "alphabet2", "glasbey", "polychrome", "stepped", and "parade". Can be omitted and the function will use the one based on the requested n.} + +\item{shuffle}{Shuffle the colors in the selected palette.} } \value{ A vector of colors diff --git a/man/ImageDimPlot.Rd b/man/ImageDimPlot.Rd new file mode 100644 index 000000000..ce3d3fd5d --- /dev/null +++ b/man/ImageDimPlot.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{ImageDimPlot} +\alias{ImageDimPlot} +\title{Spatial Cluster Plots} +\usage{ +ImageDimPlot( + object, + fov = NULL, + boundaries = NULL, + group.by = NULL, + split.by = NULL, + cols = NULL, + shuffle.cols = FALSE, + size = 0.5, + molecules = NULL, + mols.size = 0.1, + mols.cols = NULL, + mols.alpha = 1, + nmols = 1000, + alpha = 1, + border.color = "white", + border.size = NULL, + na.value = "grey50", + dark.background = TRUE, + crop = FALSE, + cells = NULL, + overlap = FALSE, + axes = FALSE, + combine = TRUE, + coord.fixed = TRUE +) +} +\arguments{ +\item{object}{A \code{\link[SeuratObject]{Seurat}} object} + +\item{fov}{Name of FOV to plot} + +\item{boundaries}{A vector of segmentation boundaries per image to plot; +can be a character vector, a named character vector, or a named list. +Names should be the names of FOVs and values should be the names of +segmentation boundaries} + +\item{group.by}{Name of one or more metadata columns to group (color) cells by +(for example, orig.ident); pass 'ident' to group by identity class} + +\item{split.by}{Name of a metadata column to split plot by; +see \code{\link{FetchData}} for more details} + +\item{cols}{Vector of colors, each color corresponds to an identity class. This may also be a single character +or numeric value corresponding to a palette as specified by \code{\link[RColorBrewer]{brewer.pal.info}}. +By default, ggplot2 assigns colors. We also include a number of palettes from the pals package. +See \code{\link{DiscretePalette}} for details.} + +\item{shuffle.cols}{Randomly shuffle colors when a palette or +vector of colors is provided to \code{cols}} + +\item{size}{Point size for cells when plotting centroids} + +\item{molecules}{A vector of molecules to plot} + +\item{mols.size}{Point size for molecules} + +\item{mols.cols}{A vector of color for molecules. The "Set1" palette from +RColorBrewer is used by default.} + +\item{mols.alpha}{Alpha value for molecules, should be between 0 and 1} + +\item{nmols}{Max number of each molecule specified in `molecules` to plot} + +\item{alpha}{Alpha value, should be between 0 and 1; when plotting multiple +boundaries, \code{alpha} is equivalent to max alpha} + +\item{border.color}{Color of cell segmentation border; pass \code{NA} +to suppress borders for segmentation-based plots} + +\item{border.size}{Thickness of cell segmentation borders; pass \code{NA} +to suppress borders for centroid-based plots} + +\item{na.value}{Color value for NA points when using custom scale} + +\item{dark.background}{Set plot background to black} + +\item{crop}{Crop the plots to area with cells only} + +\item{cells}{Vector of cells to plot (default is all cells)} + +\item{overlap}{Overlay boundaries from a single image to create a single +plot; if \code{TRUE}, then boundaries are stacked in the order they're +given (first is lowest)} + +\item{axes}{Keep axes and panel background} + +\item{combine}{Combine plots into a single +\code{patchwork} ggplot object.If \code{FALSE}, +return a list of ggplot objects} + +\item{coord.fixed}{Plot cartesian coordinates with fixed aspect ratio} +} +\value{ +If \code{combine = TRUE}, a \code{patchwork} +ggplot object; otherwise, a list of ggplot objects +} +\description{ +Visualize clusters or other categorical groupings in a spatial context +} diff --git a/man/ImageFeaturePlot.Rd b/man/ImageFeaturePlot.Rd new file mode 100644 index 000000000..9a91337eb --- /dev/null +++ b/man/ImageFeaturePlot.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{ImageFeaturePlot} +\alias{ImageFeaturePlot} +\title{Spatial Feature Plots} +\usage{ +ImageFeaturePlot( + object, + features, + fov = NULL, + boundaries = NULL, + cols = if (isTRUE(x = blend)) { + c("lightgrey", "#ff0000", "#00ff00") + } else { + + c("lightgrey", "firebrick1") + }, + size = 0.5, + min.cutoff = NA, + max.cutoff = NA, + split.by = NULL, + molecules = NULL, + mols.size = 0.1, + mols.cols = NULL, + nmols = 1000, + alpha = 1, + border.color = "white", + border.size = NULL, + dark.background = TRUE, + blend = FALSE, + blend.threshold = 0.5, + crop = FALSE, + cells = NULL, + scale = c("feature", "all", "none"), + overlap = FALSE, + axes = FALSE, + combine = TRUE, + coord.fixed = TRUE +) +} +\arguments{ +\item{object}{Seurat object} + +\item{features}{Vector of features to plot. Features can come from: +\itemize{ + \item An \code{Assay} feature (e.g. a gene name - "MS4A1") + \item A column name from meta.data (e.g. mitochondrial percentage - "percent.mito") + \item A column name from a \code{DimReduc} object corresponding to the cell embedding values + (e.g. the PC 1 scores - "PC_1") +}} + +\item{fov}{Name of FOV to plot} + +\item{boundaries}{A vector of segmentation boundaries per image to plot; +can be a character vector, a named character vector, or a named list. +Names should be the names of FOVs and values should be the names of +segmentation boundaries} + +\item{cols}{The two colors to form the gradient over. Provide as string vector with +the first color corresponding to low values, the second to high. Also accepts a Brewer +color scale or vector of colors. Note: this will bin the data into number of colors provided. +When blend is \code{TRUE}, takes anywhere from 1-3 colors: +\describe{ + \item{1 color:}{Treated as color for double-negatives, will use default colors 2 and 3 for per-feature expression} + \item{2 colors:}{Treated as colors for per-feature expression, will use default color 1 for double-negatives} + \item{3+ colors:}{First color used for double-negatives, colors 2 and 3 used for per-feature expression, all others ignored} +}} + +\item{size}{Point size for cells when plotting centroids} + +\item{min.cutoff, max.cutoff}{Vector of minimum and maximum cutoff values for each feature, +may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', 'q10')} + +\item{split.by}{A factor in object metadata to split the feature plot by, pass 'ident' +to split by cell identity'; similar to the old \code{FeatureHeatmap}} + +\item{molecules}{A vector of molecules to plot} + +\item{mols.size}{Point size for molecules} + +\item{mols.cols}{A vector of color for molecules. The "Set1" palette from +RColorBrewer is used by default.} + +\item{nmols}{Max number of each molecule specified in `molecules` to plot} + +\item{alpha}{Alpha value, should be between 0 and 1; when plotting multiple +boundaries, \code{alpha} is equivalent to max alpha} + +\item{border.color}{Color of cell segmentation border; pass \code{NA} +to suppress borders for segmentation-based plots} + +\item{border.size}{Thickness of cell segmentation borders; pass \code{NA} +to suppress borders for centroid-based plots} + +\item{dark.background}{Set plot background to black} + +\item{blend}{Scale and blend expression values to visualize coexpression of two features} + +\item{blend.threshold}{The color cutoff from weak signal to strong signal; ranges from 0 to 1.} + +\item{crop}{Crop the plots to area with cells only} + +\item{cells}{Vector of cells to plot (default is all cells)} + +\item{scale}{Set color scaling across multiple plots; choose from: +\itemize{ + \item \dQuote{\code{feature}}: Plots per-feature are scaled across splits + \item \dQuote{\code{all}}: Plots per-feature are scaled across all features + \item \dQuote{\code{none}}: Plots are not scaled; \strong{note}: setting + \code{scale} to \dQuote{\code{none}} will result in color scales that are + \emph{not} comparable between plots +} +Ignored if \code{blend = TRUE}} + +\item{overlap}{Overlay boundaries from a single image to create a single +plot; if \code{TRUE}, then boundaries are stacked in the order they're +given (first is lowest)} + +\item{axes}{Keep axes and panel background} + +\item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} +ggplot object. If \code{FALSE}, return a list of ggplot objects} + +\item{coord.fixed}{Plot cartesian coordinates with fixed aspect ratio} +} +\value{ +If \code{combine = TRUE}, a \code{patchwork} +ggplot object; otherwise, a list of ggplot objects +} +\description{ +Visualize expression in a spatial context +} diff --git a/man/PolyFeaturePlot.Rd b/man/PolyFeaturePlot.Rd index a2b2fc588..59a75466d 100644 --- a/man/PolyFeaturePlot.Rd +++ b/man/PolyFeaturePlot.Rd @@ -44,7 +44,8 @@ may specify quantile in the form of 'q##' where '##' is the quantile (eg, 'q1', Returns a ggplot object } \description{ -Plot cells as polygons, rather than single points. Color cells by any value accessible by \code{\link{FetchData}}. +Plot cells as polygons, rather than single points. Color cells by any value +accessible by \code{\link{FetchData}}. } \concept{spatial} \concept{visualization} diff --git a/man/ReadAkoya.Rd b/man/ReadAkoya.Rd new file mode 100644 index 000000000..adf300e57 --- /dev/null +++ b/man/ReadAkoya.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R, R/convenience.R +\name{ReadAkoya} +\alias{ReadAkoya} +\alias{LoadAkoya} +\title{Read and Load Akoya CODEX data} +\usage{ +ReadAkoya( + filename, + type = c("inform", "processor", "qupath"), + filter = "DAPI|Blank|Empty", + inform.quant = c("mean", "total", "min", "max", "std") +) + +LoadAkoya( + filename, + type = c("inform", "processor", "qupath"), + fov, + assay = "Akoya", + ... +) +} +\arguments{ +\item{filename}{Path to matrix generated by upstream processing.} + +\item{type}{Specify which type matrix is being provided. +\itemize{ + \item \dQuote{\code{processor}}: matrix generated by CODEX Processor + \item \dQuote{\code{inform}}: matrix generated by inForm + \item \dQuote{\code{qupath}}: matrix generated by QuPath +}} + +\item{filter}{A pattern to filter features by; pass \code{NA} to +skip feature filtering} + +\item{inform.quant}{When \code{type} is \dQuote{\code{inform}}, the +quantification level to read in} + +\item{fov}{Name to store FOV as} + +\item{assay}{Name to store expression matrix as} + +\item{...}{ + Arguments passed on to \code{\link[=ReadAkoya]{ReadAkoya}} + \describe{ + \item{\code{}}{} + }} +} +\value{ +\code{ReadAkoya}: A list with some combination of the following values +\itemize{ + \item \dQuote{\code{matrix}}: a + \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells + are columns and features are rows + \item \dQuote{\code{centroids}}: a data frame with cell centroid + coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{metadata}}: a data frame with cell-level meta data; + includes all columns in \code{filename} that aren't in + \dQuote{\code{matrix}} or \dQuote{\code{centroids}} +} +When \code{type} is \dQuote{\code{inform}}, additional expression matrices +are returned and named using their segmentation type (eg. +\dQuote{nucleus}, \dQuote{membrane}). The \dQuote{Entire Cell} segmentation +type is returned in the \dQuote{\code{matrix}} entry of the list + +\code{LoadAkoya}: A \code{\link[SeuratObject]{Seurat}} object +} +\description{ +Read and Load Akoya CODEX data +} +\note{ +This function requires the +\href{https://cran.r-project.org/package=data.table}{\pkg{data.table}} package +to be installed +} +\section{Progress Updates with \pkg{progressr}}{ + +This function uses +\href{https://cran.r-project.org/package=progressr}{\pkg{progressr}} to +render status updates and progress bars. To enable progress updates, wrap +the function call in \code{\link[progressr]{with_progress}} or run +\code{\link[progressr:handlers]{handlers(global = TRUE)}} before running +this function. For more details about \pkg{progressr}, please read +\href{https://progressr.futureverse.org/articles/progressr-intro.html}{\code{vignette("progressr-intro")}} +} + +\concept{preprocessing} diff --git a/man/ReadNanostring.Rd b/man/ReadNanostring.Rd new file mode 100644 index 000000000..bbdc764f4 --- /dev/null +++ b/man/ReadNanostring.Rd @@ -0,0 +1,139 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R, R/convenience.R +\name{ReadNanostring} +\alias{ReadNanostring} +\alias{LoadNanostring} +\title{Read and Load Nanostring SMI data} +\usage{ +ReadNanostring( + data.dir, + mtx.file = NULL, + metadata.file = NULL, + molecules.file = NULL, + segmentations.file = NULL, + type = "centroids", + mol.type = "pixels", + metadata = NULL, + mols.filter = NA_character_, + genes.filter = NA_character_, + fov.filter = NULL, + subset.counts.matrix = NULL, + cell.mols.only = TRUE +) + +LoadNanostring(data.dir, fov, assay = "Nanostring") +} +\arguments{ +\item{data.dir}{Path to folder containing Nanostring SMI outputs} + +\item{mtx.file}{Path to Nanostring cell x gene matrix CSV} + +\item{metadata.file}{Contains metadata including cell center, area, +and stain intensities} + +\item{molecules.file}{Path to molecules file} + +\item{segmentations.file}{Path to segmentations CSV} + +\item{type}{Type of cell spatial coordinate matrices to read; choose one +or more of: +\itemize{ + \item \dQuote{centroids}: cell centroids in pixel coordinate space + \item \dQuote{segmentations}: cell segmentations in pixel coordinate space +}} + +\item{mol.type}{Type of molecule spatial coordinate matrices to read; +choose one or more of: +\itemize{ + \item \dQuote{pixels}: molecule coordinates in pixel space +}} + +\item{metadata}{Type of available metadata to read; +choose zero or more of: +\itemize{ + \item \dQuote{Area}: number of pixels in cell segmentation + \item \dQuote{fov}: cell's fov + \item \dQuote{Mean.MembraneStain}: mean membrane stain intensity + \item \dQuote{Mean.DAPI}: mean DAPI stain intensity + \item \dQuote{Mean.G}: mean green channel stain intensity + \item \dQuote{Mean.Y}: mean yellow channel stain intensity + \item \dQuote{Mean.R}: mean red channel stain intensity + \item \dQuote{Max.MembraneStain}: max membrane stain intensity + \item \dQuote{Max.DAPI}: max DAPI stain intensity + \item \dQuote{Max.G}: max green channel stain intensity + \item \dQuote{Max.Y}: max yellow stain intensity + \item \dQuote{Max.R}: max red stain intensity +}} + +\item{mols.filter}{Filter molecules that match provided string} + +\item{genes.filter}{Filter genes from cell x gene matrix that match +provided string} + +\item{fov.filter}{Only load in select FOVs. Nanostring SMI data contains +30 total FOVs.} + +\item{subset.counts.matrix}{If the counts matrix should be built from +molecule coordinates for a specific segmentation; One of: +\itemize{ + \item \dQuote{Nuclear}: nuclear segmentations + \item \dQuote{Cytoplasm}: cell cytoplasm segmentations + \item \dQuote{Membrane}: cell membrane segmentations +}} + +\item{cell.mols.only}{If TRUE, only load molecules within a cell} + +\item{fov}{Name to store FOV as} + +\item{assay}{Name to store expression matrix as} +} +\value{ +\code{ReadNanostring}: A list with some combination of the +following values: +\itemize{ + \item \dQuote{\code{matrix}}: a + \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells + are columns and features are rows + \item \dQuote{\code{centroids}}: a data frame with cell centroid + coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{pixels}}: a data frame with molecule pixel coordinates + in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} +} + +\code{LoadNanostring}: A \code{\link[SeuratObject]{Seurat}} object +} +\description{ +Read and Load Nanostring SMI data +} +\note{ +This function requires the +\href{https://cran.r-project.org/package=data.table}{\pkg{data.table}} package +to be installed +} +\section{Progress Updates with \pkg{progressr}}{ + +This function uses +\href{https://cran.r-project.org/package=progressr}{\pkg{progressr}} to +render status updates and progress bars. To enable progress updates, wrap +the function call in \code{\link[progressr]{with_progress}} or run +\code{\link[progressr:handlers]{handlers(global = TRUE)}} before running +this function. For more details about \pkg{progressr}, please read +\href{https://progressr.futureverse.org/articles/progressr-intro.html}{\code{vignette("progressr-intro")}} +} + +\section{Parallelization with \pkg{future}}{ + +This function uses +\href{https://cran.r-project.org/package=future}{\pkg{future}} to enable +parallelization. Parallelization strategies can be set using +\code{\link[future]{plan}}. Common plans include \dQuote{\code{sequential}} +for non-parallelized processing or \dQuote{\code{multisession}} for parallel +evaluation using multiple \R sessions; for other plans, see the +\dQuote{Implemented evaluation strategies} section of +\code{\link[future:plan]{?future::plan}}. For a more thorough introduction +to \pkg{future}, see +\href{https://future.futureverse.org/articles/future-1-overview.html}{\code{vignette("future-1-overview")}} +} + +\concept{future} +\concept{preprocessing} diff --git a/man/ReadVitessce.Rd b/man/ReadVitessce.Rd new file mode 100644 index 000000000..6558ac87c --- /dev/null +++ b/man/ReadVitessce.Rd @@ -0,0 +1,104 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R, R/convenience.R +\name{ReadVitessce} +\alias{ReadVitessce} +\alias{LoadHuBMAPCODEX} +\title{Read Data From Vitessce} +\usage{ +ReadVitessce( + counts = NULL, + coords = NULL, + molecules = NULL, + type = c("segmentations", "centroids"), + filter = NA_character_ +) + +LoadHuBMAPCODEX(data.dir, fov, assay = "CODEX") +} +\arguments{ +\item{counts}{Path or URL to a Vitessce-formatted JSON file with +expression data; should end in \dQuote{\code{.genes.json}} or +\dQuote{\code{.clusters.json}}; pass \code{NULL} to skip} + +\item{coords}{Path or URL to a Vitessce-formatted JSON file with cell/spot +spatial coordinates; should end in \dQuote{\code{.cells.json}}; +pass \code{NULL} to skip} + +\item{molecules}{Path or URL to a Vitessce-formatted JSON file with molecule +spatial coordinates; should end in \dQuote{\code{.molecules.json}}; +pass \code{NULL} to skip} + +\item{type}{Type of cell/spot spatial coordinates to return, +choose one or more from: +\itemize{ + \item \dQuote{segmentations} cell/spot segmentations + \item \dQuote{centroids} cell/spot centroids +}} + +\item{filter}{A character to filter molecules by, pass \code{NA} to skip +molecule filtering} + +\item{data.dir}{Path to a directory containing Vitessce cells +and clusters JSONs} + +\item{fov}{Name to store FOV as} + +\item{assay}{Name to store expression matrix as} +} +\value{ +\code{ReadVitessce}: A list with some combination of the +following values: +\itemize{ + \item \dQuote{\code{counts}}: if \code{counts} is not \code{NULL}, an + expression matrix with cells as columns and features as rows + \item \dQuote{\code{centroids}}: if \code{coords} is not \code{NULL} and + \code{type} is contains\dQuote{centroids}, a data frame with cell centroids + in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{segmentations}}: if \code{coords} is not \code{NULL} and + \code{type} contains \dQuote{centroids}, a data frame with cell + segmentations in three columns: \dQuote{x}, \dQuote{y} and \dQuote{cell} + \item \dQuote{\code{molecules}}: if \code{molecules} is not \code{NULL}, a + data frame with molecule spatial coordinates in three columns: \dQuote{x}, + \dQuote{y}, and \dQuote{gene} +} + +\code{LoadHuBMAPCODEX}: A \code{\link[SeuratObject]{Seurat}} object +} +\description{ +Read in data from Vitessce-formatted JSON files +} +\note{ +This function requires the +\href{https://cran.r-project.org/package=jsonlite}{\pkg{jsonlite}} package +to be installed +} +\section{Progress Updates with \pkg{progressr}}{ + +This function uses +\href{https://cran.r-project.org/package=progressr}{\pkg{progressr}} to +render status updates and progress bars. To enable progress updates, wrap +the function call in \code{\link[progressr]{with_progress}} or run +\code{\link[progressr:handlers]{handlers(global = TRUE)}} before running +this function. For more details about \pkg{progressr}, please read +\href{https://progressr.futureverse.org/articles/progressr-intro.html}{\code{vignette("progressr-intro")}} +} + +\examples{ +\dontrun{ +coords <- ReadVitessce( + counts = + "https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/wang/wang.genes.json", + coords = + "https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/wang/wang.cells.json", + molecules = + "https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/wang/wang.molecules.json" +) +names(coords) +coords$counts[1:10, 1:10] +head(coords$centroids) +head(coords$segmentations) +head(coords$molecules) +} + +} +\concept{preprocessing} diff --git a/man/ReadVizgen.Rd b/man/ReadVizgen.Rd new file mode 100644 index 000000000..443a04555 --- /dev/null +++ b/man/ReadVizgen.Rd @@ -0,0 +1,136 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R, R/convenience.R +\name{ReadVizgen} +\alias{ReadVizgen} +\alias{LoadVizgen} +\title{Read and Load MERFISH Input from Vizgen} +\usage{ +ReadVizgen( + data.dir, + transcripts = NULL, + spatial = NULL, + molecules = NULL, + type = "segmentations", + mol.type = "microns", + metadata = NULL, + filter = NA_character_, + z = 3L +) + +LoadVizgen(data.dir, fov, assay = "Vizgen", z = 3L) +} +\arguments{ +\item{data.dir}{Path to the directory with Vizgen MERFISH files; requires at +least one of the following files present: +\itemize{ + \item \dQuote{\code{cell_by_gene.csv}}: used for reading count matrix + \item \dQuote{\code{cell_metadata.csv}}: used for reading cell spatial + coordinate matrices + \item \dQuote{\code{detected_transcripts.csv}}: used for reading molecule + spatial coordinate matrices +}} + +\item{transcripts}{Optional file path for counts matrix; pass \code{NA} to +suppress reading counts matrix} + +\item{spatial}{Optional file path for spatial metadata; pass \code{NA} to +suppress reading spatial coordinates. If \code{spatial} is provided and +\code{type} is \dQuote{segmentations}, uses \code{dirname(spatial)} instead of +\code{data.dir} to find HDF5 files} + +\item{molecules}{Optional file path for molecule coordinates file; pass +\code{NA} to suppress reading spatial molecule information} + +\item{type}{Type of cell spatial coordinate matrices to read; choose one +or more of: +\itemize{ + \item \dQuote{segmentations}: cell segmentation vertices; requires + \href{https://cran.r-project.org/package=hdf5r}{\pkg{hdf5r}} to be + installed and requires a directory \dQuote{\code{cell_boundaries}} within + \code{data.dir}. Within \dQuote{\code{cell_boundaries}}, there must be + one or more HDF5 file named \dQuote{\code{feature_data_##.hdf5}} + \item \dQuote{centroids}: cell centroids in micron coordinate space + \item \dQuote{boxes}: cell box outlines in micron coordinate space +}} + +\item{mol.type}{Type of molecule spatial coordinate matrices to read; +choose one or more of: +\itemize{ + \item \dQuote{pixels}: molecule coordinates in pixel space + \item \dQuote{microns}: molecule coordinates in micron space +}} + +\item{metadata}{Type of available metadata to read; +choose zero or more of: +\itemize{ + \item \dQuote{volume}: estimated cell volume + \item \dQuote{fov}: cell's fov +}} + +\item{filter}{A character to filter molecules by, pass \code{NA} to skip +molecule filtering} + +\item{z}{Z-index to load; must be between 0 and 6, inclusive} + +\item{fov}{Name to store FOV as} + +\item{assay}{Name to store expression matrix as} +} +\value{ +\code{ReadVizgen}: A list with some combination of the +following values: +\itemize{ + \item \dQuote{\code{transcripts}}: a + \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells + are columns and features are rows + \item \dQuote{\code{segmentations}}: a data frame with cell polygon outlines in + three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{centroids}}: a data frame with cell centroid + coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{boxes}}: a data frame with cell box outlines in three + columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{microns}}: a data frame with molecule micron + coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} + \item \dQuote{\code{pixels}}: a data frame with molecule pixel coordinates + in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} + \item \dQuote{\code{metadata}}: a data frame with the cell-level metadata + requested by \code{metadata} +} + +\code{LoadVizgen}: A \code{\link[SeuratObject]{Seurat}} object +} +\description{ +Read and load in MERFISH data from Vizgen-formatted files +} +\note{ +This function requires the +\href{https://cran.r-project.org/package=data.table}{\pkg{data.table}} package +to be installed +} +\section{Progress Updates with \pkg{progressr}}{ + +This function uses +\href{https://cran.r-project.org/package=progressr}{\pkg{progressr}} to +render status updates and progress bars. To enable progress updates, wrap +the function call in \code{\link[progressr]{with_progress}} or run +\code{\link[progressr:handlers]{handlers(global = TRUE)}} before running +this function. For more details about \pkg{progressr}, please read +\href{https://progressr.futureverse.org/articles/progressr-intro.html}{\code{vignette("progressr-intro")}} +} + +\section{Parallelization with \pkg{future}}{ + +This function uses +\href{https://cran.r-project.org/package=future}{\pkg{future}} to enable +parallelization. Parallelization strategies can be set using +\code{\link[future]{plan}}. Common plans include \dQuote{\code{sequential}} +for non-parallelized processing or \dQuote{\code{multisession}} for parallel +evaluation using multiple \R sessions; for other plans, see the +\dQuote{Implemented evaluation strategies} section of +\code{\link[future:plan]{?future::plan}}. For a more thorough introduction +to \pkg{future}, see +\href{https://future.futureverse.org/articles/future-1-overview.html}{\code{vignette("future-1-overview")}} +} + +\concept{future} +\concept{preprocessing} diff --git a/man/ReadXenium.Rd b/man/ReadXenium.Rd new file mode 100644 index 000000000..df86aa864 --- /dev/null +++ b/man/ReadXenium.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience.R, R/preprocessing.R +\name{LoadXenium} +\alias{LoadXenium} +\alias{ReadXenium} +\title{Read and Load 10x Genomics Xenium in-situ data} +\usage{ +LoadXenium(data.dir, fov = "fov", assay = "Xenium") + +ReadXenium( + data.dir, + outs = c("matrix", "microns"), + type = "centroids", + mols.qv.threshold = 20 +) +} +\arguments{ +\item{data.dir}{Directory containing all Xenium output files with +default filenames} + +\item{fov}{FOV name} + +\item{assay}{Assay name} + +\item{outs}{Types of molecular outputs to read; choose one or more of: +\itemize{ + \item \dQuote{matrix}: the counts matrix + \item \dQuote{microns}: molecule coordinates +}} + +\item{type}{Type of cell spatial coordinate matrices to read; choose one +or more of: +\itemize{ + \item \dQuote{centroids}: cell centroids in pixel coordinate space + \item \dQuote{segmentations}: cell segmentations in pixel coordinate space +}} + +\item{mols.qv.threshold}{Remove transcript molecules with +a QV less than this threshold. QV >= 20 is the standard threshold +used to construct the cell x gene count matrix.} +} +\value{ +\code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object + +\code{ReadXenium}: A list with some combination of the +following values: +\itemize{ + \item \dQuote{\code{matrix}}: a + \link[Matrix:dgCMatrix-class]{sparse matrix} with expression data; cells + are columns and features are rows + \item \dQuote{\code{centroids}}: a data frame with cell centroid + coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell} + \item \dQuote{\code{pixels}}: a data frame with molecule pixel coordinates + in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene} +} +} +\description{ +Read and Load 10x Genomics Xenium in-situ data +} +\concept{preprocessing} diff --git a/man/SingleImagePlot.Rd b/man/SingleImagePlot.Rd new file mode 100644 index 000000000..dc4030b61 --- /dev/null +++ b/man/SingleImagePlot.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{SingleImagePlot} +\alias{SingleImagePlot} +\title{Single Spatial Plot} +\usage{ +SingleImagePlot( + data, + col.by = NA, + col.factor = TRUE, + cols = NULL, + shuffle.cols = FALSE, + size = 0.1, + molecules = NULL, + mols.size = 0.1, + mols.cols = NULL, + mols.alpha = 1, + alpha = molecules \%iff\% 0.3 \%||\% 0.6, + border.color = "white", + border.size = NULL, + na.value = "grey50", + dark.background = TRUE, + ... +) +} +\arguments{ +\item{data}{A data frame with at least the following columns: +\itemize{ + \item \dQuote{\code{x}}: Spatial-resolved \emph{x} coordinates, will be + plotted on the \emph{y}-axis + \item \dQuote{\code{y}}: Spatially-resolved \emph{y} coordinates, will be + plotted on the \emph{x}-axis + \item \dQuote{\code{cell}}: Cell name + \item \dQuote{\code{boundary}}: Segmentation boundary label; when plotting + multiple segmentation layers, the order of boundary transparency is set by + factor levels for this column +} +Can pass \code{NA} to \code{data} suppress segmentation visualization} + +\item{col.by}{Name of column in \code{data} to color cell segmentations by; +pass \code{NA} to suppress coloring} + +\item{col.factor}{Are the colors a factor or discrete?} + +\item{cols}{Colors for cell segmentations; can be one of the +following: +\itemize{ + \item \code{NULL} for default ggplot2 colors + \item A numeric value or name of a + \link[RColorBrewer:RColorBrewer]{color brewer palette} + \item Name of a palette for \code{\link{DiscretePalette}} + \item A vector of colors equal to the length of unique levels of + \code{data$col.by} +}} + +\item{shuffle.cols}{Randomly shuffle colors when a palette or +vector of colors is provided to \code{cols}} + +\item{size}{Point size for cells when plotting centroids} + +\item{molecules}{A data frame with spatially-resolved molecule coordinates; +should have the following columns: +\itemize{ + \item \dQuote{\code{x}}: Spatial-resolved \emph{x} coordinates, will be + plotted on the \emph{y}-axis + \item \dQuote{\code{y}}: Spatially-resolved \emph{y} coordinates, will be + plotted on the \emph{x}-axis + \item \dQuote{\code{molecule}}: Molecule name +}} + +\item{mols.size}{Point size for molecules} + +\item{mols.cols}{A vector of color for molecules. The "Set1" palette from +RColorBrewer is used by default.} + +\item{mols.alpha}{Alpha value for molecules, should be between 0 and 1} + +\item{alpha}{Alpha value, should be between 0 and 1; when plotting multiple +boundaries, \code{alpha} is equivalent to max alpha} + +\item{border.color}{Color of cell segmentation border; pass \code{NA} +to suppress borders for segmentation-based plots} + +\item{border.size}{Thickness of cell segmentation borders; pass \code{NA} +to suppress borders for centroid-based plots} + +\item{na.value}{Color value for \code{NA} segmentations when +using custom scale} + +\item{...}{Ignored} +} +\value{ +A ggplot object +} +\description{ +Single Spatial Plot +} +\keyword{internal} diff --git a/man/SpatialPlot.Rd b/man/SpatialPlot.Rd index 7ffff836a..133501e2a 100644 --- a/man/SpatialPlot.Rd +++ b/man/SpatialPlot.Rd @@ -97,7 +97,7 @@ default, ggplot2 assigns colors} \item{image.alpha}{Adjust the opacity of the background images. Set to 0 to remove.} -\item{crop}{Crop the plot in to focus on points plotted. Set to FALSE to show +\item{crop}{Crop the plot in to focus on points plotted. Set to \code{FALSE} to show entire background image.} \item{slot}{If plotting a feature, which data slot to pull from (counts, diff --git a/man/VizDimLoadings.Rd b/man/VizDimLoadings.Rd index fe90f7368..eed04f3d0 100644 --- a/man/VizDimLoadings.Rd +++ b/man/VizDimLoadings.Rd @@ -35,11 +35,11 @@ FALSE (default), returns the top genes ranked by the scores absolute values} \item{ncol}{Number of columns to display} -\item{combine}{Combine plots into a single \code{\link[patchwork]{patchwork}ed} +\item{combine}{Combine plots into a single \code{patchwork} ggplot object. If \code{FALSE}, return a list of ggplot objects} } \value{ -A \code{\link[patchwork]{patchwork}ed} ggplot object if +A \code{patchwork} ggplot object if \code{combine = TRUE}; otherwise, a list of ggplot objects } \description{ diff --git a/man/fortify-Spatial.Rd b/man/fortify-Spatial.Rd new file mode 100644 index 000000000..e2d13bcf8 --- /dev/null +++ b/man/fortify-Spatial.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{fortify-Spatial} +\alias{fortify-Spatial} +\alias{fortify.Centroids} +\alias{fortify} +\alias{fortify.Molecules} +\alias{fortify.Segmentation} +\title{Prepare Coordinates for Spatial Plots} +\usage{ +\method{fortify}{Centroids}(model, data, ...) + +\method{fortify}{Molecules}(model, data, nmols = NULL, seed = NA_integer_, ...) + +\method{fortify}{Segmentation}(model, data, ...) +} +\arguments{ +\item{model}{A \code{\linkS4class{Segmentation}}, +\code{\linkS4class{Centroids}}, +or \code{\linkS4class{Molecules}} object} + +\item{data}{Extra data to be used for annotating the cell segmentations; the +easiest way to pass data is a one-column +\code{\link[base:data.frame]{data frame}} with the values to color by and +the cell names are rownames} + +\item{...}{Arguments passed to other methods} +} +\description{ +Prepare Coordinates for Spatial Plots +} +\keyword{internal} diff --git a/man/reexports.Rd b/man/reexports.Rd index 4e5b1716e..e94d58954 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -3,6 +3,8 @@ \docType{import} \name{reexports} \alias{reexports} +\alias{\%||\%} +\alias{\%iff\%} \alias{AddMetaData} \alias{as.Graph} \alias{as.Neighbor} @@ -68,6 +70,6 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{SeuratObject}{\code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} + \item{SeuratObject}{\code{\link[SeuratObject:set-if-null]{\%||\%}}, \code{\link[SeuratObject:set-if-null]{\%iff\%}}, \code{\link[SeuratObject]{AddMetaData}}, \code{\link[SeuratObject]{as.Graph}}, \code{\link[SeuratObject]{as.Neighbor}}, \code{\link[SeuratObject]{as.Seurat}}, \code{\link[SeuratObject]{as.sparse}}, \code{\link[SeuratObject:ObjectAccess]{Assays}}, \code{\link[SeuratObject]{Cells}}, \code{\link[SeuratObject]{CellsByIdentities}}, \code{\link[SeuratObject]{Command}}, \code{\link[SeuratObject]{CreateAssayObject}}, \code{\link[SeuratObject]{CreateDimReducObject}}, \code{\link[SeuratObject]{CreateSeuratObject}}, \code{\link[SeuratObject]{DefaultAssay}}, \code{\link[SeuratObject:DefaultAssay]{DefaultAssay<-}}, \code{\link[SeuratObject]{Distances}}, \code{\link[SeuratObject]{Embeddings}}, \code{\link[SeuratObject]{FetchData}}, \code{\link[SeuratObject:AssayData]{GetAssayData}}, \code{\link[SeuratObject]{GetImage}}, \code{\link[SeuratObject]{GetTissueCoordinates}}, \code{\link[SeuratObject:VariableFeatures]{HVFInfo}}, \code{\link[SeuratObject]{Idents}}, \code{\link[SeuratObject:Idents]{Idents<-}}, \code{\link[SeuratObject]{Images}}, \code{\link[SeuratObject]{Index}}, \code{\link[SeuratObject:Index]{Index<-}}, \code{\link[SeuratObject]{Indices}}, \code{\link[SeuratObject]{IsGlobal}}, \code{\link[SeuratObject]{JS}}, \code{\link[SeuratObject:JS]{JS<-}}, \code{\link[SeuratObject]{Key}}, \code{\link[SeuratObject:Key]{Key<-}}, \code{\link[SeuratObject]{Loadings}}, \code{\link[SeuratObject:Loadings]{Loadings<-}}, \code{\link[SeuratObject]{LogSeuratCommand}}, \code{\link[SeuratObject]{Misc}}, \code{\link[SeuratObject:Misc]{Misc<-}}, \code{\link[SeuratObject:ObjectAccess]{Neighbors}}, \code{\link[SeuratObject]{Project}}, \code{\link[SeuratObject:Project]{Project<-}}, \code{\link[SeuratObject]{Radius}}, \code{\link[SeuratObject:ObjectAccess]{Reductions}}, \code{\link[SeuratObject]{RenameCells}}, \code{\link[SeuratObject:Idents]{RenameIdents}}, \code{\link[SeuratObject:Idents]{ReorderIdent}}, \code{\link[SeuratObject]{RowMergeSparseMatrices}}, \code{\link[SeuratObject:AssayData]{SetAssayData}}, \code{\link[SeuratObject:Idents]{SetIdent}}, \code{\link[SeuratObject:VariableFeatures]{SpatiallyVariableFeatures}}, \code{\link[SeuratObject:Idents]{StashIdent}}, \code{\link[SeuratObject]{Stdev}}, \code{\link[SeuratObject:VariableFeatures]{SVFInfo}}, \code{\link[SeuratObject]{Tool}}, \code{\link[SeuratObject:Tool]{Tool<-}}, \code{\link[SeuratObject]{UpdateSeuratObject}}, \code{\link[SeuratObject]{VariableFeatures}}, \code{\link[SeuratObject:VariableFeatures]{VariableFeatures<-}}, \code{\link[SeuratObject]{WhichCells}}} }} diff --git a/man/roxygen/templates/note-reqdpkg.R b/man/roxygen/templates/note-reqdpkg.R new file mode 100644 index 000000000..57c2e85cf --- /dev/null +++ b/man/roxygen/templates/note-reqdpkg.R @@ -0,0 +1,3 @@ +#' @note This function requires the +#' \href{https://cran.r-project.org/package=<%= pkg %>}{\pkg{<%= pkg %>}} package +#' to be installed diff --git a/man/roxygen/templates/section-future.R b/man/roxygen/templates/section-future.R new file mode 100644 index 000000000..bf6159bd0 --- /dev/null +++ b/man/roxygen/templates/section-future.R @@ -0,0 +1,13 @@ +#' @section Parallelization with \pkg{future}: +#' This function uses +#' \href{https://cran.r-project.org/package=future}{\pkg{future}} to enable +#' parallelization. Parallelization strategies can be set using +#' \code{\link[future]{plan}}. Common plans include \dQuote{\code{sequential}} +#' for non-parallelized processing or \dQuote{\code{multisession}} for parallel +#' evaluation using multiple \R sessions; for other plans, see the +#' \dQuote{Implemented evaluation strategies} section of +#' \code{\link[future:plan]{?future::plan}}. For a more thorough introduction +#' to \pkg{future}, see +#' \href{https://future.futureverse.org/articles/future-1-overview.html}{\code{vignette("future-1-overview")}} +#' +#' @concept future diff --git a/man/roxygen/templates/section-progressr.R b/man/roxygen/templates/section-progressr.R new file mode 100644 index 000000000..fdcde574f --- /dev/null +++ b/man/roxygen/templates/section-progressr.R @@ -0,0 +1,8 @@ +#' @section Progress Updates with \pkg{progressr}: +#' This function uses +#' \href{https://cran.r-project.org/package=progressr}{\pkg{progressr}} to +#' render status updates and progress bars. To enable progress updates, wrap +#' the function call in \code{\link[progressr]{with_progress}} or run +#' \code{\link[progressr:handlers]{handlers(global = TRUE)}} before running +#' this function. For more details about \pkg{progressr}, please read +#' \href{https://progressr.futureverse.org/articles/progressr-intro.html}{\code{vignette("progressr-intro")}} diff --git a/man/roxygen/templates/seealso-methods.R b/man/roxygen/templates/seealso-methods.R new file mode 100644 index 000000000..4d525bdd3 --- /dev/null +++ b/man/roxygen/templates/seealso-methods.R @@ -0,0 +1 @@ +#' @seealso \code{<%= cls %>} methods: \code{\link{<%= cls %>-methods}} diff --git a/tests/testthat/test_differential_expression.R b/tests/testthat/test_differential_expression.R index 0651ae379..373a6b6d9 100644 --- a/tests/testthat/test_differential_expression.R +++ b/tests/testthat/test_differential_expression.R @@ -326,6 +326,34 @@ test_that("FindAllMarkers works as expected", { expect_equal(rownames(results.pseudo)[1], "HLA-DPB1") }) + +# Tests for running FindMarkers post integration/transfer +ref <- pbmc_small +ref <- FindVariableFeatures(object = ref, verbose = FALSE, nfeatures = 100) +query <- CreateSeuratObject( + counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts") + rpois(n = ncol(pbmc_small), lambda = 1) +) +query2 <- CreateSeuratObject( + counts = GetAssayData(object = pbmc_small[['RNA']], slot = "counts")[, 1:40] + rpois(n = ncol(pbmc_small), lambda = 1) +) +query.list <- list(query, query2) +query.list <- lapply(X = query.list, FUN = NormalizeData, verbose = FALSE) +query.list <- lapply(X = query.list, FUN = FindVariableFeatures, verbose = FALSE, nfeatures = 100) +query.list <- lapply(X = query.list, FUN = ScaleData, verbose = FALSE) +query.list <- suppressWarnings(lapply(X = query.list, FUN = RunPCA, verbose = FALSE, npcs = 20)) + +anchors <- suppressMessages(suppressWarnings(FindIntegrationAnchors(object.list = c(ref, query.list), k.filter = NA, verbose = FALSE))) +object <- suppressMessages(IntegrateData(anchorset = anchors, k.weight = 25, verbose = FALSE)) +object <- suppressMessages(ScaleData(object, verbose = FALSE)) +object <- suppressMessages(RunPCA(object, verbose = FALSE)) +object <- suppressMessages(FindNeighbors(object = object, verbose = FALSE)) +object <- suppressMessages(FindClusters(object, verbose = FALSE)) +markers <- FindMarkers(object = object, ident.1="0", ident.2="1") +test_that("FindMarkers recognizes log normalizatio", { + expect_equal(markers[1, "p_val"], 1.598053e-14) + expect_equal(markers[1, "avg_log2FC"], -2.614686, tolerance = 1e-6) +}) + # Tests for FindConservedMarkers # ------------------------------------------------------------------------------- diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index fd28ff622..2193aa3e3 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -105,7 +105,7 @@ We provide a series of vignettes, tutorials, and analysis walkthroughs to help u For new users of Seurat, we suggest starting with a guided walk through of a dataset of 2,700 Peripheral Blood Mononuclear Cells (PBMCs) made publicly available by 10X Genomics. This tutorial implements the major components of a standard unsupervised clustering workflow including QC and data filtration, calculation of high-variance genes, dimensional reduction, graph-based clustering, and the identification of cluster markers. -We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x mulitome kit), or spatial datasets (e.g. from 10x visium or SLIDE-seq). +We provide additional introductory vignettes for users who are interested in analyzing multimodal single-cell datasets (e.g. from CITE-seq, or the 10x multiome kit), or spatial datasets (e.g. 10x Visium or Vizgen MERFISH). ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE} make_vignette_card_section(vdat = vdat, cat = 1) diff --git a/vignettes/spatial_vignette_2.Rmd b/vignettes/spatial_vignette_2.Rmd new file mode 100644 index 000000000..2e315bccf --- /dev/null +++ b/vignettes/spatial_vignette_2.Rmd @@ -0,0 +1,341 @@ +--- +title: "Analysis of Image-based Spatial Data in Seurat" +output: + html_document: + theme: united + df_print: kable +date: 'Compiled: `r format(Sys.Date(), "%B %d, %Y")`' +--- +*** + +```{r setup, include=FALSE} +all_times <- list() # store the time for each chunk +knitr::knit_hooks$set(time_it = local({ + now <- NULL + function(before, options) { + if (before) { + now <<- Sys.time() + } else { + res <- difftime(Sys.time(), now, units = "secs") + all_times[[options$label]] <<- res + } + } +})) +knitr::opts_chunk$set( + tidy = TRUE, + tidy.opts = list(width.cutoff = 95), + message = FALSE, + warning = FALSE, + time_it = TRUE +) +``` + +# Overview + +In this vignette, we introduce a Seurat extension to analyze new types of spatially-resolved data. We have [previously introduced a spatial framework](https://satijalab.org/seurat/articles/spatial_vignette.html) which is compatible with sequencing-based technologies, like the 10x Genomics Visium system, or SLIDE-seq. Here, we extend this framework to analyze new data types that are captured via highly multiplexed imaging. In contrast to sequencing-based technologies, these datasets are often targeted (i.e. they profile a pre-selected set of genes). However they can resolve individual molecules - retaining single-cell (and subcellular) resolution. These approaches also often capture cellular boundaries (segmentations). + +We update the Seurat infrastructure to enable the analysis, visualization, and exploration of these exciting datasets. In this vignette, we focus on three datasets produced by different multiplexed imaging technologies, each of which is publicly available. We will be adding support for additional imaging-based technologies in the coming months. + +* Vizgen MERSCOPE (Mouse Brain) +* Nanostring CosMx Spatial Molecular Imager (FFPE Human Lung) +* Akoya CODEX (Human Lymph Node) + +First, we install the updated versions of Seurat and SeuratObject that support this infrastructure, as well as other packages necessary for this vignette. + +```{r init, message=FALSE, warning=FALSE} +library(remotes) +remotes::install_github("satijalab/seurat", "feat/imaging") +library(Seurat) +library(future) +plan("multisession", workers = 10) +``` + +# Mouse Brain: Vizgen MERSCOPE + +This dataset was produced using the Vizgen MERSCOPE system, which utilizes the MERFISH technology. The total dataset is available for [public download](https://info.vizgen.com/mouse-brain-data), and contains nine samples (three full coronal slices of the mouse brain, with three biological replicates per slice). The gene panel consists of 483 gene targets, representing known anonical cell type markers, nonsensory G-Protein coupled receptors (GPCRs), and Receptor Tyrosine Kinases (RTKs). In this vignette, we analyze one of the samples - slice 2, replicate 1. The median number of transcripts detected in each cell is 206. + +First, we read in the dataset and create a Seurat object. + +We use the `LoadVizgen()` function, which we have written to read in the output of the Vizgen analysis pipeline. The resulting Seurat object contains the following information: + +* A count matrix, indicating the number of observed molecules for each of the 483 transcripts in each cell. This matrix is analogous to a count matrix in scRNA-seq, and is stored by default in the RNA assay of the Seurat object + +```{r, message=FALSE, warning=FALSE} +# Loading segmentations is a slow process and multi processing with the future pacakge is recommended +vizgen.obj <- LoadVizgen(data.dir = "/brahms/hartmana/spatial_vignette_data/vizgen/s2r1/", fov = "s2r1") +``` + +The next pieces of information are specific to imaging assays, and is stored in the images slot of the resulting Seurat object: + +
+ **Cell Centroids: The spatial coordinates marking the centroid for each cell being profiled** + +```{r} +# Get the center position of each centroid. There is one row per cell in this dataframe. +head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["centroids"]])) +``` +
+
+ **Cell Segmentation Boundaries: The spatial coordinates that describe the polygon segmentation of each single cell** + +```{r} +# Get the coordinates for each segmentation vertice. Each cell will have a variable number of vertices describing its shape. +head(GetTissueCoordinates(vizgen.obj[["s2r1"]][["segmentation"]])) +``` +
+
+ **Molecule positions: The spatial coordinates for each individual molecule that was detected during the multiplexed smFISH experiment.** + +```{r} +# Fetch molecules positions for Chrm1 +head(FetchData(vizgen.obj[["s2r1"]][["molecules"]], vars="Chrm1")) +``` +
+\ + +## Preprocessing and unsupervised analysis +We start by performing a standard unsupervised clustering analysis, essentially first treating the dataset as an scRNA-seq experiment. We use SCTransform-based normalization, though we slightly modify the default clipping parameters to mitigate the effect of outliers that we occasionally observe in smFISH experiments. After normalization, we can run dimensional reduction and clustering. + +```{r analysis, results='hide'} +vizgen.obj <- SCTransform(vizgen.obj, assay = "Vizgen", clip.range = c(-10,10),) +vizgen.obj <- RunPCA(vizgen.obj, npcs = 30, features = rownames(vizgen.obj)) +vizgen.obj <- RunUMAP(vizgen.obj, dims = 1:30) +vizgen.obj <- FindNeighbors(vizgen.obj, reduction = "pca", dims = 1:30) +vizgen.obj <- FindClusters(vizgen.obj, resolution = 0.3) +``` + +We can then visualize the results of the clustering either in UMAP space (with `DimPlot()`) or overlaid on the image with `ImageDimPlot()`. + +```{r umap} +DimPlot(vizgen.obj, reduction = "umap") +``` + +```{r spatial.plot, fig.height=6, fig.width=6} +ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "polychrome", axes = TRUE) +``` + +You can also customize multiple aspect of the plot, including the color scheme, cell border widths, and size (see below). + +
+ **Customizing spatial plots in Seurat** + +The `ImageDimPlot()` and `ImageFeaturePlot()` functions have a few parameters which you can customize individual visualizations. These include: + +* alpha: Ranges from 0 to 1. Sets the transparency of within-cell coloring. +* size: determines the size of points representing cells, if centroids are being plotted +* cols: Sets the color scheme for the internal shading of each cell. Examples settings are `polychrome`, `glasbey`, `Paired`, `Set3`, and `parade`. Default is the ggplot2 color palette +* shuffle.cols: In some cases the selection of `cols` is more effective when the same colors are assigned to different clusters. Set `shuffle.cols = TRUE` to randomly shuffle the colors in the palette. +* border.size: Sets the width of the cell segmentation borders. By default, segmentations are plotted with a border size of 0.3 and centroids are plotted without border. +* border.color: Sets the color of the cell segmentation borders +* dark.background: Sets a black background color (TRUE by default) +* axes: Display +
+ +Since it can be difficult to visualize the spatial localization patterns of an individual cluster when viewing them all together, we can highlight all cells that belong to a particular cluster: + +```{r, fig.height=8, fig.width=12} +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 14)) +p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", cols = "red", cells = WhichCells(vizgen.obj, idents = 15)) +p1 + p2 +``` + +We can find markers of individual clusters and visualize their spatial expression pattern. We can color cells based on their quantified expression of an individual gene, using `ImageFeaturePlot()`, which is analagous to the `FeaturePlot()` function for visualizing expression on a 2D embedding. Since MERFISH images individual molecules, we can also visualize the location of individual *molecules*. + +```{r, fig.height=7, fig.width=12} +p1 <- ImageFeaturePlot(vizgen.obj, features = "Slc17a7") +p2 <- ImageDimPlot(vizgen.obj, molecules = "Slc17a7", nmols = 10000, alpha = 0.3, mols.cols = "red") +p1 + p2 +``` + +Note that the `nmols` parameter can be used to reduce the total number of molecules shown to reduce overplotting. You can also use the `mols.size`, `mols.cols`, and `mols.alpha` parameter to further optimize. + +Plotting molecules is especially useful for visualizing co-expression of multiple genes on the same plot. + +```{r, fig.height=7, fig.width=12} +p1 <- ImageDimPlot(vizgen.obj, fov = "s2r1", alpha = 0.3, molecules = c("Slc17a7", "Olig1"), nmols = 10000) +markers.14 <- FindMarkers(vizgen.obj, ident.1 = "14") +p2 <- ImageDimPlot(vizgen.obj, fov = "s2r1", alpha = 0.3, molecules = rownames(markers.14)[1:4], nmols = 10000) +p1 + p2 +``` + +The updated Seurat spatial framework has the option to treat cells as individual points, or also to visualize cell boundaries (segmentations). By default, Seurat ignores cell segmentations and treats each cell as a point ('centroids'). This speeds up plotting, especially when looking at large areas, where cell boundaries are too small to visualize. + +We can zoom into a region of tissue, creating a new field of view. For example, we can zoom into a region that contains the hippocampus. Once zoomed-in, we can set `DefaultBoundary()` to show cell segmentations. You can also 'simplify' the cell segmentations, reducing the number of edges in each polygon to speed up plotting. + +```{r, fig.height=5, fig.width=14} +# create a Crop +cropped.coords <- Crop(vizgen.obj[["s2r1"]], x = c(1750, 3000), y = c(3750, 5250), coords = "plot") +# set a new field of view (fov) +vizgen.obj[["hippo"]] <- cropped.coords + +# visualize FOV using default settings (no cell boundaries) +p1 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, size = 0.7, border.color = "white", cols = "polychrome", coord.fixed = FALSE) + +# visualize FOV with full cell segmentations +DefaultBoundary(vizgen.obj[["hippo"]]) <- "segmentation" +p2 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome", coord.fixed = FALSE) + +# simplify cell segmentations +vizgen.obj[["hippo"]][["simplified.segmentations"]] <- Simplify(coords = vizgen.obj[["hippo"]][["segmentation"]], tol = 3) +DefaultBoundary(vizgen.obj[["hippo"]]) <- "simplified.segmentations" + +# visualize FOV with simplified cell segmentations +DefaultBoundary(vizgen.obj[["hippo"]]) <- "simplified.segmentations" +p3 <- ImageDimPlot(vizgen.obj, fov = "hippo", axes = TRUE, border.color = "white", border.size = 0.1, cols = "polychrome", coord.fixed = FALSE) + +p1 + p2 + p3 +``` + +
+ **What is the tol parameter?** + +The tol parameter determines how simplified the resulting segmentations are. A higher value of tol will reduce the number of vertices more drastically which will speed up plotting, but some segmentation detail will be lost. See https://rgeos.r-forge.r-project.org/reference/topo-unary-gSimplify.html for examples using different values for tol. + +
+ +We can visualize individual molecules plotted at higher resolution after zooming-in +```{r, fig.height=8, fig.width=8} +# Since there is nothing behind the segmentations, alpha will slightly mute colors +ImageDimPlot(vizgen.obj, fov = "hippo", molecules = rownames(markers.14)[1:4], cols = "polychrome", mols.size = 1, alpha = 0.5, mols.cols = c("red", "blue", "yellow", "green")) +``` + +# Human Lung: Nanostring CosMx Spatial Molecular Imager + +This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for [public download](https://www.nanostring.com/products/cosmx-spatial-molecular-imager/ffpe-dataset/). The gene panel consists of 960 transcripts. + +In this vignette, we load one of 8 samples (lung 5, replicate 1). We use the `LoadNanostring()` function, which parses the outputs available on the public download site. Note that the coordinates for the cell boundaries were provided by Nanostring by request, and are available for download [here](https://www.dropbox.com/s/hl3peavrx92bluy/Lung5_Rep1-polygons.csv?dl=0). + +For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the [human lung](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Lung%20v1) reference version 1.0.0. You can download the precomputed results [here](https://seurat.nygenome.org/vignette_data/spatial_vignette_2/nanostring_data.Rds), which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process. + +```{r load} +nano.obj <- LoadNanostring(data.dir = "/brahms/hartmana/spatial_vignette_data/nanostring/lung5_rep1", fov="lung5.rep1") +``` + +```{r integration} +# add in precomputed Azimuth annotations +azimuth.data <- readRDS("/brahms/hartmana/spatial_vignette_data/nanostring_data.Rds") +nano.obj <- AddMetaData(nano.obj, metadata = azimuth.data$annotations) +nano.obj[["proj.umap"]] <- azimuth.data$umap +Idents(nano.obj) <- nano.obj$predicted.annotation.l1 + +# set to avoid error exceeding max allowed size of globals +options(future.globals.maxSize = 8000 * 1024^2) +nano.obj <- SCTransform(nano.obj, assay = "Nanostring", clip.range = c(-10, 10), verbose = FALSE) + +# text display of annotations and prediction scores +head(slot(object = nano.obj, name = "meta.data")[2:5]) +``` + +We can visualize the Nanostring cells and annotations, projected onto the reference-defined UMAP. Note that for this NSCLC sample, tumor samples are annotated as 'basal', which is the closest cell type match in the healthy reference. + +```{r, fig.width=9, fig.height=4} +DimPlot(nano.obj) +``` + +## Visualization of cell type and expression localization patterns + +As in the previous example, `ImageDimPlot()` plots c ells based on their spatial locations, and colors them based on their assigned cell type. Notice that the basal cell population (tumor cells) is tightly spatially organized, as expected. + +```{r, fig.width=11, fig.height=7} +ImageDimPlot(nano.obj, fov = "lung5.rep1", axes = TRUE, cols = "glasbey") +``` + +Since there are many cell types present, we can highlight the localization of a few select groups. + +```{r, fig.width=10, fig.height=7} +ImageDimPlot(nano.obj, fov = "lung5.rep1", cells = WhichCells(nano.obj, idents=c("Basal", "Macrophage", "Smooth Muscle", "CD4 T")), cols=c("red", "green", "blue", "orange"), size = 0.6) +``` + +We can also visualize gene expression markers a few different ways: + +```{r, fig.width=10, fig.height=5} +VlnPlot(nano.obj, features = "KRT17", slot = "counts", pt.size = 0.1, y.max = 30) + NoLegend() +``` + +```{r, fig.width=5, fig.height=4} +FeaturePlot(nano.obj, features = "KRT17") +``` + +```{r, fig.height=4, fig.width=8} +p1 <- ImageFeaturePlot(nano.obj, fov = "lung5.rep1", features = "KRT17", max.cutoff = "q95") +p2 <- ImageDimPlot(nano.obj, fov = "lung5.rep1", alpha = 0.3, molecules = "KRT17", nmols = 10000) + NoLegend() +p1 + p2 +``` + +We can plot molecules in order to co-visualize the expression of multiple markers, including KRT17 (basal cells), C1QA (macrophages), IL7R (T cells), and TAGLN (Smooth muscle cells). + +```{r, fig.width=10, fig.height=7} +# Plot some of the molecules which seem to display spatial correlation with each other +ImageDimPlot(nano.obj, fov = "lung5.rep1", group.by = NA, alpha = 0.3, molecules = c("KRT17", "C1QA", "IL7R", "TAGLN"), nmols = 20000) +``` + +We zoom in on one basal-rich region using the `Crop()` function. Once zoomed-in, we can visualize individual cell boundaries as well in all visualizations. + +```{r} +basal.crop <- Crop(nano.obj[["lung5.rep1"]], x = c(159500, 164000), y = c(8700, 10500)) +nano.obj[["zoom1"]] <- basal.crop +DefaultBoundary(nano.obj[["zoom1"]]) <- "segmentation" +``` + +```{r, fig.width=11, fig.height=7} +ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", coord.fixed = FALSE) +``` + +```{r, fig.width=11, fig.height=7} +# note the clouds of TPSAB1 molecules denoting mast cells +ImageDimPlot(nano.obj, fov = "zoom1", cols = "polychrome", alpha = 0.3, molecules = c("KRT17", "IL7R", "TPSAB1"), mols.size = 0.3, nmols = 20000, border.color = "black", coord.fixed = FALSE) +``` + +# Human Lymph Node: Akoya CODEX system + +This dataset was produced using Akoya CODEX system. The CODEX system performs multiplexed spatially-resolved protein profiling, iteratively visualizing antibody-binding events. The dataset here represents a tissue section from a human lymph node, and was generated by the University of Florida as part of the Human Biomolecular Atlas Program (HuBMAP). More information about the sample and experiment is available [here](https://portal.hubmapconsortium.org/browse/dataset/c95d9373d698faf60a66ffdc27499fe1). The protein panel in this dataset consists of 28 markers, and protein intensities were quantified as part of the Akoya processor pipeline, which outputs a CSV file providing the intensity of each marker in each cell, as well as the cell coordinates. The file is available for public download via Globus [here](https://app.globus.org/file-manager?origin_id=af603d86-eab9-4eec-bb1d-9d26556741bb&origin_path=%2Fc95d9373d698faf60a66ffdc27499fe1%2Fdrv_CX_20-008_lymphnode_n10_reg001%2Fprocessed_2020-12-2320-008LNn10r001%2Fsegm%2Fsegm-1%2Ffcs%2Fcompensated%2F). + + +First, we load in the data of a HuBMAP dataset using the `LoadAkoya()` function in Seurat: + +```{r} +codex.obj <- LoadAkoya( + filename = "/brahms/hartmana/spatial_vignette_data/LN7910_20_008_11022020_reg001_compensated.csv", + type = "processor", + fov = "HBM754.WKLP.262" +) +``` + +We can now run unsupervised analysis to identify cell clusters. To normalize the protein data, we use centered log-ratio based normalization, as we typically apply to the protein modality of CITE-seq data. We then run dimensional reduction and graph-based clustering. + +```{r} +codex.obj <- NormalizeData(object = codex.obj, normalization.method = "CLR", margin = 2) +codex.obj <- ScaleData(codex.obj) +VariableFeatures(codex.obj) <- rownames(codex.obj) # since the panel is small, treat all features as variable. +codex.obj <- RunPCA(object = codex.obj, npcs = 20, verbose = FALSE) +codex.obj <- RunUMAP(object = codex.obj, dims = 1:20, verbose = FALSE) +codex.obj <- FindNeighbors(object = codex.obj, dims = 1:20, verbose = FALSE) +codex.obj <- FindClusters(object = codex.obj, verbose = FALSE, resolution = 0.4, n.start = 1) +``` + +We can visualize the cell clusters based on a protein intensity-based UMAP embedding, or also based on their spatial location. + +```{r} +DimPlot(codex.obj, label = TRUE, label.box = TRUE) + NoLegend() +``` + +```{r, fig.width=6, fig.height=5} +ImageDimPlot(codex.obj, cols = "parade") +``` + +The expression patters of individual markers clearly denote different cell types and spatial structures, including Lyve1 (lymphatic endothelial cells), CD34 (blood endothelial cells), and CD21 (B cells). As expected, endothelial cells group together into vessels, and B cells are key components of specialized microstructures known as germinal zones. You can read more about protein markers in this dataset, as well as cellular networks in human lynmphatic tissues, in this [preprint](https://www.biorxiv.org/content/10.1101/2021.10.20.465151v1.full). + +```{r, fig.width=9, fig.height=8} +p1 <- ImageFeaturePlot(codex.obj, fov = "HBM754.WKLP.262", features = c("CD34", "CD21", "Lyve1"), min.cutoff = "q10", max.cutoff = "q90") +p2 <- ImageDimPlot(codex.obj, fov = "HBM754.WKLP.262", cols = "parade") +p1 + p2 +``` + +Each of these datasets represents an opportunity to learn organizing principles that govern the spatial localization of different cell types. Stay tuned for future updates to Seurat enabling further exploration and characterization of the relationship between spatial position and molecular state. + +
+ **Session Info** +```{r} +sessionInfo() +``` +
diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml index 7ed3c016c..6316d70bf 100644 --- a/vignettes/vignettes.yaml +++ b/vignettes/vignettes.yaml @@ -12,12 +12,18 @@ An introduction to working with multi-modal datasets in Seurat. image: citeseq_plot.jpg - - title: Analysis of spatial datasets + - title: Analysis of spatial datasets (Sequencing-based) name: spatial_vignette summary: | Learn to explore spatially-resolved transcriptomic data with examples from 10x Visium and Slide-seq v2. image: spatial_vignette_ttr.jpg + - title: Analysis of spatial datasets (Imaging-based) + name: spatial_vignette_2 + summary: | + Learn to explore spatially-resolved data from multiplexed imaging technologies, including MERFISH, Nanostring SMI, and CODEX. + image: spatial_vignette_2.png + - category: Data Integration vignettes: - title: Introduction to scRNA-seq integration