Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support to handle .parquet output from Vizgen #7190

Open
wants to merge 24 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
5fe5b89
updates for Vizgen
alikhuseynov Apr 13, 2023
aa32cc6
updates for Vizgen
alikhuseynov Apr 13, 2023
56d983c
updates for `ReadVizgen()`
alikhuseynov Apr 13, 2023
115e45d
updates for `LoadVizgen`
alikhuseynov Apr 13, 2023
eff89e9
fix for argument 'metadata'
alikhuseynov Apr 13, 2023
87d9303
param args for `LoadVizgen`
alikhuseynov Apr 13, 2023
089740a
fix args for `ReadVizgen`
alikhuseynov Apr 13, 2023
547000e
add support for `future.apply`
alikhuseynov Apr 19, 2023
f73dbdb
Vizgen support single `.parquet` file
alikhuseynov May 24, 2023
5ca1069
major fix for `.parquet` segmentations
alikhuseynov May 31, 2023
9389ce9
fix for `LoadVizgen`
alikhuseynov May 31, 2023
dc029ac
major fix for `ReadVizgen()`
alikhuseynov Jun 6, 2023
8f7153e
fix for `LoadVizgen()`
alikhuseynov Jun 6, 2023
799323d
update `ReadVizgen()`
alikhuseynov Jul 26, 2023
9ea4458
update `LoadVizgen()`
alikhuseynov Jul 26, 2023
38fad2a
resolving some conflicts in preprocessing.R
alikhuseynov Jul 26, 2023
bf15b6e
..update preprocessing.R from `develop`
alikhuseynov Jul 26, 2023
f8461ca
cleaning `ReadVizgen`
alikhuseynov Aug 18, 2023
a7be25a
small bug fix in `ReadVizgen`
alikhuseynov Aug 22, 2023
6352d56
added `sf` & filter polygons -> `ReadVizgen()`
alikhuseynov Aug 28, 2023
f43fed0
..updated `LoadVizgen()`
alikhuseynov Aug 28, 2023
69ba89d
adding `.filter_polygons()`
alikhuseynov Aug 28, 2023
038271f
optimized parallelization for `.filter_polygons()`
alikhuseynov Aug 28, 2023
9db188a
rm space-only changes in `convenience.R`
alikhuseynov Feb 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
185 changes: 147 additions & 38 deletions R/convenience.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') {
molecules = data$pixels,
assay = assay
)
obj <- CreateSeuratObject(counts = data$matrix, assay = assay)

obj <- CreateSeuratObject(counts = data$matrix, assay = assay)
# subset both object and coords based on the cells shared by both
cells <- intersect(
Cells(x = coords, boundary = "segmentation"),
Expand All @@ -129,44 +129,153 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') {

#' @return \code{LoadVizgen}: A \code{\link[SeuratObject]{Seurat}} object
#'
#' @param fov Name to store FOV as
#' @param assay Name to store expression matrix as
#' @param add.zIndex Whether to add the \code{z} slice index to each cell
#' @param update.object Whether to update the final object, defaults to \code{TRUE}
#' @param add.molecules Whether to add \code{molecules} coordinates to the FOV
#' of the object, defaults to \code{TRUE}
#' @param ... Arguments passed to \code{ReadVizgen}
#'
#' @importFrom SeuratObject Cells CreateCentroids CreateFOV
#' CreateSegmentation CreateSeuratObject
#' @import dplyr
#'
#' @export
#'
#' @rdname ReadVizgen
#'
LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
data <- ReadVizgen(
data.dir = data.dir,
filter = "^Blank-",
type = c("centroids", "segmentations"),
z = z
)
segs <- CreateSegmentation(data$segmentations)
cents <- CreateCentroids(data$centroids)
segmentations.data <- list(
"centroids" = cents,
"segmentation" = segs
)
coords <- CreateFOV(
coords = segmentations.data,
type = c("segmentation", "centroids"),
molecules = data$microns,
assay = assay
)
obj <- CreateSeuratObject(counts = data$transcripts, assay = assay)
# only consider the cells we have counts and a segmentation for
# Cells which don't have a segmentation are probably found in other z slices.
coords <- subset(
x = coords,
cells = intersect(
x = Cells(x = coords[["segmentation"]]),
y = Cells(x = obj)
)
)
# add coords to seurat object
LoadVizgen <- function(
  data.dir,
  fov = 'vz',
  assay = 'Vizgen',
  mol.type = 'microns',
  filter = '^Blank-',
  z = 3L,
  add.zIndex = TRUE,
  update.object = TRUE,
  add.molecules = TRUE,
  min.area = 5,
  verbose = TRUE,
  ...
) {
  # Build a Seurat object from Vizgen output:
  #   1. read the data with ReadVizgen()
  #   2. create the object from the `transcripts` matrix
  #   3. build one FOV from the best available cell boundaries
  #      (segmentations, else boxes, else centroids only)
  #   4. attach z index, metadata and the FOV
  #   5. drop cells with zero counts and optionally update the object
  # `verbose` defaults to TRUE so that it is never evaluated while missing.

  # reading data..
  data <- ReadVizgen(
    data.dir = data.dir,
    mol.type = mol.type,
    filter = filter,
    z = z,
    min.area = min.area,
    verbose = verbose,
    ...
  )

  if (verbose) {
    message("Creating Seurat object..")
  }
  obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay)

  # Pick the boundary type once instead of repeating the FOV construction
  # for every case. Preference: segmentations > boxes > centroids only.
  if ("segmentations" %in% names(data)) {
    bound.name <- "segmentation"
    bound.source <- "segmentations"
    bound.msg <- ">>> using segmentations"
  } else if ("boxes" %in% names(data)) {
    bound.name <- "boxes"
    bound.source <- "boxes"
    bound.msg <- ">>> using box coordinates instead of segmentations"
  } else {
    bound.name <- "centroids"
    bound.source <- NULL
    bound.msg <- ">>> using only centroids"
  }

  if (verbose) {
    # only the lines that apply are emitted, so no empty line is printed
    # when molecules are kept
    msg.lines <- c(
      "Creating FOVs..",
      if (!add.molecules) ">>> `molecules` coordinates will be skipped",
      bound.msg
    )
    message(paste(msg.lines, collapse = "\n"))
  }

  # centroids are always part of the FOV; boxes/segmentations are added
  # on top when available
  fov.coords <- list(centroids = CreateCentroids(data[["centroids"]]))
  if (!is.null(bound.source)) {
    fov.coords[[bound.name]] <- CreateSegmentation(data[[bound.source]])
  }
  # boundary type first, then centroids (just "centroids" when alone)
  fov.types <- unique(c(bound.name, "centroids"))

  coords <- CreateFOV(
    coords = fov.coords,
    type = fov.types,
    molecules = if (add.molecules) data[[mol.type]] else NULL,
    assay = assay
  )
  # only consider the cells we have counts and a boundary for.
  # Cells which don't have a segmentation are probably found in other z slices.
  coords <- subset(
    x = coords,
    cells = intersect(
      x = Cells(x = coords[[bound.name]]),
      y = Cells(x = obj)
    )
  )

  # add z-stack index for cells
  if (add.zIndex) {
    if (is.null(data[["zIndex"]])) {
      warning(
        "`zIndex` was not returned by `ReadVizgen()`, skipping z index",
        call. = FALSE
      )
    } else {
      # NOTE(review): `pull(z)` selects a column named `z` when one exists;
      # otherwise the function argument `z` is used as a column position.
      # Presumably `zIndex` has a `z` column - confirm against ReadVizgen().
      obj$z <- pull(data[["zIndex"]], z)
    }
  }

  # add metadata vars, one column per variable
  if ("metadata" %in% names(data)) {
    if (verbose) {
      message(">>> adding metadata infos")
    }
    meta.list <- data[["metadata"]]
    for (meta.var in names(meta.list)) {
      obj <- AddMetaData(
        object = obj,
        metadata = meta.list[[meta.var]],
        col.name = meta.var
      )
    }
  }

  # sanity on fov name: replace "_" and "-" with "."
  fov <- gsub("_|-", ".", fov)

  if (verbose) {
    message(">>> adding FOV")
  }
  obj[[fov]] <- coords

  ## filter - keep cells with counts > 0
  # Use the count column of this assay explicitly; a pattern match on
  # "nCount" could return several columns once extra metadata is added.
  cell.meta <- obj[[]]
  nCount <- paste0("nCount_", assay)
  if (nCount %in% colnames(cell.meta)) {
    has.counts <- cell.meta[[nCount]] > 0
    if (!all(has.counts)) {
      if (verbose) {
        message(">>> filtering object - keeping cells with counts > 0")
      }
      obj <- subset(x = obj, cells = rownames(cell.meta)[has.counts])
    } else if (verbose) {
      message(">>> all counts are > 0")
    }
  }

  if (update.object) {
    if (verbose) {
      message("Updating object:")
      obj <- UpdateSeuratObject(object = obj)
    } else {
      # same update, but without the progress messages
      obj <- suppressMessages(UpdateSeuratObject(object = obj))
    }
  }

  if (verbose) {
    message("Object is ready!")
  }
  return(obj)
}

Expand All @@ -188,8 +297,8 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
data.dir = data.dir,
type = c("centroids", "segmentations"),
)

segmentations.data <- list(
segmentations.data <- list(
"centroids" = CreateCentroids(data$centroids),
"segmentation" = CreateSegmentation(data$segmentations)
)
Expand All @@ -199,16 +308,16 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
molecules = data$microns,
assay = assay
)

xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
if("Blank Codeword" %in% names(data$matrix))
xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]])
else
xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Unassigned Codeword"]])
xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]])
xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]])

xenium.obj[[fov]] <- coords
xenium.obj[[fov]] <- coords
return(xenium.obj)
}

Expand Down