Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/RELEASE_3_15'
Browse files Browse the repository at this point in the history
  • Loading branch information
jtanevski committed May 3, 2022
2 parents ee34372 + f75bba8 commit 992c1ac
Show file tree
Hide file tree
Showing 12 changed files with 180 additions and 70 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Expand Up @@ -6,6 +6,7 @@
(^|/)\.misty\.temp$
(^|/)results$
^vignettes/mistySeurat.Rmd$
^vignettes/mistySpatialExperiment.Rmd$
^LICENSE$
^.*\.Rproj$
^\.Rproj\.user$
Expand Down
11 changes: 5 additions & 6 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: mistyR
Type: Package
Title: Multiview Intercellular SpaTial modeling framework
Version: 1.2.1
Version: 1.4.0
Authors@R: c(
person(given = "Jovan",
family = "Tanevski",
Expand Down Expand Up @@ -40,10 +40,9 @@ License: GPL-3
Encoding: UTF-8
VignetteBuilder: knitr
Imports: assertthat, caret, deldir, digest, distances, dplyr, filelock,
furrr (>= 0.2.0), ggplot2, MASS, purrr, ranger, readr, rlang, rlist,
R.utils, stats, stringr, tibble, tidyr, withr
Suggests: BiocStyle, covr, future, igraph, knitr, Matrix, progeny, rmarkdown,
sctransform, SingleCellExperiment, SpatialExperiment, SummarizedExperiment,
testthat (>= 3.0.0)
furrr (>= 0.2.0), ggplot2, methods, purrr, ranger, readr, ridge, rlang,
rlist, R.utils, stats, stringr, tibble, tidyr, utils, withr
Suggests: BiocStyle, covr, future, igraph (>= 1.2.7), knitr, MASS,
rmarkdown, testthat (>= 3.0.0)
RoxygenNote: 7.1.2
Config/testthat/edition: 3
18 changes: 15 additions & 3 deletions NEWS.md
@@ -1,3 +1,15 @@
# mistyR 1.4.0

- Release version for Bioconductor 3.15. See changes for 1.3.x.

# mistyR 1.3.x

- Switched from Louvain to Leiden algorithm for community detection (requires igraph >= 1.2.7).
- The metamodel is now build by ridge regression. Intercept p-values are not calculated, the values are set to NA for backwards compatibility.
- Unique value error for cv folds is downgraded to warning.
- Prefix can be added to the column names generated by the fucntions add_juxtaview and add_paraview. This allows modeling the maker expression by its own juxtaview and paraview.
- Bug fixes.

# mistyR 1.2.1

- Fixed a separator issue in results aggregation and signature generation that might cause issues with variable names containing "_".
Expand All @@ -8,16 +20,16 @@

# mistyR 1.1.x

- Added funtions for view manipulation, including view filtering and marker selection.
- Added functions for view manipulation, including view filtering and marker selection.
- Added functions for performance, contribution and importance signature extraction from results.
- Aggregation and signature generation is generalized for samples with non-identical targets by working on the intersection.
- Modeling of intraview can be bypassed.
- Added families of distances to calculate paraview.
- Paraview can exlude measurements within a used defined zone of indifference around each spatial unit.
- Paraview can exlcude measurements within a used defined zone of indifference around each spatial unit.
- Improved plotting control.
- Complete test coverage.

**IMPORTANT**: R2 is now reported in percentages for intra, multi and gain. Collecting results from running mistyR < 1.1.11 will lead to miscalcuation of gain.R2. Update the performance.txt files by multiplying the values in columns intra.R2 and multi.R2 by 100.
**IMPORTANT**: R2 is now reported in percentages for intra, multi and gain. Collecting results from running mistyR < 1.1.11 will lead to miscalculation of gain.R2. Update the performance.txt files by multiplying the values in columns intra.R2 and multi.R2 by 100.

# mistyR 1.0.3

Expand Down
56 changes: 45 additions & 11 deletions R/misty.R
Expand Up @@ -14,6 +14,9 @@
#' @export
dplyr::`%>%`

# allow using tidyselect where
utils::globalVariables("where")

#' Train MISTy models
#'
#' Trains multi-view models for all target markers, estimates the performance,
Expand Down Expand Up @@ -111,23 +114,24 @@ run_misty <- function(views, results.folder = "results", seed = 42,
paste(names(which(target.var == 0)),
collapse = ", "
),
"have zero variance."
"have zero variance (they are noninformative). Remove them to proceed."
)
)

target.unique <- colnames(expr) %>%
purrr::set_names() %>%
purrr::map_int(~ length(unique(expr %>% dplyr::pull(.x))))

assertthat::assert_that(all(target.unique >= cv.folds),
msg = paste(
if (any(target.unique < cv.folds)) {
msg <- paste(
"Targets",
paste(names(which(target.unique < cv.folds)),
collapse = ", "
),
"have fewer unique values than cv.folds"
"have fewer unique values than cv.folds. This might result in errors during modeling."
)
)
warning(msg)
}


coef.file <- paste0(
Expand Down Expand Up @@ -181,13 +185,34 @@ run_misty <- function(views, results.folder = "results", seed = 42,

combined.views <- target.model[["meta.model"]]

model.summary <- summary(combined.views)
model.lm <- methods::is(combined.views, "lm")

coefs <- stats::coef(combined.views) %>% tidyr::replace_na(0)

pvals <- if (model.lm) {
# fix for missing pvals
combined.views.summary <- summary(combined.views)
pvals <- data.frame(c = stats::coef(combined.views)) %>%
tibble::rownames_to_column("views") %>%
dplyr::left_join(
data.frame(p = stats::coef(combined.views.summary)[, 4]) %>%
tibble::rownames_to_column("views"),
by = "views"
) %>%
dplyr::pull(.data$p) %>%
tidyr::replace_na(1)

if (bypass.intra) append(pvals[-1], c(NA, 1), 0) else c(NA, pvals)
} else {
pvals <- ridge::pvals(combined.views)$pval[, combined.views$chosen.nPCs]
if (bypass.intra) append(pvals, c(NA, 1), 0) else c(NA, pvals)
}


# coefficient values and p-values
# WARNING: hardcoded column index
coeff <- c(
if (bypass.intra) 0, stats::coef(combined.views),
if (bypass.intra) 1, model.summary$coefficients[, 4]
if (bypass.intra) append(coefs, 0, 1) else coefs,
pvals
)

current.lock <- filelock::lock(coef.lock)
Expand Down Expand Up @@ -219,7 +244,8 @@ run_misty <- function(views, results.folder = "results", seed = 42,
)

# performance
if (sum(target.model[["performance.estimate"]] < 0) > 0) {
if (sum(target.model[["performance.estimate"]] < 0 |
is.na(target.model[["performance.estimate"]])) > 0) {
warning.message <-
paste(
"Negative performance detected and replaced with 0 for target",
Expand All @@ -229,7 +255,15 @@ run_misty <- function(views, results.folder = "results", seed = 42,
}

performance.estimate <- target.model[["performance.estimate"]] %>%
dplyr::mutate_if(~ sum(. < 0) > 0, ~ pmax(., 0))
dplyr::mutate(dplyr::across(
dplyr::ends_with("R2"),
~ pmax(., 0, na.rm = TRUE)
)) %>%
dplyr::mutate(dplyr::across(
dplyr::ends_with("RMSE"),
~ pmin(., max(.), na.rm = TRUE)
))

performance.summary <- c(
performance.estimate %>% colMeans(),
tryCatch(stats::t.test(performance.estimate %>%
Expand Down
53 changes: 37 additions & 16 deletions R/models.R
Expand Up @@ -21,7 +21,6 @@
#' @noRd
build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
cv.folds = 10, cached = FALSE, ...) {

cache.location <- R.utils::getAbsolutePath(paste0(
".misty.temp", .Platform$file.sep,
views[["misty.uniqueid"]]
Expand Down Expand Up @@ -67,7 +66,7 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
} else {
if ((view[["abbrev"]] == "intra") & bypass.intra) {
transformed.view.data <-
tibble::tibble(!!target := target.vector, ".novar" := 0)
tibble::tibble(!!target := target.vector, ".novar" := 1)
} else {
transformed.view.data <- view[["data"]] %>%
dplyr::mutate(!!target := target.vector)
Expand Down Expand Up @@ -95,15 +94,25 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
tibble::as_tibble(.name_repair = make.names) %>%
dplyr::mutate(!!target := target.vector)

# train lm on above, if bypass.intra set intercept to 0
# train lm on above, if bypass.intra remove it from the model
formula <- stats::as.formula(
ifelse(bypass.intra, paste0(target, " ~ 0 + ."), paste0(target, " ~ ."))
)
combined.views <- stats::lm(
formula,
oob.predictions
ifelse(bypass.intra, paste0(target, " ~ . - intraview"), paste0(target, " ~ ."))
)


if (ncol(oob.predictions) <= (2 + bypass.intra)) {
combined.views <- stats::lm(
formula,
oob.predictions
)
} else {
combined.views <- ridge::linearRidge(formula,
oob.predictions,
lambda = "automatic",
scaling = "corrForm"
)
}

# cv performance estimate
test.folds <- withr::with_seed(
seed,
Expand All @@ -112,19 +121,31 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,

intra.view.only <-
model.views[["intraview"]]$predictions %>%
tibble::enframe(name = NULL) %>%
tibble::enframe(name = NULL, value = "intraview") %>%
dplyr::mutate(!!target := target.vector)

performance.estimate <- test.folds %>% purrr::map_dfr(function(test.fold) {
meta.intra <- stats::lm(
formula,
intra.view.only %>% dplyr::slice(-test.fold)
)
meta.multi <- stats::lm(
formula,
oob.predictions %>% dplyr::slice(-test.fold)
intra.view.only %>% dplyr::slice(-test.fold),
)

if (identical(oob.predictions, intra.view.only)) {
meta.multi <- meta.intra
} else if (ncol(oob.predictions) <= (2 + bypass.intra)) {
meta.multi <- stats::lm(
formula,
oob.predictions %>% dplyr::slice(-test.fold)
)
} else {
meta.multi <- ridge::linearRidge(
formula,
oob.predictions %>% dplyr::slice(-test.fold),
lambda = "automatic",
scaling = "corrForm"
)
}

intra.prediction <- stats::predict(meta.intra, intra.view.only %>%
dplyr::slice(test.fold))
multi.view.prediction <- stats::predict(meta.multi, oob.predictions %>%
Expand All @@ -141,8 +162,8 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
)

tibble::tibble(
intra.RMSE = intra.RMSE, intra.R2 = 100*intra.R2,
multi.RMSE = multi.RMSE, multi.R2 = 100*multi.R2
intra.RMSE = intra.RMSE, intra.R2 = 100 * intra.R2,
multi.RMSE = multi.RMSE, multi.R2 = 100 * multi.R2
)
})

Expand Down
9 changes: 6 additions & 3 deletions R/plots.R
Expand Up @@ -413,8 +413,11 @@ plot_interaction_communities <- function(misty.results, view, cutoff = 1) {
msg = "The predictor and target markers in the view must match."
)

assertthat::assert_that(requireNamespace("igraph", quietly = TRUE),
msg = "The package igraph is required to calculate the interaction communities."
assertthat::assert_that(requireNamespace("igraph",
versionCheck = list(op = ">=", version = "1.2.7"),
quietly = TRUE
),
msg = "The package igraph (>= 1.2.7) is required to calculate the interaction communities."
)

A <- view.wide %>%
Expand All @@ -430,7 +433,7 @@ plot_interaction_communities <- function(misty.results, view, cutoff = 1) {
igraph::set.vertex.attribute("name", value = names(igraph::V(.))) %>%
igraph::delete.vertices(which(igraph::degree(.) == 0))

C <- igraph::cluster_louvain(G)
C <- igraph::cluster_leiden(G)

layout <- igraph::layout_with_fr(G)

Expand Down
16 changes: 11 additions & 5 deletions R/view-composers.R
Expand Up @@ -218,6 +218,7 @@ get_neighbors <- function(ddobj, id) {
#' as in the intraview.
#' @param neighbor.thr a threshold value used to indicate the largest distance
#' between two spatial units that can be considered as neighboring.
#' @param prefix a prefix to add to the column names.
#' @param cached a \code{logical} indicating whether to cache the calculated view
#' after the first calculation and to reuse a previously cached view if it
#' already exists for this sample.
Expand Down Expand Up @@ -249,7 +250,7 @@ get_neighbors <- function(ddobj, id) {
#' str(misty.views[["juxtaview.1.5"]])
#' @export
add_juxtaview <- function(current.views, positions, neighbor.thr = 15,
cached = FALSE, verbose = TRUE) {
prefix = "", cached = FALSE, verbose = TRUE) {
expr <- current.views[["intraview"]][["data"]]

cache.location <- R.utils::getAbsolutePath(paste0(
Expand Down Expand Up @@ -287,7 +288,7 @@ add_juxtaview <- function(current.views, positions, neighbor.thr = 15,

return(current.views %>% add_views(create_view(
paste0("juxtaview.", neighbor.thr),
juxta.view,
juxta.view %>% dplyr::rename_with(~paste0(prefix, .x)),
paste0("juxta.", neighbor.thr)
)))
}
Expand Down Expand Up @@ -453,7 +454,7 @@ add_paraview <- function(current.views, positions, l, zoi = 0,
"gaussian", "exponential",
"linear", "constant"
),
approx = 1, nn = NULL,
approx = 1, nn = NULL, prefix = "",
cached = FALSE, verbose = TRUE) {
dists <- distances::distances(as.data.frame(positions))
expr <- current.views[["intraview"]][["data"]]
Expand Down Expand Up @@ -495,6 +496,11 @@ add_paraview <- function(current.views, positions, l, zoi = 0,
message("\nApproximating RBF matrix using the Nystrom method")
}

assertthat::assert_that(requireNamespace("MASS", quietly = TRUE),
msg = "The package MASS is required to approximate the paraview using
the Nystrom method."
)

# single Nystrom approximation expert, given RBF with parameter l
s <- sort(sample.int(n = ncol(dists), size = approx))
C <- get_weight(family, dists[, s], l, zoi)
Expand Down Expand Up @@ -529,10 +535,10 @@ add_paraview <- function(current.views, positions, l, zoi = 0,
}
if (cached) readr::write_rds(para.view, para.cache.file)
}

return(current.views %>% add_views(create_view(
paste0("paraview.", l),
para.view,
para.view %>% dplyr::rename_with(~paste0(prefix, .x)),
paste0("para.", l)
)))
}
Expand Down
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -68,6 +68,6 @@ Start by reading `vignette("mistyR")` to learn how to run **mistyR**. To learn h
Example pipelines and synthetic data for **mistyR** are also available from [this repository](https://github.com/saezlab/misty_pipelines). To run **mistyR** on the provided synthetic data run the script *synthetic_pipeline.R*.

## Citation
If you use **mistyR** for your research please cite the [following publication](https://doi.org/10.1101/2020.05.08.084145):
If you use **mistyR** for your research please cite the [following publication](https://doi.org/10.1186/s13059-022-02663-5):

> Jovan Tanevski, Attila Gabor, Ricardo Omar Ramirez Flores, Denis Schapiro, Julio Saez-Rodriguez (2020). Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data. *bioRxiv*. doi: [10.1101/2020.05.08.084145](https://doi.org/10.1101/2020.05.08.084145)
> Jovan Tanevski, Ricardo Omar Ramirez Flores, Attila Gabor, Denis Schapiro, Julio Saez-Rodriguez. Explainable multiview framework for dissecting spatial relationships from highly multiplexed data. Genome Biology 23, 97 (2022). https://doi.org/10.1186/s13059-022-02663-5
17 changes: 9 additions & 8 deletions inst/CITATION
@@ -1,15 +1,16 @@
citEntry(
entry="article",
author = c(person("Jovan", "Tanevski"),
person("Ricardo Omar", "Ramirez Flores"),
person("Attila", "Gabor"),
person("Ricardo Omar", "Ramirez Flores"),
person("Denis", "Schapiro"),
person("Julio", "Saez-Rodriguez")),
title = "Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data.",
journal = "bioRxiv",
number = "2020.05.08.084145",
year = 2020,
url = "https://doi.org/10.1101/2020.05.08.084145",
person("Julio", "Saez-Rodriguez")),
title = "Explainable multiview framework for dissecting spatial relationships from highly multiplexed data",
journal = "Genome Biology",
volume = "23",
number = "97",
year = 2022,
url = "https://doi.org/10.1186/s13059-022-02663-5",

textVersion = "Jovan Tanevski, Attila Gabor, Ricardo Omar Ramirez Flores, Denis Schapiro, Julio Saez-Rodriguez (2020). Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data. bioRxiv. doi: 10.1101/2020.05.08.084145"
textVersion = "Jovan Tanevski, Ricardo Omar Ramirez Flores, Attila Gabor, Denis Schapiro, Julio Saez-Rodriguez. Explainable multiview framework for dissecting spatial relationships from highly multiplexed data. Genome Biology 23, 97 (2022). https://doi.org/10.1186/s13059-022-02663-5"
)

0 comments on commit 992c1ac

Please sign in to comment.