Merge remote-tracking branch 'upstream/RELEASE_3_15'

saezlab · May 3, 2022 · 992c1ac · 992c1ac
2 parents ee34372 + f75bba8
commit 992c1ac
Show file tree

Hide file tree

Showing 12 changed files with 180 additions and 70 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,6 +6,7 @@
 (^|/)\.misty\.temp$
 (^|/)results$
 ^vignettes/mistySeurat.Rmd$
+^vignettes/mistySpatialExperiment.Rmd$
 ^LICENSE$
 ^.*\.Rproj$
 ^\.Rproj\.user$

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mistyR
 Type: Package
 Title: Multiview Intercellular SpaTial modeling framework
-Version: 1.2.1
+Version: 1.4.0
 Authors@R: c(
     person(given = "Jovan",
            family = "Tanevski",
@@ -40,10 +40,9 @@ License: GPL-3
 Encoding: UTF-8
 VignetteBuilder: knitr
 Imports: assertthat, caret, deldir, digest, distances, dplyr, filelock, 
-    furrr (>= 0.2.0), ggplot2, MASS, purrr, ranger, readr, rlang, rlist, 
-    R.utils, stats, stringr, tibble, tidyr, withr
-Suggests: BiocStyle, covr, future, igraph, knitr, Matrix, progeny, rmarkdown,
-    sctransform, SingleCellExperiment, SpatialExperiment, SummarizedExperiment,
-    testthat (>= 3.0.0)
+    furrr (>= 0.2.0), ggplot2, methods, purrr, ranger, readr, ridge, rlang, 
+    rlist, R.utils, stats, stringr, tibble, tidyr, utils, withr
+Suggests: BiocStyle, covr, future, igraph (>= 1.2.7), knitr, MASS, 
+    rmarkdown, testthat (>= 3.0.0)
 RoxygenNote: 7.1.2
 Config/testthat/edition: 3
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# mistyR 1.4.0
+
+- Release version for Bioconductor 3.15. See changes for 1.3.x.
+
+# mistyR 1.3.x
+
+- Switched from Louvain to Leiden algorithm for community detection (requires igraph >= 1.2.7).
+- The metamodel is now build by ridge regression. Intercept p-values are not calculated, the values are set to NA for backwards compatibility.
+- Unique value error for cv folds is downgraded to warning.
+- Prefix can be added to the column names generated by the fucntions add_juxtaview and add_paraview. This allows modeling the maker expression by its own juxtaview and paraview.
+- Bug fixes.
+
 # mistyR 1.2.1
 
 - Fixed a separator issue in results aggregation and signature generation that might cause issues with variable names containing "_".
@@ -8,16 +20,16 @@
 
 # mistyR 1.1.x
 
-- Added funtions for view manipulation, including view filtering and marker selection.
+- Added functions for view manipulation, including view filtering and marker selection.
 - Added functions for performance, contribution and importance signature extraction from results.
 - Aggregation and signature generation is generalized for samples with non-identical targets by working on the intersection.
 - Modeling of intraview can be bypassed.
 - Added families of distances to calculate paraview.
-- Paraview can exlude measurements within a used defined zone of indifference around each spatial unit.
+- Paraview can exlcude measurements within a used defined zone of indifference around each spatial unit.
 - Improved plotting control.
 - Complete test coverage.
 
-**IMPORTANT**: R2 is now reported in percentages for intra, multi and gain. Collecting results from running mistyR < 1.1.11 will lead to miscalcuation of gain.R2. Update the performance.txt files by multiplying the values in columns intra.R2 and multi.R2 by 100.
+**IMPORTANT**: R2 is now reported in percentages for intra, multi and gain. Collecting results from running mistyR < 1.1.11 will lead to miscalculation of gain.R2. Update the performance.txt files by multiplying the values in columns intra.R2 and multi.R2 by 100.
 
 # mistyR 1.0.3
 

diff --git a/R/misty.R b/R/misty.R
@@ -14,6 +14,9 @@
 #' @export
 dplyr::`%>%`
 
+# allow using tidyselect where
+utils::globalVariables("where")
+
 #' Train MISTy models
 #'
 #' Trains multi-view models for all target markers, estimates the performance,
@@ -111,23 +114,24 @@ run_misty <- function(views, results.folder = "results", seed = 42,
       paste(names(which(target.var == 0)),
         collapse = ", "
       ),
-      "have zero variance."
+      "have zero variance (they are noninformative). Remove them to proceed."
     )
   )
 
   target.unique <- colnames(expr) %>%
     purrr::set_names() %>%
     purrr::map_int(~ length(unique(expr %>% dplyr::pull(.x))))
 
-  assertthat::assert_that(all(target.unique >= cv.folds),
-    msg = paste(
+  if (any(target.unique < cv.folds)) {
+    msg <- paste(
       "Targets",
       paste(names(which(target.unique < cv.folds)),
         collapse = ", "
       ),
-      "have fewer unique values than cv.folds"
+      "have fewer unique values than cv.folds. This might result in errors during modeling."
     )
-  )
+    warning(msg)
+  }
 
 
   coef.file <- paste0(
@@ -181,13 +185,34 @@ run_misty <- function(views, results.folder = "results", seed = 42,
 
     combined.views <- target.model[["meta.model"]]
 
-    model.summary <- summary(combined.views)
+    model.lm <- methods::is(combined.views, "lm")
+
+    coefs <- stats::coef(combined.views) %>% tidyr::replace_na(0)
+
+    pvals <- if (model.lm) {
+      # fix for missing pvals
+      combined.views.summary <- summary(combined.views)
+      pvals <- data.frame(c = stats::coef(combined.views)) %>%
+        tibble::rownames_to_column("views") %>%
+        dplyr::left_join(
+          data.frame(p = stats::coef(combined.views.summary)[, 4]) %>%
+            tibble::rownames_to_column("views"),
+          by = "views"
+        ) %>%
+        dplyr::pull(.data$p) %>%
+        tidyr::replace_na(1)
+
+      if (bypass.intra) append(pvals[-1], c(NA, 1), 0) else c(NA, pvals)
+    } else {
+      pvals <- ridge::pvals(combined.views)$pval[, combined.views$chosen.nPCs]
+      if (bypass.intra) append(pvals, c(NA, 1), 0) else c(NA, pvals)
+    }
+
 
     # coefficient values and p-values
-    # WARNING: hardcoded column index
     coeff <- c(
-      if (bypass.intra) 0, stats::coef(combined.views),
-      if (bypass.intra) 1, model.summary$coefficients[, 4]
+      if (bypass.intra) append(coefs, 0, 1) else coefs,
+      pvals
     )
 
     current.lock <- filelock::lock(coef.lock)
@@ -219,7 +244,8 @@ run_misty <- function(views, results.folder = "results", seed = 42,
     )
 
     # performance
-    if (sum(target.model[["performance.estimate"]] < 0) > 0) {
+    if (sum(target.model[["performance.estimate"]] < 0 |
+      is.na(target.model[["performance.estimate"]])) > 0) {
       warning.message <-
         paste(
           "Negative performance detected and replaced with 0 for target",
@@ -229,7 +255,15 @@ run_misty <- function(views, results.folder = "results", seed = 42,
     }
 
     performance.estimate <- target.model[["performance.estimate"]] %>%
-      dplyr::mutate_if(~ sum(. < 0) > 0, ~ pmax(., 0))
+      dplyr::mutate(dplyr::across(
+        dplyr::ends_with("R2"),
+        ~ pmax(., 0, na.rm = TRUE)
+      )) %>%
+      dplyr::mutate(dplyr::across(
+        dplyr::ends_with("RMSE"),
+        ~ pmin(., max(.), na.rm = TRUE)
+      ))
+
     performance.summary <- c(
       performance.estimate %>% colMeans(),
       tryCatch(stats::t.test(performance.estimate %>%

diff --git a/R/models.R b/R/models.R
@@ -21,7 +21,6 @@
 #' @noRd
 build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
                         cv.folds = 10, cached = FALSE, ...) {
-
   cache.location <- R.utils::getAbsolutePath(paste0(
     ".misty.temp", .Platform$file.sep,
     views[["misty.uniqueid"]]
@@ -67,7 +66,7 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
       } else {
         if ((view[["abbrev"]] == "intra") & bypass.intra) {
           transformed.view.data <-
-            tibble::tibble(!!target := target.vector, ".novar" := 0)
+            tibble::tibble(!!target := target.vector, ".novar" := 1)
         } else {
           transformed.view.data <- view[["data"]] %>%
             dplyr::mutate(!!target := target.vector)
@@ -95,15 +94,25 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
     tibble::as_tibble(.name_repair = make.names) %>%
     dplyr::mutate(!!target := target.vector)
 
-  # train lm on above, if bypass.intra set intercept to 0
+  # train lm on above, if bypass.intra remove it from the model
   formula <- stats::as.formula(
-    ifelse(bypass.intra, paste0(target, " ~ 0 + ."), paste0(target, " ~ ."))
-  )
-  combined.views <- stats::lm(
-    formula,
-    oob.predictions
+    ifelse(bypass.intra, paste0(target, " ~ . - intraview"), paste0(target, " ~ ."))
   )
 
+
+  if (ncol(oob.predictions) <= (2 + bypass.intra)) {
+    combined.views <- stats::lm(
+      formula,
+      oob.predictions
+    )
+  } else {
+    combined.views <- ridge::linearRidge(formula,
+      oob.predictions,
+      lambda = "automatic",
+      scaling = "corrForm"
+    )
+  }
+
   # cv performance estimate
   test.folds <- withr::with_seed(
     seed,
@@ -112,19 +121,31 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
 
   intra.view.only <-
     model.views[["intraview"]]$predictions %>%
-    tibble::enframe(name = NULL) %>%
+    tibble::enframe(name = NULL, value = "intraview") %>%
     dplyr::mutate(!!target := target.vector)
 
   performance.estimate <- test.folds %>% purrr::map_dfr(function(test.fold) {
     meta.intra <- stats::lm(
       formula,
-      intra.view.only %>% dplyr::slice(-test.fold)
-    )
-    meta.multi <- stats::lm(
-      formula,
-      oob.predictions %>% dplyr::slice(-test.fold)
+      intra.view.only %>% dplyr::slice(-test.fold),
     )
 
+    if (identical(oob.predictions, intra.view.only)) {
+      meta.multi <- meta.intra
+    } else if (ncol(oob.predictions) <= (2 + bypass.intra)) {
+      meta.multi <- stats::lm(
+        formula,
+        oob.predictions %>% dplyr::slice(-test.fold)
+      )
+    } else {
+      meta.multi <- ridge::linearRidge(
+        formula,
+        oob.predictions %>% dplyr::slice(-test.fold),
+        lambda = "automatic",
+        scaling = "corrForm"
+      )
+    }
+
     intra.prediction <- stats::predict(meta.intra, intra.view.only %>%
       dplyr::slice(test.fold))
     multi.view.prediction <- stats::predict(meta.multi, oob.predictions %>%
@@ -141,8 +162,8 @@ build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
     )
 
     tibble::tibble(
-      intra.RMSE = intra.RMSE, intra.R2 = 100*intra.R2,
-      multi.RMSE = multi.RMSE, multi.R2 = 100*multi.R2
+      intra.RMSE = intra.RMSE, intra.R2 = 100 * intra.R2,
+      multi.RMSE = multi.RMSE, multi.R2 = 100 * multi.R2
     )
   })
 

diff --git a/R/plots.R b/R/plots.R
@@ -413,8 +413,11 @@ plot_interaction_communities <- function(misty.results, view, cutoff = 1) {
     msg = "The predictor and target markers in the view must match."
   )
 
-  assertthat::assert_that(requireNamespace("igraph", quietly = TRUE),
-    msg = "The package igraph is required to calculate the interaction communities."
+  assertthat::assert_that(requireNamespace("igraph",
+    versionCheck = list(op = ">=", version = "1.2.7"),
+    quietly = TRUE
+  ),
+  msg = "The package igraph (>= 1.2.7) is required to calculate the interaction communities."
   )
 
   A <- view.wide %>%
@@ -430,7 +433,7 @@ plot_interaction_communities <- function(misty.results, view, cutoff = 1) {
     igraph::set.vertex.attribute("name", value = names(igraph::V(.))) %>%
     igraph::delete.vertices(which(igraph::degree(.) == 0))
 
-  C <- igraph::cluster_louvain(G)
+  C <- igraph::cluster_leiden(G)
 
   layout <- igraph::layout_with_fr(G)
 

diff --git a/R/view-composers.R b/R/view-composers.R
@@ -218,6 +218,7 @@ get_neighbors <- function(ddobj, id) {
 #'     as in the intraview.
 #' @param neighbor.thr a threshold value used to indicate the largest distance
 #'     between two spatial units that can be considered as neighboring.
+#' @param prefix a prefix to add to the column names.
 #' @param cached a \code{logical} indicating whether to cache the calculated view
 #'     after the first calculation and to reuse a previously cached view if it
 #'     already exists for this sample.
@@ -249,7 +250,7 @@ get_neighbors <- function(ddobj, id) {
 #' str(misty.views[["juxtaview.1.5"]])
 #' @export
 add_juxtaview <- function(current.views, positions, neighbor.thr = 15,
-                          cached = FALSE, verbose = TRUE) {
+                          prefix = "", cached = FALSE, verbose = TRUE) {
   expr <- current.views[["intraview"]][["data"]]
 
   cache.location <- R.utils::getAbsolutePath(paste0(
@@ -287,7 +288,7 @@ add_juxtaview <- function(current.views, positions, neighbor.thr = 15,
 
   return(current.views %>% add_views(create_view(
     paste0("juxtaview.", neighbor.thr),
-    juxta.view,
+    juxta.view %>% dplyr::rename_with(~paste0(prefix, .x)),
     paste0("juxta.", neighbor.thr)
   )))
 }
@@ -453,7 +454,7 @@ add_paraview <- function(current.views, positions, l, zoi = 0,
                            "gaussian", "exponential",
                            "linear", "constant"
                          ),
-                         approx = 1, nn = NULL,
+                         approx = 1, nn = NULL, prefix = "",
                          cached = FALSE, verbose = TRUE) {
   dists <- distances::distances(as.data.frame(positions))
   expr <- current.views[["intraview"]][["data"]]
@@ -495,6 +496,11 @@ add_paraview <- function(current.views, positions, l, zoi = 0,
           message("\nApproximating RBF matrix using the Nystrom method")
         }
 
+        assertthat::assert_that(requireNamespace("MASS", quietly = TRUE),
+          msg = "The package MASS is required to approximate the paraview using
+          the Nystrom method."
+        )
+
         # single Nystrom approximation expert, given RBF with parameter l
         s <- sort(sample.int(n = ncol(dists), size = approx))
         C <- get_weight(family, dists[, s], l, zoi)
@@ -529,10 +535,10 @@ add_paraview <- function(current.views, positions, l, zoi = 0,
     }
     if (cached) readr::write_rds(para.view, para.cache.file)
   }
-
+  
   return(current.views %>% add_views(create_view(
     paste0("paraview.", l),
-    para.view,
+    para.view %>% dplyr::rename_with(~paste0(prefix, .x)),
     paste0("para.", l)
   )))
 }

diff --git a/README.md b/README.md
@@ -68,6 +68,6 @@ Start by reading `vignette("mistyR")` to learn how to run **mistyR**. To learn h
 Example pipelines and synthetic data for **mistyR** are also available from [this repository](https://github.com/saezlab/misty_pipelines). To run **mistyR** on the provided synthetic data run the script *synthetic_pipeline.R*.
 
 ## Citation
-If you use **mistyR** for your research please cite the [following publication](https://doi.org/10.1101/2020.05.08.084145): 
+If you use **mistyR** for your research please cite the [following publication](https://doi.org/10.1186/s13059-022-02663-5): 
 
-> Jovan Tanevski, Attila Gabor, Ricardo Omar Ramirez Flores, Denis Schapiro, Julio Saez-Rodriguez (2020). Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data. *bioRxiv*. doi: [10.1101/2020.05.08.084145](https://doi.org/10.1101/2020.05.08.084145)
+> Jovan Tanevski, Ricardo Omar Ramirez Flores, Attila Gabor, Denis Schapiro, Julio Saez-Rodriguez. Explainable multiview framework for dissecting spatial relationships from highly multiplexed data. Genome Biology 23, 97 (2022). https://doi.org/10.1186/s13059-022-02663-5
diff --git a/inst/CITATION b/inst/CITATION
@@ -1,15 +1,16 @@
 citEntry(
     entry="article",
     author = c(person("Jovan", "Tanevski"),
+           person("Ricardo Omar", "Ramirez Flores"),
     		   person("Attila", "Gabor"),
-    		   person("Ricardo Omar", "Ramirez Flores"),
     		   person("Denis", "Schapiro"),
-               person("Julio", "Saez-Rodriguez")),
-    title = "Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data.",
-    journal = "bioRxiv",
-    number = "2020.05.08.084145",
-    year = 2020,
-    url = "https://doi.org/10.1101/2020.05.08.084145",
+           person("Julio", "Saez-Rodriguez")),
+    title = "Explainable multiview framework for dissecting spatial relationships from highly multiplexed data",
+    journal = "Genome Biology",
+    volume = "23",
+    number = "97",
+    year = 2022,
+    url = "https://doi.org/10.1186/s13059-022-02663-5",
 
-    textVersion = "Jovan Tanevski, Attila Gabor, Ricardo Omar Ramirez Flores, Denis Schapiro, Julio Saez-Rodriguez (2020). Explainable multi-view framework for dissecting inter-cellular signaling from highly multiplexed spatial data. bioRxiv. doi: 10.1101/2020.05.08.084145"
+    textVersion = "Jovan Tanevski, Ricardo Omar Ramirez Flores, Attila Gabor, Denis Schapiro, Julio Saez-Rodriguez. Explainable multiview framework for dissecting spatial relationships from highly multiplexed data. Genome Biology 23, 97 (2022). https://doi.org/10.1186/s13059-022-02663-5"
 )