Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/RELEASE_3_14'
Browse files Browse the repository at this point in the history
  • Loading branch information
jtanevski committed Oct 27, 2021
2 parents c68db61 + 5ccab86 commit 940be06
Show file tree
Hide file tree
Showing 34 changed files with 1,314 additions and 223 deletions.
13 changes: 8 additions & 5 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: mistyR
Type: Package
Title: Multiview Intercellular SpaTial modeling framework
Version: 1.0.3
Version: 1.2.0
Authors@R: c(
person(given = "Jovan",
family = "Tanevski",
Expand All @@ -11,7 +11,10 @@ Authors@R: c(
person(given = "Ricardo Omar",
family = "Ramirez Flores",
role = "ctb",
comment = c(ORCID = "0000-0003-0087-371X")))
comment = c(ORCID = "0000-0003-0087-371X")),
person(given = "Philipp",
family = "Schäfer",
role = "ctb"))
Description: mistyR is an implementation of the Multiview Intercellular
SpaTial modeling framework (MISTy). MISTy is an explainable machine
learning framework for knowledge extraction and analysis of single-cell,
Expand All @@ -28,10 +31,10 @@ Description: mistyR is an implementation of the Multiview Intercellular
marker expressions. Each MISTy view is then analyzed for its contribution
to the total expression of each marker and is explained in terms of the
interactions with other measurements that led to the observed contribution.
URL: https://github.com/saezlab/mistyR
URL: https://saezlab.github.io/mistyR/
BugReports: https://github.com/saezlab/mistyR/issues
biocViews: Software, BiomedicalInformatics, CellBiology, SystemsBiology,
Regression, DecisionTree, SingleCell
Regression, DecisionTree, SingleCell, Spatial
Depends: R (>= 4.0)
License: GPL-3
Encoding: UTF-8
Expand All @@ -42,5 +45,5 @@ Imports: assertthat, caret, deldir, digest, distances, dplyr, filelock,
Suggests: BiocStyle, covr, future, igraph, knitr, Matrix, progeny, rmarkdown,
sctransform, SingleCellExperiment, SpatialExperiment, SummarizedExperiment,
testthat (>= 3.0.0)
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
Config/testthat/edition: 3
4 changes: 4 additions & 0 deletions NAMESPACE
Expand Up @@ -8,14 +8,18 @@ export(clear_cache)
export(collect_results)
export(create_initial_view)
export(create_view)
export(extract_signature)
export(filter_views)
export(plot_contrast_heatmap)
export(plot_contrast_results)
export(plot_improvement_stats)
export(plot_interaction_communities)
export(plot_interaction_heatmap)
export(plot_view_contributions)
export(remove_views)
export(rename_view)
export(run_misty)
export(select_markers)
importFrom(dplyr,"%>%")
importFrom(rlang,"!!")
importFrom(rlang,":=")
Expand Down
19 changes: 18 additions & 1 deletion NEWS.md
@@ -1,3 +1,20 @@
# mistyR 1.2.0

- Release version for Bioconductor 3.14. See changes for 1.1.x.

# mistyR 1.1.x

- Added functions for view manipulation, including view filtering and marker selection.
- Added functions for performance, contribution and importance signature extraction from results.
- Aggregation and signature generation are generalized for samples with non-identical targets by working on the intersection.
- Modeling of intraview can be bypassed.
- Added families of distances to calculate paraview.
- Paraview can exclude measurements within a user-defined zone of indifference around each spatial unit.
- Improved plotting control.
- Complete test coverage.

**IMPORTANT**: R2 is now reported in percentages for intra, multi and gain. Collecting results from running mistyR < 1.1.11 will lead to miscalculation of gain.R2. Update the performance.txt files by multiplying the values in columns intra.R2 and multi.R2 by 100.

# mistyR 1.0.3

- Fixed display of messages and progress during view generation.
Expand Down Expand Up @@ -52,4 +69,4 @@

# mistyR 0.1.0 (MISTy)

- Initial beta release of mistyR (named as MISTy) with function documentation.
- Initial beta release of mistyR (named as MISTy) with function documentation.
2 changes: 1 addition & 1 deletion R/data.R
@@ -1,5 +1,5 @@
# mistyR companion data
# Copyright (c) 2020 Jovan Tanevski, Attila Gabor <attila.gabor@uni-heidelberg.de>
# Copyleft (ɔ) 2020 Jovan Tanevski, Attila Gabor <attila.gabor@uni-heidelberg.de>

#' Synthetic benchmark data for mistyR
#'
Expand Down
52 changes: 41 additions & 11 deletions R/misty.R
@@ -1,5 +1,5 @@
# MISTy runner
# Copyright (c) 2020 Jovan Tanevski [jovan.tanevski@uni-heidelberg.de]
# Copyleft (ɔ) 2020 Jovan Tanevski [jovan.tanevski@uni-heidelberg.de]


#' @importFrom rlang !! := .data
Expand All @@ -20,6 +20,12 @@ dplyr::`%>%`
#' the contributions of the view specific models and the importance of predictor
#' markers for each target marker.
#'
#' If \code{bypass.intra} is set to \code{TRUE} all variables in the
#' intraview data will be treated as targets only. The baseline intraview
#' model in this case is a trivial model that predicts the average of each
#' target. If the intraview has only one variable this switch is automatically
#' set to \code{TRUE}.
#'
#' Default values passed to \code{\link[ranger]{ranger}()} for training the
#' view-specific models: \code{num.trees = 100}, \code{importance = "impurity"},
#' \code{num.threads = 1}, \code{seed = seed}.
Expand All @@ -29,6 +35,8 @@ dplyr::`%>%`
#' @param seed seed used for random sampling to ensure reproducibility.
#' @param target.subset subset of targets to train models for. If \code{NULL},
#' models will be trained for markers in the intraview.
#' @param bypass.intra a \code{logical} indicating whether to bypass training
#' of a baseline model using the intraview data (see Details).
#' @param cv.folds number of cross-validation folds to consider for estimating
#' the performance of the multi-view models.
#' @param cached a \code{logical} indicating whether to cache the trained models
Expand Down Expand Up @@ -66,8 +74,8 @@ dplyr::`%>%`
#' run_misty(misty.views)
#' @export
run_misty <- function(views, results.folder = "results", seed = 42,
target.subset = NULL, cv.folds = 10, cached = FALSE,
append = FALSE, ...) {
target.subset = NULL, bypass.intra = FALSE, cv.folds = 10,
cached = FALSE, append = FALSE, ...) {
normalized.results.folder <- R.utils::getAbsolutePath(results.folder)

if (!dir.exists(normalized.results.folder)) {
Expand All @@ -93,18 +101,34 @@ run_misty <- function(views, results.folder = "results", seed = 42,
msg = "The data has less rows than the requested number of cv folds."
)

target.var <- apply(expr, 2, stats::sd)
if (ncol(expr) == 1) bypass.intra <- TRUE

target.var <- apply(expr, 2, stats::sd, na.rm = TRUE)

if (any(target.var == 0)) {
warning.message <- paste(
assertthat::assert_that(!any(target.var == 0),
msg = paste(
"Targets",
paste(names(which(target.var == 0)),
collapse = ", "
),
"have zero variance."
)
warning(warning.message)
}
)

target.unique <- colnames(expr) %>%
purrr::set_names() %>%
purrr::map_int(~ length(unique(expr %>% dplyr::pull(.x))))

assertthat::assert_that(all(target.unique >= cv.folds),
msg = paste(
"Targets",
paste(names(which(target.unique < cv.folds)),
collapse = ", "
),
"have fewer unique values than cv.folds"
)
)


coef.file <- paste0(
normalized.results.folder, .Platform$file.sep,
Expand Down Expand Up @@ -148,17 +172,23 @@ run_misty <- function(views, results.folder = "results", seed = 42,
NULL
)

message("Training models")
message("\nTraining models")
targets %>% furrr::future_map_chr(function(target, ...) {
target.model <- build_model(views, target, seed, cv.folds, cached, ...)
target.model <- build_model(
views, target, bypass.intra,
seed, cv.folds, cached, ...
)

combined.views <- target.model[["meta.model"]]

model.summary <- summary(combined.views)

# coefficient values and p-values
# WARNING: hardcoded column index
coeff <- c(model.summary$coefficients[, 1], model.summary$coefficients[, 4])
coeff <- c(
if (bypass.intra) 0, stats::coef(combined.views),
if (bypass.intra) 1, model.summary$coefficients[, 4]
)

current.lock <- filelock::lock(coef.lock)
write(paste(target, paste(coeff, collapse = " ")),
Expand Down
43 changes: 27 additions & 16 deletions R/models.R
@@ -1,5 +1,5 @@
# mistyR model training functions
# Copyright (c) 2020 Jovan Tanevski <jovan.tanevski@uni-heidelberg.de>
# Copyleft (ɔ) 2020 Jovan Tanevski <jovan.tanevski@uni-heidelberg.de>

#' Train a multi-view model for a single target
#'
Expand All @@ -19,8 +19,9 @@
#' view-specific models and performance estimates.
#'
#' @noRd
build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,
...) {
build_model <- function(views, target, bypass.intra = FALSE, seed = 42,
cv.folds = 10, cached = FALSE, ...) {

cache.location <- R.utils::getAbsolutePath(paste0(
".misty.temp", .Platform$file.sep,
views[["misty.uniqueid"]]
Expand All @@ -34,7 +35,6 @@ build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,

target.vector <- expr %>% dplyr::pull(target)


# merge ellipsis with default algorithm arguments
algo.arguments <- list(
num.trees = 100, importance = "impurity",
Expand All @@ -43,9 +43,10 @@ build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,
)

ellipsis.args <- list(...)
ellipsis.args.text <- paste(names(ellipsis.args), ellipsis.args,
sep = ".", collapse = ".")

ellipsis.args.text <- paste(names(ellipsis.args), ellipsis.args,
sep = ".", collapse = "."
)

if (!(length(ellipsis.args) == 0)) {
algo.arguments <- rlist::list.merge(algo.arguments, ellipsis.args)
}
Expand All @@ -57,18 +58,25 @@ build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,
model.view.cache.file <-
paste0(
cache.location, .Platform$file.sep,
"model.", view[["abbrev"]], ".", target,
"model.", view[["abbrev"]], ".", target,
".par", ellipsis.args.text, ".rds"
)

if (file.exists(model.view.cache.file) & cached) {
model.view <- readr::read_rds(model.view.cache.file)
} else {
if ((view[["abbrev"]] == "intra") & bypass.intra) {
transformed.view.data <-
tibble::tibble(!!target := target.vector, ".novar" := 0)
} else {
transformed.view.data <- view[["data"]] %>%
dplyr::mutate(!!target := target.vector)
}

model.view <- do.call(
ranger::ranger,
c(
list(data = (view[["data"]] %>%
dplyr::mutate(!!target := target.vector))),
list(data = transformed.view.data),
algo.arguments
)
)
Expand All @@ -87,9 +95,12 @@ build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,
tibble::as_tibble(.name_repair = make.names) %>%
dplyr::mutate(!!target := target.vector)

# train lm on above
# train lm on above, if bypass.intra set intercept to 0
formula <- stats::as.formula(
ifelse(bypass.intra, paste0(target, " ~ 0 + ."), paste0(target, " ~ ."))
)
combined.views <- stats::lm(
stats::as.formula(paste0(target, "~.")),
formula,
oob.predictions
)

Expand All @@ -106,11 +117,11 @@ build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,

performance.estimate <- test.folds %>% purrr::map_dfr(function(test.fold) {
meta.intra <- stats::lm(
stats::as.formula(paste0(target, "~.")),
formula,
intra.view.only %>% dplyr::slice(-test.fold)
)
meta.multi <- stats::lm(
stats::as.formula(paste0(target, "~.")),
formula,
oob.predictions %>% dplyr::slice(-test.fold)
)

Expand All @@ -130,8 +141,8 @@ build_model <- function(views, target, seed = 42, cv.folds = 10, cached = FALSE,
)

tibble::tibble(
intra.RMSE = intra.RMSE, intra.R2 = intra.R2,
multi.RMSE = multi.RMSE, multi.R2 = multi.R2
intra.RMSE = intra.RMSE, intra.R2 = 100*intra.R2,
multi.RMSE = multi.RMSE, multi.R2 = 100*multi.R2
)
})

Expand Down

0 comments on commit 940be06

Please sign in to comment.