Skip to content

Commit

Permalink
remove donttest from CaDrA
Browse files Browse the repository at this point in the history
  • Loading branch information
RC-88 committed May 31, 2023
1 parent 50991dd commit 008d229
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 104 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CaDrA
Type: Package
Title: Candidate Driver Analysis
Version: 0.99.5
Version: 0.99.6
Date: 2022-11-20
Authors@R:
c(person(given="Reina", family="Chau", role=c("aut","cre"),
Expand Down
82 changes: 43 additions & 39 deletions R/cadra.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@
#' sum(perm_best_scores > obs_best_score)/n_perm
#'
#' @examples
#'
#'\donttest{
#'
#' # Load pre-computed feature set
#' data(sim_FS)
Expand All @@ -98,7 +96,6 @@
#' set.seed(21)
#'
#' # Define additional parameters and start the function
#' # DO NOT RUN as this would take some time
#' cadra_result <- CaDrA(
#' FS = sim_FS, input_score = sim_Scores, method = "ks_pval",
#' weight = NULL, alternative = "less", top_N = 1,
Expand All @@ -107,8 +104,6 @@
#' ncores = 1, cache_path = NULL
#' )
#'
#'}
#'
#' @export
#' @import R.cache doParallel ggplot2 plyr methods SummarizedExperiment
#'
Expand All @@ -133,25 +128,25 @@ CaDrA <- function(
cache_path = NULL,
verbose = FALSE
){

# Set up verbose option
options(verbose = verbose)

# Match arguments
method <- match.arg(method)
alternative <- match.arg(alternative)
search_method <- match.arg(search_method)

# Check n_perm
stopifnot("invalid number of permutations (nperm)"=
(length(n_perm)==1 && !is.na(n_perm) &&
is.numeric(n_perm) && n_perm > 0) )

# Check ncores
stopifnot("invalid number of CPU cores (ncores)"=
(length(ncores)==1 && !is.na(ncores) &&
is.numeric(ncores) && ncores > 0) )

####### CACHE CHECKING #######
if(!is.null(cache_path)){
R.cache::setCacheRootPath(cache_path)
Expand Down Expand Up @@ -183,31 +178,31 @@ CaDrA <- function(

# Load perm_best_scores with the given key parameters
perm_best_scores <- R.cache::loadCache(key)

# Start the 'clock' to see how long the process takes
ptm <- proc.time()

# Check if, given the dataset and search-specific parameters,
# there is already a cached null distribution available
n_perm <- as.integer(n_perm)

if(!is.null(perm_best_scores) & (length(perm_best_scores) >= n_perm)){

if(length(perm_best_scores) == n_perm){
message("Found ", length(perm_best_scores),
verbose("Found ", length(perm_best_scores),
" permutated scores for the specified dataset",
" and search parameters in cache path\n")
message("LOADING PERMUTATED SCORES FROM CACHE\n")
verbose("LOADING PERMUTATED SCORES FROM CACHE\n")
}else{
message("n_perm is set to ", n_perm, " but found ",
verbose("n_perm is set to ", n_perm, " but found ",
length(perm_best_scores),
" permutated scores for the specified dataset",
" and search parameters in cache path\n")
message("LOADING LARGER PERMUTATED SCORES FROM CACHE\n")
verbose("LOADING LARGER PERMUTATED SCORES FROM CACHE\n")
}

}else{

if(is.null(perm_best_scores)){
verbose("No permutated scores for the specified dataset and ",
"search parameters were found in cache path\n")
Expand All @@ -220,23 +215,23 @@ CaDrA <- function(
verbose("RE-COMPUTE PERMUTATION-BASED TESTINGS ",
"WITH LARGER NUMBER OF PERMUTATIONS\n")
}

#######################################################################

# Check ncores
ncores <- as.integer(ncores)

# Sets up the parallel backend which will be utilized by Plyr.
parallel <- FALSE
progress <- "text"

if(ncores > 1){
doParallel::registerDoParallel(cores = ncores)
parallel <- TRUE
progress <- "none"
verbose("Running tests in parallel...")
}

# Generate matrix of permuted input_score
perm_labels_matrix <- generate_permutations(
input_score = input_score,
Expand All @@ -248,8 +243,10 @@ CaDrA <- function(
perm_labels_matrix,
1,
function(x){

perm_input_score <- x
names(perm_input_score) <- colnames(perm_labels_matrix)

best_score <- candidate_search(
FS = FS,
input_score = perm_input_score,
Expand All @@ -267,7 +264,9 @@ CaDrA <- function(
do_check = FALSE,
verbose = FALSE
)

return(best_score)

},
.parallel = parallel,
.progress = progress)
Expand All @@ -288,13 +287,13 @@ CaDrA <- function(

verbose("FINISHED\n")
verbose("Time elapsed: ", round((proc.time()-ptm)[3]/60, 2), " mins \n\n")

#########################################################################

if(is.null(obs_best_score)){

verbose("Computing observed best score...\n\n")

obs_best_score <- candidate_search(
FS = FS,
input_score = input_score,
Expand All @@ -312,9 +311,9 @@ CaDrA <- function(
do_check = FALSE,
verbose = FALSE
) |> unlist()

}else{

# Check obs_best_score
stopifnot("invalid observed best score (obs_best_score)"=
(length(obs_best_score)==1 && !is.na(obs_best_score) &&
Expand All @@ -324,19 +323,19 @@ CaDrA <- function(
obs_best_score <- as.numeric(obs_best_score)

}

verbose("Observed score: ", obs_best_score, "\n")

########### PERMUTATION P-VALUE COMPUTATION ############

# Add a smoothing factor of 1 if smooth is specified.
# This prevents the function from returning a p-value of 0.
c <- 0
if(smooth) c <- 1

perm_pval <- (sum(perm_best_scores > obs_best_score) + c)/
(length(perm_best_scores) + c)

verbose("Permutation p-value: ", perm_pval, "\n")
verbose("Number of permutations: ", length(perm_best_scores), "\n")

Expand All @@ -347,12 +346,17 @@ CaDrA <- function(
obs_best_score = obs_best_score,
perm_pval = perm_pval
)

# If plot = TRUE, produce the permutation plot
if(plot == TRUE){
permutation_plot(perm_res = perm_res)
}

return(perm_res)

}





2 changes: 1 addition & 1 deletion R/cadra_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ generate_permutations <- function(
# Create permutation matrix
perm <- matrix(NA, nrow=n_perm, ncol=n)
colnames(perm) <- names(input_score)
rownames(perm) <- 1:n_perm

# Sample the input scores
for(i in seq_len(n_perm)){
Expand All @@ -379,4 +380,3 @@ generate_permutations <- function(
return(perm)

}

16 changes: 9 additions & 7 deletions README.Rmd
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

---
output: rmarkdown::github_document
title: CaDrA
---

<!-- README.md is generated from README.Rmd. Please edit that file -->
Expand All @@ -10,12 +9,15 @@ title: CaDrA
knitr::opts_chunk$set(fig.path="./man/figures/", message=FALSE, collapse = TRUE, comment="")
library(SummarizedExperiment)
library(devtools)
load_all(".")
load_all()
```

<!-- badges: start -->
<!-- [![R-CMD-check](https://github.com/montilab/CaDrA/workflows/R-CMD-check/badge.svg)](https://github.com/montilab/CaDrA/actions) -->
<!-- badges: end -->
# CaDrA

![build](https://github.com/montilab/cadra/workflows/rcmdcheck/badge.svg)
![Gitter](https://img.shields.io/gitter/room/montilab/cadra)
![GitHub issues](https://img.shields.io/github/issues/montilab/cadra)
![GitHub last commit](https://img.shields.io/github/last-commit/montilab/cadra)

**Ca**ndidate **Dr**ivers **A**nalysis: Multi-Omic Search for Candidate Drivers of Functional Signatures

Expand All @@ -24,7 +26,7 @@ load_all(".")
The main function takes two inputs:

i) A binary multi-omics dataset, which can be represented as a matrix of binary features or a **SummarizedExperiment** class object where the rows are 1/0 vectors indicating the presence/absence of ‘omics’ features (e.g. somatic mutations, copy number alterations, epigenetic marks, etc.), and the columns are the samples.
ii) A molecular phenotype of interest, which can be represented as a vector of continuous scores (e.g. protein expression, pathway activity, etc.)
ii) A molecular phenotype of interest which can be represented as a vector of continuous scores (e.g. protein expression, pathway activity, etc.)

Based on these two inputs, **CaDrA** implements a forward/backward search algorithm to find a set of features that together is maximally associated with the observed input scores, based on one of several scoring functions (*Kolmogorov-Smirnov*, *Conditional Mutual Information*, *Wilcoxon*, *custom-defined scoring function*), making it useful to find complementary omics features likely driving the input molecular phenotype.

Expand Down Expand Up @@ -79,7 +81,7 @@ eset_mut_scna_flt <- CaDrA::prefilter_data(

Here, we repeat the candidate search starting from each of the top 'N' features and report the combined results as a heatmap (to summarize the number of times each feature is selected across repeated runs).

IMPORTANT NOTE: The legacy function `topn_eval()` is equivalent to the recommended `candidate_search()` function
IMPORTANT NOTE: The legacy function `topn_eval()` is equivalent to the new, recommended `candidate_search()` function.

```{r cadra}
Expand Down

0 comments on commit 008d229

Please sign in to comment.