Skip to content

Commit

Permalink
updates simulated dataset, revealer method, and unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
RC-88 committed Aug 25, 2023
1 parent ee853bf commit fefb3d5
Show file tree
Hide file tree
Showing 36 changed files with 504 additions and 405 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ Imports:
Suggests:
BiocStyle,
devtools,
rmarkdown,
knitr,
pheatmap,
rmarkdown,
testthat (>= 3.1.6)
Config/testthat/edition: 3
biocViews: Microarray, RNASeq, GeneExpression, Software, FeatureExtraction
Expand Down
4 changes: 2 additions & 2 deletions R/CCLE_MUT_SCNA.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Genomic Data from CCLE MUT + SCNA
#'
#' A SummarizedExperiment object consists of 17,723 genomic features across
#' A SummarizedExperiment object consists of 17,724 genomic features across
#' 82 samples.
#'
#' @docType data
Expand All @@ -9,7 +9,7 @@
#'
#' @format An object of class \code{SummarizedExperiment} from
#' \code{SummarizedExperiment} package
#' containing a matrix of 17,723 rows (features) and 82 columns (samples).
#' containing a matrix of 17,724 rows (features) and 82 columns (samples).
#' See \code{SummarizedExperiment} for more details.
#'
#' @return a SummarizedExperiment object
Expand Down
83 changes: 53 additions & 30 deletions R/cadra.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,25 +62,32 @@
#' null distribution of the permuted best scores. Default is \code{TRUE}.
#' @param ncores an integer specifies the number of cores to perform
#' parallelization for permutation-based testing. Default is \code{1}.
#' @param cache_path a full path uses to cache the permuted best scores.
#' We recycle these scores instead of re-computing them to save time.
#' Default is \code{NULL}. If NULL, the cache path is set to \code{~/.Rcache}
#' for future loading.
#' @param cache a logical value to determine whether or not to cache the
#' permuted best scores. This would help save time for future loading instead
#' of re-computing the permutation-based testing every time.
#' Default is \code{TRUE}.
#' @param cache_path If cache = TRUE, a full path can be used to cache the
#' permuted best scores. Default is \code{NULL}. If NULL, the cache path is
#' set to \code{~/.Rcache} for future loading.
#' @param verbose a logical value indicates whether or not to print the
#' diagnostic messages. Default is \code{FALSE}.
#'
#' @return a list of 4 objects: \code{key}, \code{perm_best_scores},
#' \code{obs_best_score}, \code{perm_pval}
#' \code{key}: a list of parameters that are used to cache the
#'
#' -\code{key}: a list of parameters that are used to cache the
#' results of the permutation-based testing. This is useful as the
#' permuted scores are recycled to save time for future loading.
#' \code{perm_best_scores}: a vector of permuted best scores obtained
#' permuted best scores can be recycled to save time for future loading.
#'
#' -\code{perm_best_scores}: a vector of permuted best scores obtained
#' by performing \code{candidate_search} over \code{n_perm} iterations of
#' the permuted input scores.
#' \code{obs_best_score}: the observed best score obtained by performing
#' \code{candidate_search} on the given dataset and parameters. This value is
#' later used to compare against the permuted best scores
#' permuted input scores.
#'
#' -\code{obs_best_score}: the observed best score obtained by performing
#' \code{candidate_search} on a given dataset and input parameters. This
#' value is later used to compare against the permuted best scores
#' (\code{perm_best_scores}).
#'
#' \code{perm_pval}: a permutation-based p-value obtained by calculating
#' sum(perm_best_scores > obs_best_score)/n_perm
#'
Expand Down Expand Up @@ -125,6 +132,7 @@ CaDrA <- function(
smooth = TRUE,
plot = TRUE,
ncores = 1,
cache = TRUE,
cache_path = NULL,
verbose = FALSE
){
Expand All @@ -147,15 +155,6 @@ CaDrA <- function(
(length(ncores)==1 && !is.na(ncores) &&
is.numeric(ncores) && ncores > 0) )

####### CACHE CHECKING #######
if(!is.null(cache_path)){
R.cache::setCacheRootPath(cache_path)
message("Using provided cache root path: ", cache_path, "")
} else{
R.cache::setCacheRootPath()
message("Setting cache root path as: ", getCacheRootPath(), "\n")
}

# Retrieve the original class object of feature set
# If FS is a SummarizedExperiment, convert it to a matrix object
# used its matrix form as a default caching key
Expand All @@ -176,8 +175,25 @@ CaDrA <- function(
search_method = search_method,
max_size = max_size)

# Load perm_best_scores with the given key parameters
perm_best_scores <- R.cache::loadCache(key)
####### CACHE CHECKING #######
if(cache == TRUE){

if(!is.null(cache_path)){
R.cache::setCacheRootPath(cache_path)
message("Using provided cache root path: ", cache_path, "")
} else{
R.cache::setCacheRootPath()
message("Setting cache root path as: ", getCacheRootPath(), "\n")
}

# Load perm_best_scores with the given key parameters
perm_best_scores <- R.cache::loadCache(key)

}else{

perm_best_scores <- NULL

}

# Start the 'clock' to see how long the process takes
ptm <- proc.time()
Expand Down Expand Up @@ -261,7 +277,6 @@ CaDrA <- function(
max_size = max_size,
best_score_only = TRUE,
do_plot = FALSE,
do_check = FALSE,
verbose = FALSE
)

Expand All @@ -271,15 +286,20 @@ CaDrA <- function(
.parallel = parallel,
.progress = progress)

# Set up verbose option
options(verbose = verbose)

# Extract the permuted best scores
perm_best_scores <- lapply(
seq_along(perm_best_scores_l),
function(l){ perm_best_scores_l[[l]] }) |> unlist()

# Save computed scores to cache
verbose("Saving to cache...\n")
R.cache::saveCache(perm_best_scores, key=key, comment="null_scores()")

if(cache == TRUE){
# Save computed scores to cache
verbose("Saving to cache...\n")
R.cache::saveCache(perm_best_scores, key=key)
}

} # end caching else statement block

# Return to using just a single core
Expand Down Expand Up @@ -308,22 +328,25 @@ CaDrA <- function(
max_size = max_size,
best_score_only = TRUE,
do_plot = FALSE,
do_check = FALSE,
verbose = FALSE
) |> unlist()

}else{

# Check obs_best_score
stopifnot("invalid observed best score (obs_best_score)"=
stopifnot("Invalid observed best score (obs_best_score)"=
(length(obs_best_score)==1 && !is.na(obs_best_score) &&
is.numeric(obs_best_score)))

verbose("Using provided value of observed best score...\n\n")
verbose("Using provided value of observed best score...\n")

obs_best_score <- as.numeric(obs_best_score)

}

# Set up verbose option
options(verbose = verbose)

verbose("Observed score: ", obs_best_score, "\n")

########### PERMUTATION P-VALUE COMPUTATION ############
Expand Down
12 changes: 2 additions & 10 deletions R/cadra_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ check_data_input <- function(
"names of the FS object.\n")

# Check if the features have either all 0s or 1s values
if(any(rowSums(FS_mat) %in% c(0, ncol(FS_mat)) ))
if(any(rowSums(FS_mat) %in% c(0, ncol(FS_mat))))
stop("The FS object has features that are either all 0s or 1s. ",
"These features must be removed from the FS object as ",
"they are uninformative.")
Expand Down Expand Up @@ -270,10 +270,6 @@ check_top_N <- function(
stop("Please specify a top_N value that is less than the number of ",
"features in the FS object.\n")

if(top_N > 10)
warning("top_N value specified is greater than 10. ",
"This may result in a longer search time.\n")

# Getting the top N features with the biggest scores
top_features <- names(rowscore[seq_len(top_N)])

Expand All @@ -284,14 +280,10 @@ check_top_N <- function(
}) |> unlist()

verbose("Evaluating search over top ", length(search_feature_index),
" features\n")
" feature(s)\n")

}else{

if(!is.na(top_N) && length(top_N) > 0)
warning("Since search_start variable is given, ",
"evaluating over top_N value will be ignored.\n")

search_start <- strsplit(as.character(search_start), ",", fixed=TRUE) |>
unlist() |>
trimws()
Expand Down
11 changes: 2 additions & 9 deletions R/calc_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ calc_rowscore <- function(
metric <- "pval"
}

# Extract only the method value (no metric info)
# Extract only the method value (e.g. ks/wilcox/revealer/custom)
# based on a given method string
method <- gsub("_score|_pval", "", method)

Expand Down Expand Up @@ -254,14 +254,7 @@ calc_rowscore <- function(
)
)

# If there is a returned row score
# Re-order FS in a decreasing order (from most to least significant)
# This comes in handy when doing the top-N evaluation of
# the top N 'best' features
if(length(rscores) > 0){
rscores <- rscores[order(rscores, decreasing=TRUE)]
return(rscores)
}
return(rscores)

}

Expand Down

0 comments on commit fefb3d5

Please sign in to comment.