updates simulated dataset, revealer method, and unit tests

montilab · Aug 25, 2023 · fefb3d5 · fefb3d5
1 parent ee853bf
commit fefb3d5
Show file tree

Hide file tree

Showing 36 changed files with 504 additions and 405 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -46,8 +46,9 @@ Imports:
 Suggests: 
     BiocStyle,
     devtools,
-    rmarkdown,
     knitr,
+    pheatmap,
+    rmarkdown,
     testthat (>= 3.1.6)
 Config/testthat/edition: 3
 biocViews: Microarray, RNASeq, GeneExpression, Software, FeatureExtraction

diff --git a/R/CCLE_MUT_SCNA.R b/R/CCLE_MUT_SCNA.R
@@ -1,6 +1,6 @@
 #' Genomic Data from CCLE MUT + SCNA
 #'
-#' A SummarizedExperiment object consists of 17,723 genomic features across 
+#' A SummarizedExperiment object consists of 17,724 genomic features across 
 #' 82 samples.
 #'
 #' @docType data
@@ -9,7 +9,7 @@
 #'
 #' @format An object of class \code{SummarizedExperiment} from 
 #' \code{SummarizedExperiment} package
-#' containing a matrix of 17,723 rows (features) and 82 columns (samples).
+#' containing a matrix of 17,724 rows (features) and 82 columns (samples).
 #' See \code{SummarizedExperiment} for more details.
 #' 
 #' @return a SummarizedExperiment object

diff --git a/R/cadra.R b/R/cadra.R
@@ -62,25 +62,32 @@
 #' null distribution of the permuted best scores. Default is \code{TRUE}.
 #' @param ncores an integer specifies the number of cores to perform
 #' parallelization for permutation-based testing. Default is \code{1}.
-#' @param cache_path a full path uses to cache the permuted best scores.
-#' We recycle these scores instead of re-computing them to save time.
-#' Default is \code{NULL}. If NULL, the cache path is set to \code{~/.Rcache}
-#' for future loading.
+#' @param cache a logical value to determine whether or not to cache the 
+#' permuted best scores. This would help save time for future loading instead 
+#' of re-computing the permutation-based testing every time. 
+#' Default is \code{TRUE}.
+#' @param cache_path If cache = TRUE, a full path can be used to cache the 
+#' permuted best scores. Default is \code{NULL}. If NULL, the cache path is
+#' set to \code{~/.Rcache} for future loading.
 #' @param verbose a logical value indicates whether or not to print the
 #' diagnostic messages. Default is \code{FALSE}.
 #'
 #' @return a list of 4 objects: \code{key}, \code{perm_best_scores}, 
 #' \code{obs_best_score}, \code{perm_pval}
-#' \code{key}: a list of parameters that are used to cache the 
+#' 
+#' -\code{key}: a list of parameters that are used to cache the 
 #' results of the permutation-based testing. This is useful as the
-#' permuted scores are recycled to save time for future loading.
-#' \code{perm_best_scores}: a vector of permuted best scores obtained 
+#' permuted best scores can be recycled to save time for future loading.
+#' 
+#' -\code{perm_best_scores}: a vector of permuted best scores obtained 
 #' by performing \code{candidate_search} over \code{n_perm} iterations of 
-#' the permuted input scores.
-#' \code{obs_best_score}: the observed best score obtained by performing
-#' \code{candidate_search} on the given dataset and parameters. This value is 
-#' later used to compare against the permuted best scores 
+#' permuted input scores.
+#' 
+#' -\code{obs_best_score}: the observed best score obtained by performing
+#' \code{candidate_search} on a given dataset and input parameters. This 
+#' value is later used to compare against the permuted best scores 
 #' (\code{perm_best_scores}).
+#' 
 #' \code{perm_pval}: a permutation-based p-value obtained by calculating
 #' sum(perm_best_scores > obs_best_score)/n_perm
 #'
@@ -125,6 +132,7 @@ CaDrA <- function(
     smooth = TRUE,
     plot = TRUE,
     ncores = 1,
+    cache = TRUE,
     cache_path = NULL,
     verbose = FALSE
 ){
@@ -147,15 +155,6 @@ CaDrA <- function(
               (length(ncores)==1 && !is.na(ncores) &&
                  is.numeric(ncores) && ncores > 0) )
 
-  ####### CACHE CHECKING #######
-  if(!is.null(cache_path)){
-    R.cache::setCacheRootPath(cache_path)
-    message("Using provided cache root path: ", cache_path, "")
-  } else{
-    R.cache::setCacheRootPath()
-    message("Setting cache root path as: ", getCacheRootPath(), "\n")
-  }
-
   # Retrieve the original class object of feature set 
   # If FS is a SummarizedExperiment, convert it to a matrix object
   # used its matrix form as a default caching key
@@ -176,8 +175,25 @@ CaDrA <- function(
               search_method = search_method,
               max_size = max_size)
 
-  # Load perm_best_scores with the given key parameters
-  perm_best_scores <- R.cache::loadCache(key)
+  ####### CACHE CHECKING #######
+  if(cache == TRUE){
+
+    if(!is.null(cache_path)){
+      R.cache::setCacheRootPath(cache_path)
+      message("Using provided cache root path: ", cache_path, "")
+    } else{
+      R.cache::setCacheRootPath()
+      message("Setting cache root path as: ", getCacheRootPath(), "\n")
+    }
+
+    # Load perm_best_scores with the given key parameters
+    perm_best_scores <- R.cache::loadCache(key)
+
+  }else{
+
+    perm_best_scores <- NULL
+
+  }
 
   # Start the 'clock' to see how long the process takes
   ptm <- proc.time()
@@ -261,7 +277,6 @@ CaDrA <- function(
           max_size = max_size,
           best_score_only = TRUE,
           do_plot = FALSE,
-          do_check = FALSE,
           verbose = FALSE
         ) 
 
@@ -271,15 +286,20 @@ CaDrA <- function(
       .parallel = parallel,
       .progress = progress)
 
+    # Set up verbose option
+    options(verbose = verbose)
+
     # Extract the permuted best scores
     perm_best_scores <- lapply(
       seq_along(perm_best_scores_l), 
       function(l){ perm_best_scores_l[[l]] }) |> unlist()
 
-    # Save computed scores to cache
-    verbose("Saving to cache...\n")
-    R.cache::saveCache(perm_best_scores, key=key, comment="null_scores()")
-
+    if(cache == TRUE){
+      # Save computed scores to cache
+      verbose("Saving to cache...\n")
+      R.cache::saveCache(perm_best_scores, key=key)
+    }
+
   } # end caching else statement block
 
   # Return to using just a single core
@@ -308,22 +328,25 @@ CaDrA <- function(
       max_size = max_size,
       best_score_only = TRUE,
       do_plot = FALSE,
-      do_check = FALSE,
       verbose = FALSE
     ) |> unlist()
 
   }else{
 
     # Check obs_best_score
-    stopifnot("invalid observed best score (obs_best_score)"=
+    stopifnot("Invalid observed best score (obs_best_score)"=
                 (length(obs_best_score)==1 && !is.na(obs_best_score) &&
                    is.numeric(obs_best_score)))
 
-    verbose("Using provided value of observed best score...\n\n")
+    verbose("Using provided value of observed best score...\n")
+
     obs_best_score <- as.numeric(obs_best_score)
 
   }
 
+  # Set up verbose option
+  options(verbose = verbose)
+
   verbose("Observed score: ", obs_best_score, "\n")
 
   ########### PERMUTATION P-VALUE COMPUTATION ############

diff --git a/R/cadra_functions.R b/R/cadra_functions.R
@@ -185,7 +185,7 @@ check_data_input <- function(
          "names of the FS object.\n")
 
   # Check if the features have either all 0s or 1s values
-  if(any(rowSums(FS_mat) %in% c(0, ncol(FS_mat)) ))
+  if(any(rowSums(FS_mat) %in% c(0, ncol(FS_mat))))
     stop("The FS object has features that are either all 0s or 1s. ",
          "These features must be removed from the FS object as ",
          "they are uninformative.")
@@ -270,10 +270,6 @@ check_top_N <- function(
       stop("Please specify a top_N value that is less than the number of ",
            "features in the FS object.\n")
 
-    if(top_N > 10)
-      warning("top_N value specified is greater than 10. ",
-              "This may result in a longer search time.\n")
-
     # Getting the top N features with the biggest scores
     top_features <- names(rowscore[seq_len(top_N)])
 
@@ -284,14 +280,10 @@ check_top_N <- function(
     }) |> unlist()
 
     verbose("Evaluating search over top ", length(search_feature_index), 
-            " features\n")
+            " feature(s)\n")
 
   }else{
 
-    if(!is.na(top_N) && length(top_N) > 0)
-      warning("Since search_start variable is given, ",
-              "evaluating over top_N value will be ignored.\n")
-
     search_start <- strsplit(as.character(search_start), ",", fixed=TRUE) |>
       unlist() |>
       trimws()

diff --git a/R/calc_rowscore.R b/R/calc_rowscore.R
@@ -211,7 +211,7 @@ calc_rowscore <- function(
     metric <- "pval"   
   }
 
-  # Extract only the method value (no metric info)
+  # Extract only the method value (e.g. ks/wilcox/revealer/custom)
   # based on a given method string
   method <- gsub("_score|_pval", "", method)
 
@@ -254,14 +254,7 @@ calc_rowscore <- function(
     )
   )
 
-  # If there is a returned row score
-  # Re-order FS in a decreasing order (from most to least significant)
-  # This comes in handy when doing the top-N evaluation of
-  # the top N 'best' features
-  if(length(rscores) > 0){
-    rscores <- rscores[order(rscores, decreasing=TRUE)]
-    return(rscores)
-  }
+  return(rscores)
 
 }