fix candidate_search to account for revealer method

montilab · Jul 14, 2023 · e8fa2d8 · e8fa2d8
1 parent 3e83492
commit e8fa2d8
Show file tree

Hide file tree

Showing 30 changed files with 543 additions and 451 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: CaDrA
 Type: Package
 Title: Candidate Driver Analysis
-Version: 0.99.6
+Version: 0.99.7
 Date: 2022-11-20
 Authors@R: 
     c(person(given="Reina", family="Chau", role=c("aut","cre"), 

diff --git a/R/cadra.R b/R/cadra.R
@@ -33,7 +33,7 @@
 #' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
 #' Default is \code{less} for left-skewed significance testing.
 #' NOTE: this argument only apply to KS and Wilcoxon method
-#' @param weight if method is \code{ks_score}, specifies a vector of weights 
+#' @param weights if method is \code{ks_score}, specifies a vector of weights 
 #' to perform a weighted-KS testing. Default is \code{NULL}.
 #' @param top_N an integer specifies the number of features to start the
 #' search over. By default, it starts from the top best feature (top_N = 1).
@@ -72,12 +72,12 @@
 #' @return a list of 4 objects: \code{key}, \code{perm_best_scores}, 
 #' \code{obs_best_score}, \code{perm_pval}
 #' \code{key}: a list of parameters that are used to cache the 
-#' result of the permutation-based testing. This is useful as the
+#' results of the permutation-based testing. This is useful as the
 #' permuted scores are recycled to save time for future loading.
 #' \code{perm_best_scores}: a vector of permuted best scores obtained 
-#' by performing \code{candidate_search} over (\code{n_perm}) iterations of 
-#' permuted input scores.
-#' \code{obs_best_score}: the observed best score is calculated by performing
+#' by performing \code{candidate_search} over \code{n_perm} iterations of 
+#' the permuted input scores.
+#' \code{obs_best_score}: the observed best score obtained by performing
 #' \code{candidate_search} on the given dataset and parameters. This value is 
 #' later used to compare against the permuted best scores 
 #' (\code{perm_best_scores}).
@@ -98,7 +98,7 @@
 #' # Define additional parameters and start the function
 #' cadra_result <- CaDrA(
 #'   FS = sim_FS, input_score = sim_Scores, method = "ks_pval", 
-#'   weight = NULL, alternative = "less", top_N = 1,
+#'   weights = NULL, alternative = "less", top_N = 1,
 #'   search_start = NULL, search_method = "both", max_size = 7, 
 #'   n_perm = 10, plot = FALSE, smooth = TRUE, obs_best_score = NULL,
 #'   ncores = 1, cache_path = NULL
@@ -115,7 +115,7 @@ CaDrA <- function(
     custom_function = NULL,
     custom_parameters = NULL,
     alternative = c("less", "greater", "two.sided"),
-    weight = NULL,
+    weights = NULL,
     top_N = 1,
     search_start = NULL,
     search_method = c("both", "forward"),
@@ -170,7 +170,7 @@ CaDrA <- function(
               custom_function = custom_function,
               custom_parameters = custom_parameters,
               alternative = alternative,
-              weight = weight,
+              weights = weights,
               top_N = top_N,
               search_start = search_start,
               search_method = search_method,
@@ -254,7 +254,7 @@ CaDrA <- function(
           custom_function = custom_function,
           custom_parameters = custom_parameters,
           alternative = alternative,
-          weight = weight,
+          weights = weights,
           top_N = top_N,
           search_start = search_start,
           search_method = search_method,
@@ -301,7 +301,7 @@ CaDrA <- function(
       custom_function = custom_function,
       custom_parameters = custom_parameters,
       alternative = alternative,
-      weight = weight,
+      weights = weights,
       top_N = top_N,
       search_start = search_start,
       search_method = search_method,

diff --git a/R/cadra_functions.R b/R/cadra_functions.R
@@ -91,6 +91,9 @@ prefilter_data <- function(
 #' interest such as protein expression, pathway activity, etc.
 #' NOTE: The \code{input_score} object must have names or labels that 
 #' match the column names of FS_mat object.
+#' @param seed_names a vector of one or more features representing known causes
+#' of activation or features associated with a response of interest.
+#' Default is NULL.
 #' @param do_check a logical value indicates whether or not to validate if the  
 #' given parameters (FS_mat and input_score) are valid inputs. 
 #' Default is \code{TRUE}
@@ -112,16 +115,19 @@ prefilter_data <- function(
 #' input_score = rnorm(n = ncol(FS_mat))
 #' names(input_score) <- colnames(FS_mat)
 #'
+#' # Check data inputs
 #' check_data_input(
 #'  FS_mat = FS_mat,
-#'  input_score = input_score
+#'  input_score = input_score,
+#'  seed_names = NULL
 #' )
 #' 
 #' @return If do_check=FALSE, return NULL, otherwise, check if FS_mat and 
 #' input_score are valid inputs 
 check_data_input <- function(
     FS_mat,
     input_score,
+    seed_names = NULL,
     do_check = TRUE
 ){
 
@@ -159,6 +165,12 @@ check_data_input <- function(
          "These features must be removed from the FS object as ",
          "they are uninformative.")
 
+  if(length(seed_names) > 0 && any(!seed_names %in% rownames(FS_mat)))
+    stop("The provided feature(s): ", 
+         paste0(seed_names[which(!seed_names %in% rownames(FS_mat))], 
+                collapse=", "),
+         " do(es) not exist among the row names of the FS object.\n")
+
 }
 
 
@@ -200,7 +212,7 @@ check_data_input <- function(
 #'  input_score = sim_Scores,
 #'  method = "ks_pval",
 #'  alternative = "less",
-#'  weight = NULL
+#'  weights = NULL
 #' )
 #' 
 #' top_N_index <- check_top_N(
@@ -211,10 +223,10 @@ check_data_input <- function(
 #' )
 #'
 #' @return If top_N is given, a vector of indices of top N features with 
-#' the best scores will be returned and used to start the candidate_search() 
-#' with. 
-#' Otherwise, the candidate_search() will start the search with a list of  
-#' indices of starting features defined in search_start.
+#' their corresponding best scores will be returned. These features will later 
+#' use to start the candidate_search() with. Otherwise, the candidate_search() 
+#' will start the heuristic search with a list of indices of starting features 
+#' defined in search_start.
 check_top_N <- function(
     rowscore, 
     feature_names,
@@ -251,29 +263,29 @@ check_top_N <- function(
 
   }else{
 
-    search_start <- strsplit(as.character(search_start), ",", fixed=TRUE) |>
-      unlist() |>
-      trimws()
-
     if(!is.na(top_N) && length(top_N) > 0)
       warning("Since search_start variable is given, ",
               "evaluating over top_N value will be ignored.\n")
 
-    # User-specified feature names
-    verbose("Starting with specified feature names...\n")
+    search_start <- strsplit(as.character(search_start), ",", fixed=TRUE) |>
+      unlist() |>
+      trimws()
 
     if(length(search_start) == 0 || any(!search_start %in% feature_names))
-      stop("The provided starting features: ", 
+      stop("The provided starting feature(s): ", 
            paste0(search_start[which(!search_start %in% feature_names)], 
                   collapse=", "),
-           " does not exist among the row names of FS object.\n")
+           " do(es) not exist among the row names of the FS object.\n")
 
     # Get the index of the search_start strings
     search_feature_index <- lapply(seq_along(search_start), function(f){
       #f=1;
       which(feature_names == search_start[f])
     }) |> unlist()
 
+    # User-specified feature names
+    verbose("Starting with specified feature names...\n")
+
   }
 
   return(search_feature_index)
@@ -366,7 +378,6 @@ generate_permutations <- function(
   # Create permutation matrix
   perm <- matrix(NA, nrow=n_perm, ncol=n)
   colnames(perm) <- names(input_score)
-  rownames(perm) <- seq_along(n_perm)
 
   # Sample the input scores
   for(i in seq_len(n_perm)){

diff --git a/R/calc_rowscore.R b/R/calc_rowscore.R
@@ -11,7 +11,10 @@
 #' readout of interest such as protein expression, pathway activity, etc.
 #' 
 #' NOTE: \code{input_score} object must have names or labels that match the 
-#' column names of \code{FS_mat} object.
+#' column names of \code{FS} object.
+#' @param seed_names a vector of one or more features representing known 
+#' “causes” of activation or features associated with a response of interest.
+#' It is applied for \code{method = "revealer"} only.
 #' @param method a character string specifies a scoring method that is
 #' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
 #' or \code{"wilcox_pval"} or \code{wilcox_score} or 
@@ -21,33 +24,30 @@
 #' @param custom_function if method is \code{"custom"}, specifies
 #' the name of the customized function here. Default is \code{NULL}.
 #' 
-#' NOTE: \code{custom_function} must take \code{FS_mat} and \code{input_score} 
+#' NOTE: \code{custom_function} must take \code{FS} and \code{input_score} 
 #' as its input arguments, and its final result must return a vector of row-wise 
 #' scores ordered from most significant to least significant where its labels or 
-#' names matched the row names of \code{FS_mat} object.
+#' names matched the row names of \code{FS} object.
 #' @param custom_parameters if method is \code{"custom"}, specifies a list of
-#' additional arguments (excluding \code{FS_mat} and \code{input_score}) to be 
+#' additional arguments (excluding \code{FS} and \code{input_score}) to be 
 #' passed to \code{custom_function}. Default is \code{NULL}.
 #' @param alternative a character string specifies an alternative hypothesis
 #' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
 #' Default is \code{less} for left-skewed significance testing.
 #' 
 #' NOTE: This argument is applied to KS and Wilcoxon method
-#' @param weight if method is \code{ks_score} or \code{ks_pval}, specifying a 
+#' @param weights if method is \code{ks_score} or \code{ks_pval}, specifying a 
 #' vector of weights will perform a weighted-KS testing. Default is \code{NULL}.
-#' @param seed_names a vector of one or more features representing known 
-#' “causes” of activation or features associated with a response of interest.
-#' It is applied for \code{method = "revealer"} only.
 #' @param do_check a logical value indicates whether or not to validate if the  
-#' given parameters (\code{FS_mat} and \code{input_score}) are valid inputs. 
+#' given parameters (\code{FS} and \code{input_score}) are valid inputs. 
 #' Default is \code{TRUE}.
 #' @param verbose a logical value indicates whether or not to print the
 #' diagnostic messages. Default is \code{FALSE}.
 #' @param ... additional parameters to be passed to \code{custom_function}
 #' 
-#' @return return a vector of row-wise scores where it is ordered from most
-#' significant to least significant (e.g. from highest to lowest values) 
-#' where its labels or names must match the row names of \code{FS_mat} object
+#' @return return a vector of row-wise positive scores where it is ordered from 
+#' most significant to least significant (e.g. from highest to lowest values) 
+#' and its labels or names must match the row names of \code{FS} object
 #' 
 #' @examples
 #' 
@@ -68,15 +68,17 @@
 #' ks_rowscore_result <- calc_rowscore(
 #'   FS = mat,
 #'   input_score = input_score,
+#'   seed_names = NULL,
 #'   method = "ks_pval",
-#'   weight = NULL,
+#'   weights = NULL,
 #'   alternative = "less"
 #' )
 #'
 #' # Run the wilcoxon method
 #' wilcox_rowscore_result <- calc_rowscore(
 #'   FS = mat,
 #'   input_score = input_score,
+#'   seed_names = NULL,
 #'   method = "wilcox_pval",
 #'   alternative = "less"
 #' )
@@ -90,7 +92,7 @@
 #' )
 #' 
 #' # A customized function using ks-test function
-#' customized_rowscore <- function(FS, input_score, alternative="less"){
+#' customized_rowscore <- function(FS, input_score, seed_names = NULL, alternative="less"){
 #'   
 #'   ks <- apply(FS, 1, function(r){ 
 #'     x = input_score[which(r==1)]; 
@@ -119,6 +121,7 @@
 #' custom_rowscore_result <- calc_rowscore(
 #'   FS = mat,
 #'   input_score = input_score,
+#'   seed_names = NULL,
 #'   method = "custom",
 #'   custom_function = customized_rowscore,            
 #'   custom_parameters = NULL  
@@ -129,13 +132,13 @@
 calc_rowscore <- function(
     FS,
     input_score,
+    seed_names = NULL,
     method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score", 
                "revealer", "custom"),
     custom_function = NULL,
     custom_parameters = NULL,   
     alternative = c("less", "greater", "two.sided"),
-    weight = NULL,
-    seed_names = NULL,
+    weights = NULL,
     do_check = TRUE,
     verbose = FALSE,
     ...
@@ -162,8 +165,9 @@ calc_rowscore <- function(
 
   # Check if FS and input_score are valid inputs
   if(do_check == TRUE) 
-    check_data_input(FS_mat = FS_mat, input_score = input_score, do_check = do_check)
-
+    check_data_input(FS_mat = FS_mat, input_score = input_score, 
+                     seed_names = seed_names, do_check = do_check)
+
   # Define metric value based on a given scoring method
   if(length(grep("score", method)) > 0){
     metric <- "stat"
@@ -175,25 +179,21 @@ calc_rowscore <- function(
   # based on a given method string
   method <- gsub("_score|_pval", "", method)
 
-  # Create a list of known arguments (excluding FS_mat and input_score)
-  # that can be passed to custom_function()
-  known_parameters <- list(method = method, alternative = alternative, 
-                           weight = weight, seed_names = seed_names, 
-                           do_check = do_check, verbose = verbose, ...)
-
   # Select the appropriate method to compute row-wise directional scores
   rscores <- switch(
     method,
     ks = ks_rowscore(
       FS = FS_mat,
       input_score = input_score,
-      weight = weight,
+      seed_names = seed_names,
+      weights = weights,
       alternative = alternative,
       metric = metric
     ),
     wilcox = wilcox_rowscore(
       FS = FS_mat,
       input_score = input_score,
+      seed_names = seed_names,
       alternative = alternative,
       metric = metric
     ),
@@ -206,9 +206,14 @@ calc_rowscore <- function(
     custom = custom_rowscore(
       FS = FS,
       input_score = input_score,
+      seed_names = seed_names,
       custom_function = custom_function,
       custom_parameters = custom_parameters,
-      known_parameters = known_parameters
+      method = method, 
+      alternative = alternative, 
+      weights = weights, 
+      do_check = do_check, 
+      verbose = verbose
     )
   )