update alternative to method_alternative and include an alternative t…

…ype for calculating a permutation-based p-value
montilab · Nov 7, 2023 · 8c30c5f · 8c30c5f
1 parent a026f5e
commit 8c30c5f
Show file tree

Hide file tree

Showing 25 changed files with 421 additions and 373 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: CaDrA
 Type: Package
 Title: Candidate Driver Analysis
-Version: 1.1.0
+Version: 1.1.1
 Date: 2022-11-20
 Authors@R: 
     c(person(given="Reina", family="Chau", role=c("aut","cre"), 

diff --git a/R/cadra.R b/R/cadra.R
@@ -17,79 +17,96 @@
 #' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
 #' or \code{"wilcox_pval"} or \code{wilcox_score} or 
 #' \code{"revealer"} (conditional mutual information from REVEALER) or
-#' \code{"custom"} (a customized scoring method)). 
+#' \code{"custom"} (a user-defined scoring method)). 
 #' Default is \code{ks_pval}.
-#' @param custom_function if method is \code{"custom"}, specifies
-#' the name of the customized function here. Default is \code{NULL}.
+#' @param method_alternative a character string specifies an alternative 
+#' hypothesis testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
+#' Default is \code{less} for left-skewed significance testing.
+#' 
+#' NOTE: This argument only applies to \code{ks_pval} and \code{wilcox_pval} 
+#' method
+#' @param custom_function If method is \code{"custom"}, specifies
+#' a user-defined function here. Default is \code{NULL}.
 #' 
 #' NOTE: \code{custom_function} must take \code{FS} and \code{input_score} 
-#' as its input arguments, and its final result must return a vector of row-wise 
-#' scores ordered from most significant to least significant where its labels or 
-#' names matched the row names of \code{FS} object.
-#' @param custom_parameters if method is \code{"custom"}, specifies a list of
+#' as its input arguments and its final result must return a vector of row-wise 
+#' scores where its labels or names match the row names of \code{FS} object.
+#' @param custom_parameters If method is \code{"custom"}, specifies a list of
 #' additional arguments (excluding \code{FS} and \code{input_score}) to be 
-#' passed to \code{custom_function}. Default is \code{NULL}.
-#' @param alternative a character string specifies an alternative hypothesis
-#' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
-#' Default is \code{less} for left-skewed significance testing.
-#' NOTE: this argument only apply to KS and Wilcoxon method
-#' @param weights if method is \code{ks_score}, specifies a vector of weights 
-#' to perform a weighted-KS testing. Default is \code{NULL}.
+#' passed to \code{custom_function}. For example:
+#' custom_parameters = list(alternative = "less"). Default is \code{NULL}.
+#' @param weights If method is \code{ks_score} or \code{ks_pval}, specifying a 
+#' vector of weights will perform a weighted-KS testing. Default is \code{NULL}.
+#' 
+#' NOTE: \code{weights} must have names or labels that match the labels of 
+#' \code{input_score}.
+#' @param search_start a vector of character strings (separated by commas)
+#' specifies feature names in the FS object to start the search with. 
+#' If \code{search_start} is provided, then \code{top_N} parameter will be 
+#' ignored and vice versa. Default is \code{NULL}.
 #' @param top_N an integer specifies the number of features to start the
-#' search over. By default, it starts from the top best feature (top_N = 1).
+#' search over. By default, it starts with the feature that has the highest 
+#' best score (top_N = 1).
+#' 
 #' NOTE: If \code{top_N} is provided, then \code{search_start} parameter
-#' will be ignored.
-#' @param search_start a list of character strings (separated by commas)
-#' which specifies feature names within the FS object to start
-#' the search with. If \code{search_start} is provided, then \code{top_N}
-#' parameter will be ignored. Default is \code{NULL}.
+#' will be ignored and vice versa. If top_N > 10, it may result in a longer 
+#' search time.
 #' @param search_method a character string specifies an algorithm to filter out
 #' the best candidates (\code{"forward"} or \code{"both"}). Default is
 #' \code{both} (i.e., backward and forward).
 #' @param max_size an integer specifies a maximum size that a meta-feature can
 #' extend to do for a given search. Default is \code{7}.
 #' @param n_perm an integer specifies the number of permutations to perform.
 #' Default is \code{1000}.
-#' @param smooth a logical value indicates whether or not to smooth the p-value
-#' calculation to avoid p-value of 0. Default is \code{TRUE}.
+#' @param perm_alternative an alternative hypothesis type for calculating 
+#' permutation-based p-value. Options: one.sided, two.sided. Default is 
+#' \code{one.sided}.
 #' @param obs_best_score a numeric value corresponding to the best observed
-#' score. This value is used to compare against the permuted best scores.
-#' Default is \code{NULL}. If set to NULL, we will compute the observed
+#' score. This value is used to compare against the \code{n_perm} calculated best 
+#' scores. Default is \code{NULL}. If set to NULL, we will compute the observed
 #' best score based on the given parameters.
-#' @param smooth a logical value indicates whether or not to smooth the p-value
-#' calculation to avoid p-value of 0. Default is \code{TRUE}.
+#' @param smooth a logical value indicates whether or not to add a smoothing 
+#' factor of 1 to the calculation of permutation-based p-value. This option is 
+#' used to avoid a returned p-value of 0. Default is \code{TRUE}.
 #' @param plot a logical value indicates whether or not to plot the empirical
-#' null distribution of the permuted best scores. Default is \code{TRUE}.
+#' null distribution of the permuted best scores. Default is \code{FALSE}.
 #' @param ncores an integer specifies the number of cores to perform
 #' parallelization for permutation-based testing. Default is \code{1}.
-#' @param cache a logical value to determine whether or not to cache the 
-#' permuted best scores. This would help save time for future loading instead 
+#' @param cache a logical value determines whether or not to cache the 
+#' permuted best scores. This helps to save time for future loading instead 
 #' of re-computing the permutation-based testing every time. 
 #' Default is \code{FALSE}.
-#' @param cache_path If cache = TRUE, a full path can be used to cache the 
-#' permuted best scores. Default is \code{NULL}. If NULL, the cache path is
-#' set to system home directory (e.g. \code{$HOME/.Rcache}) for future loading.
+#' @param cache_path a path to cache permuted best scores. Default is \code{NULL}. 
+#' If NULL, the cache path is set to system home directory 
+#' (e.g. \code{$HOME/.Rcache}) for future loading.
 #' @param verbose a logical value indicates whether or not to print the
 #' diagnostic messages. Default is \code{FALSE}.
 #'
 #' @return a list of 4 objects: \code{key}, \code{perm_best_scores}, 
 #' \code{obs_best_score}, \code{perm_pval}
 #' 
-#' -\code{key}: a list of parameters that are used to cache the 
+#' -\code{key}: a list of parameters that was used to cache the 
 #' results of the permutation-based testing. This is useful as the
 #' permuted best scores can be recycled to save time for future loading.
 #' 
 #' -\code{perm_best_scores}: a vector of permuted best scores obtained 
 #' by performing \code{candidate_search} over \code{n_perm} iterations of 
 #' permuted input scores.
 #' 
-#' -\code{obs_best_score}: the observed best score obtained by performing
-#' \code{candidate_search} on a given dataset and input parameters. This 
-#' value is later used to compare against the permuted best scores 
-#' (\code{perm_best_scores}).
+#' -\code{obs_best_score}: a user-provided best score or an observed best score 
+#' obtained by performing \code{candidate_search} on a given dataset and input 
+#' parameters. This value is later used to compare against the permuted best 
+#' scores (\code{perm_best_scores}).
 #' 
 #' \code{perm_pval}: a permutation-based p-value obtained by calculating
-#' sum(perm_best_scores > obs_best_score)/n_perm
+#' sum(perm_best_scores > obs_best_score)/n_perm 
+#' 
+#' NOTE: If smooth = TRUE, a smoothing factor of 1 will be added to the 
+#' calculation of \code{perm_pval}. 
+#' 
+#' e.g. (sum(perm_best_scores > obs_best_score) + 1) / (n_perm + c)
+#' 
+#' This is just to not return a p-value of 0 
 #'
 #' @examples
 #' 
@@ -105,9 +122,10 @@
 #' # Define additional parameters and start the function
 #' cadra_result <- CaDrA(
 #'   FS = sim_FS, input_score = sim_Scores, method = "ks_pval", 
-#'   weights = NULL, alternative = "less", top_N = 1,
+#'   weights = NULL, method_alternative = "less", top_N = 1,
 #'   search_start = NULL, search_method = "both", max_size = 7, 
-#'   n_perm = 10, plot = FALSE, smooth = TRUE, obs_best_score = NULL,
+#'   n_perm = 10, perm_alternative = "one.sided", plot = FALSE, 
+#'   smooth = TRUE, obs_best_score = NULL,
 #'   ncores = 1, cache = FALSE, cache_path = NULL
 #' )
 #' 
@@ -119,18 +137,19 @@ CaDrA <- function(
     input_score,
     method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score", 
                "revealer", "custom"),
+    method_alternative = c("less", "greater", "two.sided"),
     custom_function = NULL,
     custom_parameters = NULL,
-    alternative = c("less", "greater", "two.sided"),
     weights = NULL,
-    top_N = 1,
     search_start = NULL,
+    top_N = 1,
     search_method = c("both", "forward"),
     max_size = 7,
     n_perm = 1000,
+    perm_alternative = c("one.sided", "two.sided"),
     obs_best_score = NULL,
     smooth = TRUE,
-    plot = TRUE,
+    plot = FALSE,
     ncores = 1,
     cache = FALSE,
     cache_path = NULL,
@@ -142,8 +161,9 @@ CaDrA <- function(
 
   # Match arguments
   method <- match.arg(method)
-  alternative <- match.arg(alternative)
+  method_alternative <- match.arg(method_alternative)
   search_method <- match.arg(search_method)
+  perm_alternative <- match.arg(perm_alternative)
 
   # Check n_perm
   stopifnot("invalid number of permutations (nperm)"=
@@ -166,9 +186,9 @@ CaDrA <- function(
               input_score = if(method %in% c("revealer", "custom"))
               { input_score } else { NULL },
               method = method,
+              method_alternative = method_alternative,
               custom_function = custom_function,
               custom_parameters = custom_parameters,
-              alternative = alternative,
               weights = weights,
               top_N = top_N,
               search_start = search_start,
@@ -272,7 +292,7 @@ CaDrA <- function(
           method = method,
           custom_function = custom_function,
           custom_parameters = custom_parameters,
-          alternative = alternative,
+          method_alternative = method_alternative,
           weights = weights,
           top_N = top_N,
           search_start = search_start,
@@ -321,7 +341,7 @@ CaDrA <- function(
       method = method,
       custom_function = custom_function,
       custom_parameters = custom_parameters,
-      alternative = alternative,
+      method_alternative = method_alternative,
       weights = weights,
       top_N = top_N,
       search_start = search_start,
@@ -357,9 +377,15 @@ CaDrA <- function(
   c <- 0
   if(smooth) c <- 1
 
-  perm_pval <- (sum(perm_best_scores > obs_best_score) + c)/
+  onesided_perm_pval <- (sum(perm_best_scores > obs_best_score) + c)/
     (length(perm_best_scores) + c)
 
+  if(perm_alternative == "two.sided"){
+    perm_pval <- 2*min(onesided_perm_pval, 1-onesided_perm_pval)
+  }else{
+    perm_pval <- onesided_perm_pval
+  }
+
   verbose("Permutation p-value: ", perm_pval, "\n")
   verbose("Number of permutations: ", length(perm_best_scores), "\n")
 

diff --git a/R/calc_rowscore.R b/R/calc_rowscore.R
@@ -13,31 +13,35 @@
 #' NOTE: \code{input_score} object must have names or labels that match the 
 #' column names of \code{FS} object.
 #' @param meta_feature a vector of one or more features representing known 
-#' causes of activation or features associated with a response of interest, 
-#' \code{e.g. input_score}. Default is NULL.
+#' causes of activation or features associated with a response of interest 
+#' (\code{e.g. input_score}). Default is NULL.
 #' @param method a character string specifies a scoring method that is
 #' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
 #' or \code{"wilcox_pval"} or \code{wilcox_score} or 
 #' \code{"revealer"} (conditional mutual information from REVEALER) or
-#' \code{"custom"} (a customized scoring method)). 
+#' \code{"custom"} (a user-defined scoring method)). 
 #' Default is \code{ks_pval}.
+#' @param method_alternative a character string specifies an alternative 
+#' hypothesis testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
+#' Default is \code{less} for left-skewed significance testing.
+#' 
+#' NOTE: This argument only applies to \code{ks_pval} and \code{wilcox_pval} 
+#' method
 #' @param custom_function if method is \code{"custom"}, specifies
-#' the name of the customized function here. Default is \code{NULL}.
+#' a user-defined function here. Default is \code{NULL}.
 #' 
 #' NOTE: \code{custom_function} must take \code{FS} and \code{input_score} 
 #' as its input arguments, and its final result must return a vector of row-wise 
-#' scores ordered from most significant to least significant where its labels or 
-#' names matched the row names of \code{FS} object.
+#' scores where its labels or names matched the row names of \code{FS} object.
 #' @param custom_parameters if method is \code{"custom"}, specifies a list of
 #' additional arguments (excluding \code{FS} and \code{input_score}) to be 
-#' passed to \code{custom_function}. Default is \code{NULL}.
-#' @param alternative a character string specifies an alternative hypothesis
-#' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
-#' Default is \code{less} for left-skewed significance testing.
-#' 
-#' NOTE: This argument is applied to KS and Wilcoxon method
-#' @param weights if method is \code{ks_score} or \code{ks_pval}, specifying a 
+#' passed to \code{custom_function}. For example:
+#' custom_parameters = list(alternative = "less"). Default is \code{NULL}.
+#' @param weights If method is \code{ks_score} or \code{ks_pval}, specifying a 
 #' vector of weights will perform a weighted-KS testing. Default is \code{NULL}.
+#' 
+#' NOTE: \code{weights} must have names or labels that match the names or labels
+#' of \code{input_score}.
 #' @param do_check a logical value indicates whether or not to validate if the  
 #' given parameters (\code{FS} and \code{input_score}) are valid inputs. 
 #' Default is \code{TRUE}.
@@ -70,8 +74,8 @@
 #'   input_score = input_score,
 #'   meta_feature = NULL,
 #'   method = "ks_pval",
-#'   weights = NULL,
-#'   alternative = "less"
+#'   method_alternative = "less",
+#'   weights = NULL
 #' )
 #'
 #' # Run the wilcoxon method
@@ -80,15 +84,15 @@
 #'   input_score = input_score,
 #'   meta_feature = NULL,
 #'   method = "wilcox_pval",
-#'   alternative = "less"
+#'   method_alternative = "less"
 #' )
 #'
 #' # Run the revealer method
 #' revealer_rowscore_result <- calc_rowscore(
 #'   FS = mat,
 #'   input_score = input_score,
-#'   method = "revealer",
-#'   meta_feature = NULL
+#'   meta_feature = NULL,
+#'   method = "revealer"
 #' )
 #' 
 #' # A customized function using ks-test function
@@ -171,9 +175,9 @@ calc_rowscore <- function(
     meta_feature = NULL,
     method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score", 
                "revealer", "custom"),
+    method_alternative = c("less", "greater", "two.sided"),
     custom_function = NULL,
     custom_parameters = NULL,   
-    alternative = c("less", "greater", "two.sided"),
     weights = NULL,
     do_check = TRUE,
     verbose = FALSE,
@@ -185,7 +189,7 @@ calc_rowscore <- function(
 
   # Match arguments
   method <- match.arg(method)
-  alternative <- match.arg(alternative)
+  method_alternative <- match.arg(method_alternative)
 
   # Check if FS is a matrix or a SummarizedExperiment class object
   if(!is(FS, "SummarizedExperiment") && !is(FS, "matrix"))
@@ -223,14 +227,14 @@ calc_rowscore <- function(
       input_score = input_score,
       meta_feature = meta_feature,
       weights = weights,
-      alternative = alternative,
+      alternative = method_alternative,
       metric = metric
     ),
     wilcox = wilcox_rowscore(
       FS = FS_mat,
       input_score = input_score,
       meta_feature = meta_feature,
-      alternative = alternative,
+      alternative = method_alternative,
       metric = metric
     ),
     revealer = revealer_rowscore(
@@ -243,10 +247,10 @@ calc_rowscore <- function(
       FS = FS,
       input_score = input_score,
       meta_feature = meta_feature,
-      custom_function = custom_function,
-      custom_parameters = custom_parameters,
       method = method, 
-      alternative = alternative, 
+      alternative = method_alternative, 
+      custom_function = custom_function,
+      custom_parameters = custom_parameters,      
       weights = weights, 
       do_check = do_check, 
       verbose = verbose,