update documentation to account for changes in candidate_search

montilab · Jul 14, 2023 · fda607f · fda607f
1 parent 06c6b66
commit fda607f
Show file tree

Hide file tree

Showing 9 changed files with 133 additions and 76 deletions.
diff --git a/R/calc_rowscore.R b/R/calc_rowscore.R
@@ -13,8 +13,8 @@
 #' NOTE: \code{input_score} object must have names or labels that match the 
 #' column names of \code{FS} object.
 #' @param seed_names a vector of one or more features representing known 
-#' “causes” of activation or features associated with a response of interest.
-#' It is applied for \code{method = "revealer"} only.
+#' causes of activation or features associated with a response of interest, 
+#' \code{e.g. input_score}. Default is NULL.
 #' @param method a character string specifies a scoring method that is
 #' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
 #' or \code{"wilcox_pval"} or \code{wilcox_score} or 
@@ -213,7 +213,8 @@ calc_rowscore <- function(
       alternative = alternative, 
       weights = weights, 
       do_check = do_check, 
-      verbose = verbose
+      verbose = verbose,
+      ...
     )
   )
 

diff --git a/R/candidate_search.R b/R/candidate_search.R
@@ -266,7 +266,7 @@ candidate_search <- function(
         best_score_only = best_score_only,
         do_plot = do_plot,
         do_check = FALSE,  # MAKE SURE DO_CHECK IS SILENCE HERE
-        verbose = FALSE,   # MAKE SURE VERBOSE IS SILENCE HERE  
+        verbose = FALSE    # MAKE SURE VERBOSE IS SILENCE HERE  
       )
 
       # The scores are already ordered from highest to lowest significant
@@ -449,7 +449,7 @@ forward_backward_check <- function
       custom_function = custom_function,
       custom_parameters = custom_parameters,     
       alternative = alternative,
-      weights = weights,
+      weights = weights, 
       ...
     )
 

diff --git a/R/custom_rowscore.R b/R/custom_rowscore.R
@@ -13,9 +13,9 @@
 #' readout of interest such as protein expression, pathway activity, etc.
 #' The \code{input_score} object must have names or labels that match the column
 #' names of \code{FS} object.
-#' @param seed_names a vector of one or more features representing known causes
-#' of activation or features associated with a response of interest.
-#' Default is NULL.
+#' @param seed_names a vector of one or more features representing known 
+#' causes of activation or features associated with a response of interest, 
+#' \code{e.g. input_score}. Default is NULL.
 #' @param custom_function a customized function which computes a row-wise 
 #' score for each row of a given binary feature set (FS).
 #' 
@@ -26,32 +26,64 @@
 #' @param custom_parameters a list of additional arguments to be passed to  
 #' \code{custom_function()} (excluding \code{FS} and \code{input_score}).
 #' Default is NULL.
-#' @param known_parameters a list of known parameters that existed in the 
-#' previous function that can be passed to \code{custom_function()} if and only 
-#' if they were not provided by \code{custom_parameters}. Default is NULL.
+#' @param ... additional parameters to be passed to \code{custom_function}
 #' 
 #' @noRd
 #' 
 #' @examples 
 #' 
 #' # A customized function using ks-test function
-#' customized_rowscore <- function(FS, input_score, alternative="less"){
+#' customized_rowscore <- function(FS, input_score, seed_names=NULL, alternative="less"){
+#'   
+#'   # Check if seed_names is provided
+#'   if(!is.null(seed_names)){
+#'     # Taking the union across the known seed features
+#'     if(length(seed_names) > 1) {
+#'       seed_vector <- as.numeric(ifelse(colSums(FS[seed_names,]) == 0, 0, 1))
+#'     }else{
+#'       seed_vector <- as.numeric(FS[seed_names,])
+#'     }
+#'      
+#'     # Remove the seeds from the binary feature matrix
+#'     # and taking logical OR btw the remaining features with the seed vector
+#'     locs <- match(seed_names, row.names(FS))
+#'     FS <- base::sweep(FS[-locs,], 2, seed_vector, `|`)*1
+#'      
+#'     # Check if there are any features that are all 1s generated from
+#'     # taking the union between the matrix
+#'     # We cannot compute statistics for such features and thus they need
+#'     # to be filtered out
+#'     if(any(rowSums(FS) == ncol(FS))){
+#'       warning("Features with all 1s generated from taking the matrix union ",
+#'               "will be removed before progressing...\n")
+#'       FS <- FS[rowSums(FS) != ncol(FS),]
+#'     }
+#'   }
+#'    
+#'   # KS is a ranked-based method
+#'   # So we need to sort input_score from highest to lowest values
+#'   input_score <- sort(input_score, decreasing=TRUE)
+#'    
+#'   # Re-order the matrix based on the order of input_score
+#'   FS <- FS[, names(input_score), drop=FALSE]  
 #'   
+#'   # Compute the scores using the KS method
 #'   ks <- apply(FS, 1, function(r){ 
 #'     x = input_score[which(r==1)]; 
 #'     y = input_score[which(r==0)];
 #'     res <- ks.test(x, y, alternative=alternative)
 #'     return(c(res$statistic, res$p.value))
 #'   })
 #'   
-#'   # Obtain score statistics and p-values from KS method
+#'   # Obtain score statistics
 #'   stat <- ks[1,]
 #'   
-#'   # Change values of 0 to the machine lowest value to avoid taking -log(0)
+#'   # Obtain p-values and change values of 0 to the machine lowest value 
+#'   # to avoid taking -log(0)
 #'   pval <- ks[2,]
 #'   pval[which(pval == 0)] <- .Machine$double.xmin
 #'   
-#'   # Compute the -log scores for pval
+#'   # Compute the -log(pval)
 #'   # Make sure scores has names that match the row names of FS object
 #'   scores <- -log(pval)
 #'   names(scores) <- rownames(FS)
@@ -75,6 +107,7 @@
 #' custom_rs <- custom_rowscore(
 #'   FS = mat,
 #'   input_score = input_score,
+#'   seed_names = NULL,
 #'   custom_function = customized_rowscore,            
 #'   custom_parameters = NULL  
 #' )
@@ -126,22 +159,22 @@ custom_rowscore <- function
          "as one of its arguments (required).")
 
   ## Create a list with only the required variables 
-  req_args <- list(FS=FS, input_score=input_score, seed_names=seed_names)
+  req_parameters <- list(FS=FS, input_score=input_score, seed_names=seed_names)
 
   # Obtain additional parameters
-  known_parameters <- list(...)
+  additional_parameters <- list(...)
 
   # Combine custom_parameters, required variables, and a list of 
   # known parameters together 
   # However, we must exclude FS, input_score from custom_parameters 
   # excluding FS, input_score, and custom_parameters from known parameters 
   # as they would be redundant
   combined_parameters <- c(
-    req_args, 
+    req_parameters, 
     custom_parameters[
-      which(!names(custom_parameters) %in% names(req_args))],
-    known_parameters[
-      which(!names(known_parameters) %in% c(names(req_args), 
+      which(!names(custom_parameters) %in% names(req_parameters))],
+    additional_parameters[
+      which(!names(additional_parameters) %in% c(names(req_parameters), 
                                             names(custom_parameters)))]
   )
 

diff --git a/R/ks_rowscore.R b/R/ks_rowscore.R
@@ -11,8 +11,8 @@
 #' The \code{input_score} object must have names or labels that match the column
 #' names of FS object.
 #' @param seed_names a vector of one or more features representing known causes
-#' of activation or features associated with a response of interest.
-#' Default is NULL.
+#' of activation or features associated with a response of interest, 
+#' \code{e.g. input_score}. Default is NULL.
 #' @param weights a vector of weights to perform a \code{weighted-KS} test.
 #' Default is \code{NULL}. If not NULL, \code{weights} must have labels or names
 #' that match labels of \code{input_score}.

diff --git a/R/revealer_rowscore.R b/R/revealer_rowscore.R
@@ -10,9 +10,9 @@
 #' readout of interest such as protein expression, pathway activity, etc.
 #' The \code{input_score} object must have names or labels that match the column
 #' names of FS object.
-#' @param seed_names a vector of one or more features representing known causes
-#' of activation or features associated with a response of interest.
-#' Default is NULL.
+#' @param seed_names a vector of one or more features representing known 
+#' causes of activation or features associated with a response of interest, 
+#' \code{e.g. input_score}. Default is NULL.
 #' @param assoc_metric an association metric: \code{"IC"} for information
 #' coefficient or \code{"COR"} for correlation. Default is \code{IC}.
 #' 

diff --git a/R/wilcox_rowscore.R b/R/wilcox_rowscore.R
@@ -11,9 +11,9 @@
 #' readout of interest such as protein expression, pathway activity, etc.
 #' The \code{input_score} object must have names or labels that match the column
 #' names of FS object.
-#' @param seed_names a vector of one or more features representing known causes
-#' of activation or features associated with a response of interest.
-#' Default is NULL.
+#' @param seed_names a vector of one or more features representing known 
+#' causes of activation or features associated with a response of interest, 
+#' \code{e.g. input_score}. Default is NULL.
 #' @param alternative a character string specifies an alternative
 #' hypothesis testing (\code{"two.sided"} or \code{"greater"} or
 #' \code{"less"}). Default is \code{less} for left-skewed significance testing.

diff --git a/README.Rmd b/README.Rmd
@@ -34,11 +34,25 @@ For more information, please see the associated manuscript [Kartha et al. (2019)
 
 ## (1) Installation
 
+- Using `devtools` package
+
 ```r
 library(devtools)
 devtools::install_github("montilab/CaDrA")
 ```
 
+- Using `BiocManager` package
+
+```r
+if (!require("BiocManager", quietly = TRUE))
+    install.packages("BiocManager")
+
+# The following initializes usage of Bioc devel
+BiocManager::install(version='devel')
+
+BiocManager::install("CaDrA")
+```
+
 ## (2) Quickstart
 
 ```r

diff --git a/man/calc_rowscore.Rd b/man/calc_rowscore.Rd
diff --git a/vignettes/scoring_functions.Rmd b/vignettes/scoring_functions.Rmd
@@ -134,53 +134,62 @@ See `?custom_rowscore` for more details
 
 # A customized function using ks-test function
 customized_rowscore <- function(FS, input_score, seed_names=NULL, alternative="less"){
-  
-  # Check if seed_names is provided
-  if(!is.null(seed_names)){
-    # Taking the union across the known seed features
-    if(length(seed_names) > 1) {
-      seed_vector <- as.numeric(ifelse(colSums(FS[seed_names,]) == 0, 0, 1))
-    }else{
-      seed_vector <- as.numeric(FS[seed_names,])
-    }
+   
+   # Check if seed_names is provided
+   if(!is.null(seed_names)){
+     # Taking the union across the known seed features
+     if(length(seed_names) > 1) {
+       seed_vector <- as.numeric(ifelse(colSums(FS[seed_names,]) == 0, 0, 1))
+     }else{
+       seed_vector <- as.numeric(FS[seed_names,])
+     }
+      
+     # Remove the seeds from the binary feature matrix
+     # and taking logical OR btw the remaining features with the seed vector
+     locs <- match(seed_names, row.names(FS))
+     FS <- base::sweep(FS[-locs,], 2, seed_vector, `|`)*1
+      
+     # Check if there are any features that are all 1s generated from
+     # taking the union between the matrix
+     # We cannot compute statistics for such features and thus they need
+     # to be filtered out
+     if(any(rowSums(FS) == ncol(FS))){
+       warning("Features with all 1s generated from taking the matrix union ",
+               "will be removed before progressing...\n")
+       FS <- FS[rowSums(FS) != ncol(FS),]
+     }
+   }
     
-    # Remove the seeds from the binary feature matrix
-    # and taking logical OR btw the remaining features with the seed vector
-    locs <- match(seed_names, row.names(FS))
-    FS <- base::sweep(FS[-locs,], 2, seed_vector, `|`)*1
+   # KS is a ranked-based method
+   # So we need to sort input_score from highest to lowest values
+   input_score <- sort(input_score, decreasing=TRUE)
     
-    # Check if there are any features that are all 1s generated from
-    # taking the union between the matrix
-    # We cannot compute statistics for such features and thus they need
-    # to be filtered out
-    if(any(rowSums(FS) == ncol(FS))){
-      warning("Features with all 1s generated from taking the matrix union ",
-              "will be removed before progressing...\n")
-      FS <- FS[rowSums(FS) != ncol(FS),]
-    }
-  }
-  
-  ks <- apply(FS, 1, function(r){ 
-    x = input_score[which(r==1)]; 
-    y = input_score[which(r==0)];
-    res <- ks.test(x, y, alternative=alternative)
-    return(c(res$statistic, res$p.value))
-  })
-  
-  # Obtain score statistics and p-values from KS method
-  stat <- ks[1,]
-  
-  # Change values of 0 to the machine lowest value to avoid taking -log(0)
-  pval <- ks[2,]
-  pval[which(pval == 0)] <- .Machine$double.xmin
-  
-  # Compute the -log scores for pval
-  # Make sure scores has names that match the row names of FS object
-  scores <- -log(pval)
-  names(scores) <- rownames(FS)
-  
-  return(scores)
-  
+   # Re-order the matrix based on the order of input_score
+   FS <- FS[, names(input_score), drop=FALSE]  
+   
+   # Compute the scores using the KS method
+   ks <- apply(FS, 1, function(r){ 
+     x = input_score[which(r==1)]; 
+     y = input_score[which(r==0)];
+     res <- ks.test(x, y, alternative=alternative)
+     return(c(res$statistic, res$p.value))
+   })
+   
+   # Obtain score statistics
+   stat <- ks[1,]
+   
+   # Obtain p-values and change values of 0 to the machine lowest value 
+   # to avoid taking -log(0)
+   pval <- ks[2,]
+   pval[which(pval == 0)] <- .Machine$double.xmin
+   
+   # Compute the -log(pval)
+   # Make sure scores has names that match the row names of FS object
+   scores <- -log(pval)
+   names(scores) <- rownames(FS)
+   
+   return(scores)
+   
 }
 
 # Search for best features using a custom-defined function