Skip to content

Commit

Permalink
fix candidate_search to account for revealer method
Browse files Browse the repository at this point in the history
  • Loading branch information
RC-88 committed Jul 14, 2023
1 parent 3e83492 commit e8fa2d8
Show file tree
Hide file tree
Showing 30 changed files with 543 additions and 451 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CaDrA
Type: Package
Title: Candidate Driver Analysis
Version: 0.99.6
Version: 0.99.7
Date: 2022-11-20
Authors@R:
c(person(given="Reina", family="Chau", role=c("aut","cre"),
Expand Down
20 changes: 10 additions & 10 deletions R/cadra.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
#' Default is \code{less} for left-skewed significance testing.
#' NOTE: this argument only apply to KS and Wilcoxon method
#' @param weight if method is \code{ks_score}, specifies a vector of weights
#' @param weights if method is \code{ks_score}, specifies a vector of weights
#' to perform a weighted-KS testing. Default is \code{NULL}.
#' @param top_N an integer specifies the number of features to start the
#' search over. By default, it starts from the top best feature (top_N = 1).
Expand Down Expand Up @@ -72,12 +72,12 @@
#' @return a list of 4 objects: \code{key}, \code{perm_best_scores},
#' \code{obs_best_score}, \code{perm_pval}
#' \code{key}: a list of parameters that are used to cache the
#' result of the permutation-based testing. This is useful as the
#' results of the permutation-based testing. This is useful as the
#' permuted scores are recycled to save time for future loading.
#' \code{perm_best_scores}: a vector of permuted best scores obtained
#' by performing \code{candidate_search} over (\code{n_perm}) iterations of
#' permuted input scores.
#' \code{obs_best_score}: the observed best score is calculated by performing
#' by performing \code{candidate_search} over \code{n_perm} iterations of
#' the permuted input scores.
#' \code{obs_best_score}: the observed best score obtained by performing
#' \code{candidate_search} on the given dataset and parameters. This value is
#' later used to compare against the permuted best scores
#' (\code{perm_best_scores}).
Expand All @@ -98,7 +98,7 @@
#' # Define additional parameters and start the function
#' cadra_result <- CaDrA(
#' FS = sim_FS, input_score = sim_Scores, method = "ks_pval",
#' weight = NULL, alternative = "less", top_N = 1,
#' weights = NULL, alternative = "less", top_N = 1,
#' search_start = NULL, search_method = "both", max_size = 7,
#' n_perm = 10, plot = FALSE, smooth = TRUE, obs_best_score = NULL,
#' ncores = 1, cache_path = NULL
Expand All @@ -115,7 +115,7 @@ CaDrA <- function(
custom_function = NULL,
custom_parameters = NULL,
alternative = c("less", "greater", "two.sided"),
weight = NULL,
weights = NULL,
top_N = 1,
search_start = NULL,
search_method = c("both", "forward"),
Expand Down Expand Up @@ -170,7 +170,7 @@ CaDrA <- function(
custom_function = custom_function,
custom_parameters = custom_parameters,
alternative = alternative,
weight = weight,
weights = weights,
top_N = top_N,
search_start = search_start,
search_method = search_method,
Expand Down Expand Up @@ -254,7 +254,7 @@ CaDrA <- function(
custom_function = custom_function,
custom_parameters = custom_parameters,
alternative = alternative,
weight = weight,
weights = weights,
top_N = top_N,
search_start = search_start,
search_method = search_method,
Expand Down Expand Up @@ -301,7 +301,7 @@ CaDrA <- function(
custom_function = custom_function,
custom_parameters = custom_parameters,
alternative = alternative,
weight = weight,
weights = weights,
top_N = top_N,
search_start = search_start,
search_method = search_method,
Expand Down
41 changes: 26 additions & 15 deletions R/cadra_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ prefilter_data <- function(
#' interest such as protein expression, pathway activity, etc.
#' NOTE: The \code{input_score} object must have names or labels that
#' match the column names of FS_mat object.
#' @param seed_names a vector of one or more features representing known causes
#' of activation or features associated with a response of interest.
#' Default is NULL.
#' @param do_check a logical value indicates whether or not to validate if the
#' given parameters (FS_mat and input_score) are valid inputs.
#' Default is \code{TRUE}
Expand All @@ -112,16 +115,19 @@ prefilter_data <- function(
#' input_score = rnorm(n = ncol(FS_mat))
#' names(input_score) <- colnames(FS_mat)
#'
#' # Check data inputs
#' check_data_input(
#' FS_mat = FS_mat,
#' input_score = input_score
#' input_score = input_score,
#' seed_names = NULL
#' )
#'
#' @return If do_check=FALSE, return NULL, otherwise, check if FS_mat and
#' input_score are valid inputs
check_data_input <- function(
FS_mat,
input_score,
seed_names = NULL,
do_check = TRUE
){

Expand Down Expand Up @@ -159,6 +165,12 @@ check_data_input <- function(
"These features must be removed from the FS object as ",
"they are uninformative.")

if(length(seed_names) > 0 && any(!seed_names %in% rownames(FS_mat)))
stop("The provided feature(s): ",
paste0(seed_names[which(!seed_names %in% rownames(FS_mat))],
collapse=", "),
" do(es) not exist among the row names of the FS object.\n")

}


Expand Down Expand Up @@ -200,7 +212,7 @@ check_data_input <- function(
#' input_score = sim_Scores,
#' method = "ks_pval",
#' alternative = "less",
#' weight = NULL
#' weights = NULL
#' )
#'
#' top_N_index <- check_top_N(
Expand All @@ -211,10 +223,10 @@ check_data_input <- function(
#' )
#'
#' @return If top_N is given, a vector of indices of top N features with
#' the best scores will be returned and used to start the candidate_search()
#' with.
#' Otherwise, the candidate_search() will start the search with a list of
#' indices of starting features defined in search_start.
#' their corresponding best scores will be returned. These features will later
#' use to start the candidate_search() with. Otherwise, the candidate_search()
#' will start the heuristic search with a list of indices of starting features
#' defined in search_start.
check_top_N <- function(
rowscore,
feature_names,
Expand Down Expand Up @@ -251,29 +263,29 @@ check_top_N <- function(

}else{

search_start <- strsplit(as.character(search_start), ",", fixed=TRUE) |>
unlist() |>
trimws()

if(!is.na(top_N) && length(top_N) > 0)
warning("Since search_start variable is given, ",
"evaluating over top_N value will be ignored.\n")

# User-specified feature names
verbose("Starting with specified feature names...\n")
search_start <- strsplit(as.character(search_start), ",", fixed=TRUE) |>
unlist() |>
trimws()

if(length(search_start) == 0 || any(!search_start %in% feature_names))
stop("The provided starting features: ",
stop("The provided starting feature(s): ",
paste0(search_start[which(!search_start %in% feature_names)],
collapse=", "),
" does not exist among the row names of FS object.\n")
" do(es) not exist among the row names of the FS object.\n")

# Get the index of the search_start strings
search_feature_index <- lapply(seq_along(search_start), function(f){
#f=1;
which(feature_names == search_start[f])
}) |> unlist()

# User-specified feature names
verbose("Starting with specified feature names...\n")

}

return(search_feature_index)
Expand Down Expand Up @@ -366,7 +378,6 @@ generate_permutations <- function(
# Create permutation matrix
perm <- matrix(NA, nrow=n_perm, ncol=n)
colnames(perm) <- names(input_score)
rownames(perm) <- seq_along(n_perm)

# Sample the input scores
for(i in seq_len(n_perm)){
Expand Down
57 changes: 31 additions & 26 deletions R/calc_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
#' readout of interest such as protein expression, pathway activity, etc.
#'
#' NOTE: \code{input_score} object must have names or labels that match the
#' column names of \code{FS_mat} object.
#' column names of \code{FS} object.
#' @param seed_names a vector of one or more features representing known
#' “causes” of activation or features associated with a response of interest.
#' It is applied for \code{method = "revealer"} only.
#' @param method a character string specifies a scoring method that is
#' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
#' or \code{"wilcox_pval"} or \code{wilcox_score} or
Expand All @@ -21,33 +24,30 @@
#' @param custom_function if method is \code{"custom"}, specifies
#' the name of the customized function here. Default is \code{NULL}.
#'
#' NOTE: \code{custom_function} must take \code{FS_mat} and \code{input_score}
#' NOTE: \code{custom_function} must take \code{FS} and \code{input_score}
#' as its input arguments, and its final result must return a vector of row-wise
#' scores ordered from most significant to least significant where its labels or
#' names matched the row names of \code{FS_mat} object.
#' names matched the row names of \code{FS} object.
#' @param custom_parameters if method is \code{"custom"}, specifies a list of
#' additional arguments (excluding \code{FS_mat} and \code{input_score}) to be
#' additional arguments (excluding \code{FS} and \code{input_score}) to be
#' passed to \code{custom_function}. Default is \code{NULL}.
#' @param alternative a character string specifies an alternative hypothesis
#' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
#' Default is \code{less} for left-skewed significance testing.
#'
#' NOTE: This argument is applied to KS and Wilcoxon method
#' @param weight if method is \code{ks_score} or \code{ks_pval}, specifying a
#' @param weights if method is \code{ks_score} or \code{ks_pval}, specifying a
#' vector of weights will perform a weighted-KS testing. Default is \code{NULL}.
#' @param seed_names a vector of one or more features representing known
#' “causes” of activation or features associated with a response of interest.
#' It is applied for \code{method = "revealer"} only.
#' @param do_check a logical value indicates whether or not to validate if the
#' given parameters (\code{FS_mat} and \code{input_score}) are valid inputs.
#' given parameters (\code{FS} and \code{input_score}) are valid inputs.
#' Default is \code{TRUE}.
#' @param verbose a logical value indicates whether or not to print the
#' diagnostic messages. Default is \code{FALSE}.
#' @param ... additional parameters to be passed to \code{custom_function}
#'
#' @return return a vector of row-wise scores where it is ordered from most
#' significant to least significant (e.g. from highest to lowest values)
#' where its labels or names must match the row names of \code{FS_mat} object
#' @return return a vector of row-wise positive scores where it is ordered from
#' most significant to least significant (e.g. from highest to lowest values)
#' and its labels or names must match the row names of \code{FS} object
#'
#' @examples
#'
Expand All @@ -68,15 +68,17 @@
#' ks_rowscore_result <- calc_rowscore(
#' FS = mat,
#' input_score = input_score,
#' seed_names = NULL,
#' method = "ks_pval",
#' weight = NULL,
#' weights = NULL,
#' alternative = "less"
#' )
#'
#' # Run the wilcoxon method
#' wilcox_rowscore_result <- calc_rowscore(
#' FS = mat,
#' input_score = input_score,
#' seed_names = NULL,
#' method = "wilcox_pval",
#' alternative = "less"
#' )
Expand All @@ -90,7 +92,7 @@
#' )
#'
#' # A customized function using ks-test function
#' customized_rowscore <- function(FS, input_score, alternative="less"){
#' customized_rowscore <- function(FS, input_score, seed_names = NULL, alternative="less"){
#'
#' ks <- apply(FS, 1, function(r){
#' x = input_score[which(r==1)];
Expand Down Expand Up @@ -119,6 +121,7 @@
#' custom_rowscore_result <- calc_rowscore(
#' FS = mat,
#' input_score = input_score,
#' seed_names = NULL,
#' method = "custom",
#' custom_function = customized_rowscore,
#' custom_parameters = NULL
Expand All @@ -129,13 +132,13 @@
calc_rowscore <- function(
FS,
input_score,
seed_names = NULL,
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score",
"revealer", "custom"),
custom_function = NULL,
custom_parameters = NULL,
alternative = c("less", "greater", "two.sided"),
weight = NULL,
seed_names = NULL,
weights = NULL,
do_check = TRUE,
verbose = FALSE,
...
Expand All @@ -162,8 +165,9 @@ calc_rowscore <- function(

# Check if FS and input_score are valid inputs
if(do_check == TRUE)
check_data_input(FS_mat = FS_mat, input_score = input_score, do_check = do_check)

check_data_input(FS_mat = FS_mat, input_score = input_score,
seed_names = seed_names, do_check = do_check)

# Define metric value based on a given scoring method
if(length(grep("score", method)) > 0){
metric <- "stat"
Expand All @@ -175,25 +179,21 @@ calc_rowscore <- function(
# based on a given method string
method <- gsub("_score|_pval", "", method)

# Create a list of known arguments (excluding FS_mat and input_score)
# that can be passed to custom_function()
known_parameters <- list(method = method, alternative = alternative,
weight = weight, seed_names = seed_names,
do_check = do_check, verbose = verbose, ...)

# Select the appropriate method to compute row-wise directional scores
rscores <- switch(
method,
ks = ks_rowscore(
FS = FS_mat,
input_score = input_score,
weight = weight,
seed_names = seed_names,
weights = weights,
alternative = alternative,
metric = metric
),
wilcox = wilcox_rowscore(
FS = FS_mat,
input_score = input_score,
seed_names = seed_names,
alternative = alternative,
metric = metric
),
Expand All @@ -206,9 +206,14 @@ calc_rowscore <- function(
custom = custom_rowscore(
FS = FS,
input_score = input_score,
seed_names = seed_names,
custom_function = custom_function,
custom_parameters = custom_parameters,
known_parameters = known_parameters
method = method,
alternative = alternative,
weights = weights,
do_check = do_check,
verbose = verbose
)
)

Expand Down

0 comments on commit e8fa2d8

Please sign in to comment.