Skip to content

Commit

Permalink
update documentation to account for changes in candidate_search
Browse files Browse the repository at this point in the history
  • Loading branch information
RC-88 committed Jul 14, 2023
1 parent 06c6b66 commit fda607f
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 76 deletions.
7 changes: 4 additions & 3 deletions R/calc_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#' NOTE: \code{input_score} object must have names or labels that match the
#' column names of \code{FS} object.
#' @param seed_names a vector of one or more features representing known
#' causes of activation or features associated with a response of interest.
#' It is applied for \code{method = "revealer"} only.
#' causes of activation or features associated with a response of interest,
#' \code{e.g. input_score}. Default is NULL.
#' @param method a character string specifies a scoring method that is
#' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
#' or \code{"wilcox_pval"} or \code{wilcox_score} or
Expand Down Expand Up @@ -213,7 +213,8 @@ calc_rowscore <- function(
alternative = alternative,
weights = weights,
do_check = do_check,
verbose = verbose
verbose = verbose,
...
)
)

Expand Down
4 changes: 2 additions & 2 deletions R/candidate_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ candidate_search <- function(
best_score_only = best_score_only,
do_plot = do_plot,
do_check = FALSE, # MAKE SURE DO_CHECK IS SILENCE HERE
verbose = FALSE, # MAKE SURE VERBOSE IS SILENCE HERE
verbose = FALSE # MAKE SURE VERBOSE IS SILENCE HERE
)

# The scores are already ordered from highest to lowest significant
Expand Down Expand Up @@ -449,7 +449,7 @@ forward_backward_check <- function
custom_function = custom_function,
custom_parameters = custom_parameters,
alternative = alternative,
weights = weights,
weights = weights,
...
)

Expand Down
65 changes: 49 additions & 16 deletions R/custom_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#' readout of interest such as protein expression, pathway activity, etc.
#' The \code{input_score} object must have names or labels that match the column
#' names of \code{FS} object.
#' @param seed_names a vector of one or more features representing known causes
#' of activation or features associated with a response of interest.
#' Default is NULL.
#' @param seed_names a vector of one or more features representing known
#' causes of activation or features associated with a response of interest,
#' \code{e.g. input_score}. Default is NULL.
#' @param custom_function a customized function which computes a row-wise
#' score for each row of a given binary feature set (FS).
#'
Expand All @@ -26,32 +26,64 @@
#' @param custom_parameters a list of additional arguments to be passed to
#' \code{custom_function()} (excluding \code{FS} and \code{input_score}).
#' Default is NULL.
#' @param known_parameters a list of known parameters that existed in the
#' previous function that can be passed to \code{custom_function()} if and only
#' if they were not provided by \code{custom_parameters}. Default is NULL.
#' @param ... additional parameters to be passed to \code{custom_function}
#'
#' @noRd
#'
#' @examples
#'
#' # A customized function using ks-test function
#' customized_rowscore <- function(FS, input_score, alternative="less"){
#' customized_rowscore <- function(FS, input_score, seed_names=NULL, alternative="less"){
#'
#' # Check if seed_names is provided
#' if(!is.null(seed_names)){
#' # Taking the union across the known seed features
#' if(length(seed_names) > 1) {
#' seed_vector <- as.numeric(ifelse(colSums(FS[seed_names,]) == 0, 0, 1))
#' }else{
#' seed_vector <- as.numeric(FS[seed_names,])
#' }
#'
#' # Remove the seeds from the binary feature matrix
#' # and taking logical OR btw the remaining features with the seed vector
#' locs <- match(seed_names, row.names(FS))
#' FS <- base::sweep(FS[-locs,], 2, seed_vector, `|`)*1
#'
#' # Check if there are any features that are all 1s generated from
#' # taking the union between the matrix
#' # We cannot compute statistics for such features and thus they need
#' # to be filtered out
#' if(any(rowSums(FS) == ncol(FS))){
#' warning("Features with all 1s generated from taking the matrix union ",
#' "will be removed before progressing...\n")
#' FS <- FS[rowSums(FS) != ncol(FS),]
#' }
#' }
#'
#' # KS is a ranked-based method
#' # So we need to sort input_score from highest to lowest values
#' input_score <- sort(input_score, decreasing=TRUE)
#'
#' # Re-order the matrix based on the order of input_score
#' FS <- FS[, names(input_score), drop=FALSE]
#'
#' # Compute the scores using the KS method
#' ks <- apply(FS, 1, function(r){
#' x = input_score[which(r==1)];
#' y = input_score[which(r==0)];
#' res <- ks.test(x, y, alternative=alternative)
#' return(c(res$statistic, res$p.value))
#' })
#'
#' # Obtain score statistics and p-values from KS method
#' # Obtain score statistics
#' stat <- ks[1,]
#'
#' # Change values of 0 to the machine lowest value to avoid taking -log(0)
#' # Obtain p-values and change values of 0 to the machine lowest value
#' # to avoid taking -log(0)
#' pval <- ks[2,]
#' pval[which(pval == 0)] <- .Machine$double.xmin
#'
#' # Compute the -log scores for pval
#' # Compute the -log(pval)
#' # Make sure scores has names that match the row names of FS object
#' scores <- -log(pval)
#' names(scores) <- rownames(FS)
Expand All @@ -75,6 +107,7 @@
#' custom_rs <- custom_rowscore(
#' FS = mat,
#' input_score = input_score,
#' seed_names = NULL,
#' custom_function = customized_rowscore,
#' custom_parameters = NULL
#' )
Expand Down Expand Up @@ -126,22 +159,22 @@ custom_rowscore <- function
"as one of its arguments (required).")

## Create a list with only the required variables
req_args <- list(FS=FS, input_score=input_score, seed_names=seed_names)
req_parameters <- list(FS=FS, input_score=input_score, seed_names=seed_names)

# Obtain additional parameters
known_parameters <- list(...)
additional_parameters <- list(...)

# Combine custom_parameters, required variables, and a list of
# known parameters together
# However, we must exclude FS, input_score from custom_parameters
# excluding FS, input_score, and custom_parameters from known parameters
# as they would be redundant
combined_parameters <- c(
req_args,
req_parameters,
custom_parameters[
which(!names(custom_parameters) %in% names(req_args))],
known_parameters[
which(!names(known_parameters) %in% c(names(req_args),
which(!names(custom_parameters) %in% names(req_parameters))],
additional_parameters[
which(!names(additional_parameters) %in% c(names(req_parameters),
names(custom_parameters)))]
)

Expand Down
4 changes: 2 additions & 2 deletions R/ks_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#' The \code{input_score} object must have names or labels that match the column
#' names of FS object.
#' @param seed_names a vector of one or more features representing known causes
#' of activation or features associated with a response of interest.
#' Default is NULL.
#' of activation or features associated with a response of interest,
#' \code{e.g. input_score}. Default is NULL.
#' @param weights a vector of weights to perform a \code{weighted-KS} test.
#' Default is \code{NULL}. If not NULL, \code{weights} must have labels or names
#' that match labels of \code{input_score}.
Expand Down
6 changes: 3 additions & 3 deletions R/revealer_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#' readout of interest such as protein expression, pathway activity, etc.
#' The \code{input_score} object must have names or labels that match the column
#' names of FS object.
#' @param seed_names a vector of one or more features representing known causes
#' of activation or features associated with a response of interest.
#' Default is NULL.
#' @param seed_names a vector of one or more features representing known
#' causes of activation or features associated with a response of interest,
#' \code{e.g. input_score}. Default is NULL.
#' @param assoc_metric an association metric: \code{"IC"} for information
#' coefficient or \code{"COR"} for correlation. Default is \code{IC}.
#'
Expand Down
6 changes: 3 additions & 3 deletions R/wilcox_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
#' readout of interest such as protein expression, pathway activity, etc.
#' The \code{input_score} object must have names or labels that match the column
#' names of FS object.
#' @param seed_names a vector of one or more features representing known causes
#' of activation or features associated with a response of interest.
#' Default is NULL.
#' @param seed_names a vector of one or more features representing known
#' causes of activation or features associated with a response of interest,
#' \code{e.g. input_score}. Default is NULL.
#' @param alternative a character string specifies an alternative
#' hypothesis testing (\code{"two.sided"} or \code{"greater"} or
#' \code{"less"}). Default is \code{less} for left-skewed significance testing.
Expand Down
14 changes: 14 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,25 @@ For more information, please see the associated manuscript [Kartha et al. (2019)

## (1) Installation

- Using `devtools` package

```r
library(devtools)
devtools::install_github("montilab/CaDrA")
```

- Using `BiocManager` package

```r
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")

# The following initializes usage of Bioc devel
BiocManager::install(version='devel')

BiocManager::install("CaDrA")
```

## (2) Quickstart

```r
Expand Down
4 changes: 2 additions & 2 deletions man/calc_rowscore.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

99 changes: 54 additions & 45 deletions vignettes/scoring_functions.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -134,53 +134,62 @@ See `?custom_rowscore` for more details
# A customized function using ks-test function
customized_rowscore <- function(FS, input_score, seed_names=NULL, alternative="less"){
# Check if seed_names is provided
if(!is.null(seed_names)){
# Taking the union across the known seed features
if(length(seed_names) > 1) {
seed_vector <- as.numeric(ifelse(colSums(FS[seed_names,]) == 0, 0, 1))
}else{
seed_vector <- as.numeric(FS[seed_names,])
}
# Check if seed_names is provided
if(!is.null(seed_names)){
# Taking the union across the known seed features
if(length(seed_names) > 1) {
seed_vector <- as.numeric(ifelse(colSums(FS[seed_names,]) == 0, 0, 1))
}else{
seed_vector <- as.numeric(FS[seed_names,])
}
# Remove the seeds from the binary feature matrix
# and taking logical OR btw the remaining features with the seed vector
locs <- match(seed_names, row.names(FS))
FS <- base::sweep(FS[-locs,], 2, seed_vector, `|`)*1
# Check if there are any features that are all 1s generated from
# taking the union between the matrix
# We cannot compute statistics for such features and thus they need
# to be filtered out
if(any(rowSums(FS) == ncol(FS))){
warning("Features with all 1s generated from taking the matrix union ",
"will be removed before progressing...\n")
FS <- FS[rowSums(FS) != ncol(FS),]
}
}
# Remove the seeds from the binary feature matrix
# and taking logical OR btw the remaining features with the seed vector
locs <- match(seed_names, row.names(FS))
FS <- base::sweep(FS[-locs,], 2, seed_vector, `|`)*1
# KS is a ranked-based method
# So we need to sort input_score from highest to lowest values
input_score <- sort(input_score, decreasing=TRUE)
# Check if there are any features that are all 1s generated from
# taking the union between the matrix
# We cannot compute statistics for such features and thus they need
# to be filtered out
if(any(rowSums(FS) == ncol(FS))){
warning("Features with all 1s generated from taking the matrix union ",
"will be removed before progressing...\n")
FS <- FS[rowSums(FS) != ncol(FS),]
}
}
ks <- apply(FS, 1, function(r){
x = input_score[which(r==1)];
y = input_score[which(r==0)];
res <- ks.test(x, y, alternative=alternative)
return(c(res$statistic, res$p.value))
})
# Obtain score statistics and p-values from KS method
stat <- ks[1,]
# Change values of 0 to the machine lowest value to avoid taking -log(0)
pval <- ks[2,]
pval[which(pval == 0)] <- .Machine$double.xmin
# Compute the -log scores for pval
# Make sure scores has names that match the row names of FS object
scores <- -log(pval)
names(scores) <- rownames(FS)
return(scores)
# Re-order the matrix based on the order of input_score
FS <- FS[, names(input_score), drop=FALSE]
# Compute the scores using the KS method
ks <- apply(FS, 1, function(r){
x = input_score[which(r==1)];
y = input_score[which(r==0)];
res <- ks.test(x, y, alternative=alternative)
return(c(res$statistic, res$p.value))
})
# Obtain score statistics
stat <- ks[1,]
# Obtain p-values and change values of 0 to the machine lowest value
# to avoid taking -log(0)
pval <- ks[2,]
pval[which(pval == 0)] <- .Machine$double.xmin
# Compute the -log(pval)
# Make sure scores has names that match the row names of FS object
scores <- -log(pval)
names(scores) <- rownames(FS)
return(scores)
}
# Search for best features using a custom-defined function
Expand Down

0 comments on commit fda607f

Please sign in to comment.