Skip to content

Commit

Permalink
reformat long lines wherever possible
Browse files Browse the repository at this point in the history
  • Loading branch information
katgit committed Mar 29, 2023
1 parent fce14a6 commit e6ba0f8
Show file tree
Hide file tree
Showing 16 changed files with 155 additions and 98 deletions.
6 changes: 4 additions & 2 deletions R/BRCA_GISTIC_MUT_SIG.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#' Genomic Data from TCGA BRCA MUT + GISTIC
#'
#' A SummarizedExperiment object consists of 16,873 genomic features across 951 samples.
#' A SummarizedExperiment object consists of 16,873 genomic features
#' across 951 samples.
#'
#' @docType data
#'
#' @usage data(BRCA_GISTIC_MUT_SIG)
#'
#' @format An object of class \code{SummarizedExperiment} from \code{SummarizedExperiment} package
#' @format An object of class \code{SummarizedExperiment} from
#' \code{SummarizedExperiment} package
#' containing an assay of 16,873 rows (features) and 951 columns (samples)
#' see \code{SummarizedExperiment} for more details.
#'
Expand Down
6 changes: 4 additions & 2 deletions R/CCLE_MUT_SCNA.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#' Genomic Data from CCLE MUT + SCNA
#'
#' A SummarizedExperiment object consists of 17,723 genomic features across 82 samples.
#' A SummarizedExperiment object consists of 17,723 genomic features across
#' 82 samples.
#'
#' @docType data
#'
#' @usage data(CCLE_MUT_SCNA)
#'
#' @format An object of class \code{SummarizedExperiment} from \code{SummarizedExperiment} package
#' @format An object of class \code{SummarizedExperiment} from
#' \code{SummarizedExperiment} package
#' containing a matrix of 17,723 rows (features) and 82 columns (samples).
#' See \code{SummarizedExperiment} for more details.
#'
Expand Down
10 changes: 6 additions & 4 deletions R/cadra.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#' @param input_score a vector of continuous scores representing a phenotypic
#' readout of interest such as protein expression, pathway activity, etc.
#'
#' NOTE: \code{input_score} object must have names or labels that match the column
#' names of \code{FS} object.
#' NOTE: \code{input_score} object must have names or labels that match
#' the column names of \code{FS} object.
#' @param method a character string specifies a scoring method that is
#' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
#' or \code{"wilcox_pval"} or \code{wilcox_score} or
Expand Down Expand Up @@ -96,7 +96,8 @@
CaDrA <- function(
FS,
input_score,
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score", "revealer", "custom"),
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score",
"revealer", "custom"),
custom_function = NULL,
custom_parameters = NULL,
alternative = c("less", "greater", "two.sided"),
Expand Down Expand Up @@ -191,7 +192,8 @@ CaDrA <- function(
length(perm_best_scores),
" permutated scores for the specified dataset",
" and search parameters in cache path\n")
verbose("RE-COMPUTE PERMUTATION-BASED TESTINGS WITH LARGER NUMBER OF PERMUTATIONS\n")
verbose("RE-COMPUTE PERMUTATION-BASED TESTINGS ",
"WITH LARGER NUMBER OF PERMUTATIONS\n")
}

#######################################################################
Expand Down
33 changes: 19 additions & 14 deletions R/cadra_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,11 @@ prefilter_data <- function(
#' of omics features.
#' @param input_score a vector of continuous scores of a molecular phenotype of
#' interest such as protein expression, pathway activity, etc.
#' NOTE: The \code{input_score} object must have names or labels that match the column
#' names of FS object.
#' NOTE: The \code{input_score} object must have names or labels that
#' match the column names of FS object.
#' @param do_check a logical value indicates whether or not to validate if the
#' given parameters (FS and input_score) are valid inputs. Default is \code{TRUE}
#' given parameters (FS and input_score) are valid inputs.
#' Default is \code{TRUE}
#'
#' @noRd
#'
Expand Down Expand Up @@ -179,11 +180,12 @@ check_data_input <- function(
#' @param top_N an integer specifies the number of features to start the
#' search over (e.g. starting from the top 'N' features with the best scores).
#' Default is 1.
#' If \code{top_N} is provided, then \code{search_start} parameter will be ignored.
#' If \code{top_N} is provided, then \code{search_start} parameter
#' will be ignored.
#' @param search_start a list of character strings (separated by commas)
#' which specify a list of feature names to start the search with.
#' If \code{search_start} is provided, then \code{top_N} parameter will be ignored.
#' Default is \code{NULL}.
#' If \code{search_start} is provided, then \code{top_N} parameter
#' will be ignored. Default is \code{NULL}.
#'
#' @noRd
#'
Expand Down Expand Up @@ -213,10 +215,11 @@ check_data_input <- function(
#' feature_names = rownames(sim_FS)
#' )
#'
#' @return If top_N is given, a vector of indices of top N features with the best
#' scores will be returned and used to start the candidate_search() with.
#' Otherwise, the candidate_search() will start the search with a list of indices
#' of features defined in search_start.
#' @return If top_N is given, a vector of indices of top N features with
#' the best scores will be returned and used to start the candidate_search()
#' with.
#' Otherwise, the candidate_search() will start the search with a list of
#' indices of features defined in search_start.
check_top_N <- function(
rowscore,
feature_names,
Expand Down Expand Up @@ -248,7 +251,8 @@ check_top_N <- function(
which(feature_names == top_features[f])
}) |> unlist()

verbose("Evaluating search over top ", length(search_feature_index), " features\n")
verbose("Evaluating search over top ", length(search_feature_index),
" features\n")

}else{

Expand All @@ -265,7 +269,8 @@ check_top_N <- function(

if(length(search_start) == 0 || any(!search_start %in% feature_names))
stop("The provided starting features: ",
paste0(search_start[which(!search_start %in% feature_names)], collapse=", "),
paste0(search_start[which(!search_start %in% feature_names)],
collapse=", "),
" does not exist among the row names of FS object.\n")

# Get the index of the search_start strings
Expand Down Expand Up @@ -316,8 +321,8 @@ ks_test_double_wrap <- function(n_x, y, alt=c("less", "greater", "two.sided")) {
#' Random permutation matrix generator
#'
#' Produces a random permutation score matrix given a vector of sample-specific
#' scores representing a phenotypic readout of interest such as protein expression,
#' pathway activity, etc.
#' scores representing a phenotypic readout of interest such as protein
#' expression, pathway activity, etc.
#' @param input_score a vector of continuous scores of a molecular phenotype of
#' interest such as protein expression, pathway activity, etc.
#' NOTE: The \code{input_score} object must have names or labels to track
Expand Down
11 changes: 6 additions & 5 deletions R/calc_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#' @param input_score a vector of continuous scores representing a phenotypic
#' readout of interest such as protein expression, pathway activity, etc.
#'
#' NOTE: \code{input_score} object must have names or labels that match the column
#' names of \code{FS_mat} object.
#' NOTE: \code{input_score} object must have names or labels that match the
#' column names of \code{FS_mat} object.
#' @param method a character string specifies a scoring method that is
#' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
#' or \code{"wilcox_pval"} or \code{wilcox_score} or
Expand All @@ -33,8 +33,8 @@
#' NOTE: This argument is applied to KS and Wilcoxon method
#' @param weight if method is \code{ks_score} or \code{ks_pval}, specifying a
#' vector of weights will perform a weighted-KS testing. Default is \code{NULL}.
#' @param seed_names a vector of one or more features representing known “causes”
#' of activation or features associated with a response of interest.
#' @param seed_names a vector of one or more features representing known
#' “causes” of activation or features associated with a response of interest.
#' It is applied for \code{method = "revealer"} only.
#' @param do_check a logical value indicates whether or not to validate if the
#' given parameters (\code{FS_mat} and \code{input_score}) are valid inputs.
Expand Down Expand Up @@ -132,7 +132,8 @@
calc_rowscore <- function(
FS_mat,
input_score,
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score", "revealer", "custom"),
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score",
"revealer", "custom"),
custom_function = NULL,
custom_parameters = NULL,
alternative = c("less", "greater", "two.sided"),
Expand Down
57 changes: 34 additions & 23 deletions R/candidate_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
#'
#' Performs heuristic search on a set of binary features to determine whether
#' there are features whose union is more skewed (enriched at the extremes)
#' than either features alone. This is the main functionality of the \code{CaDrA}
#' package.
#' than either features alone. This is the main functionality of
#' the \code{CaDrA} package.
#'
#' NOTE: The legacy function \code{topn_eval()} is equivalent to the recommended
#' \code{candidate_search()} function
#' @param FS a SummarizedExperiment class object from SummarizedExperiment package
#' where rows represent features of interest (e.g. genes, transcripts, exons, etc.)
#' and columns represent the samples. The assay of FS contains binary (1/0) values
#' @param FS a SummarizedExperiment class object from SummarizedExperiment
#' package where rows represent features of interest
#' (e.g. genes, transcripts, exons, etc.) and columns represent the samples.
#' The assay of FS contains binary (1/0) values
#' indicating the presence/absence of omics features.
#' @param input_score a vector of continuous scores representing a phenotypic
#' readout of interest such as protein expression, pathway activity, etc.
#'
#' NOTE: \code{input_score} object must have names or labels that match the column
#' names of \code{FS} object.
#' NOTE: \code{input_score} object must have names or labels that match the
#' column names of \code{FS} object.
#' @param method a character string specifies a scoring method that is
#' used in the search. There are 6 options: (\code{"ks_pval"} or \code{ks_score}
#' or \code{"wilcox_pval"} or \code{wilcox_score} or
Expand Down Expand Up @@ -71,8 +72,9 @@
#' @return If \code{best_score_only} is set to \code{TRUE}, the function will
#' return a list of objects containing ONLY the best score of the union
#' meta-feature matrix for each top N searches. If \code{best_score_only} is set
#' to \code{FALSE}, a list of objects containing the returned meta-feature matrix,
#' as well as its corresponding best score and observed input scores are returned.
#' to \code{FALSE}, a list of objects containing the returned meta-feature
#' matrix, as well as its corresponding best score and observed input scores
#' are returned.
#'
#' @examples
#'
Expand All @@ -95,7 +97,8 @@
candidate_search <- function(
FS,
input_score,
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score", "revealer", "custom"),
method = c("ks_pval", "ks_score", "wilcox_pval", "wilcox_score",
"revealer", "custom"),
custom_function = NULL,
custom_parameters = NULL,
alternative = c("less", "greater", "two.sided"),
Expand Down Expand Up @@ -164,7 +167,8 @@ candidate_search <- function(
## Check the max_size variable ####
max_size <- as.integer(max_size)

if(is.na(max_size) || length(max_size)==0 || max_size <= 0 || max_size > nrow(FS))
if(is.na(max_size) || length(max_size)==0 ||
max_size <= 0 || max_size > nrow(FS))
stop("Please specify a maximum size that a meta-feature can extend to do ",
"for a given search (max_size must be >= 1)",
"and max_size must be lesser than the number of features in FS\n")
Expand Down Expand Up @@ -260,7 +264,8 @@ candidate_search <- function(

# Update best_meta based on feature set
best_meta <- as.numeric(ifelse(
colSums(SummarizedExperiment::assay(FS)[global_best_s_features,]) == 0, 0, 1))
colSums(SummarizedExperiment::assay(FS)[global_best_s_features,]) ==
0, 0, 1))

}

Expand All @@ -273,7 +278,8 @@ candidate_search <- function(
# Here "*1" is used to convert the boolean back to integer 1's and 0's
# Notice we remove anything in best_s_index from the original matrix
# first to form the meta matrix.
meta_mat <- base::sweep(SummarizedExperiment::assay(FS)[-best_s_index,], 2, best_meta, `|`)*1
meta_mat <- base::sweep(SummarizedExperiment::assay(FS)[-best_s_index,],
2, best_meta, `|`)*1

# Check if there are any features that are all 1s generated from
# taking the union between the matrix
Expand Down Expand Up @@ -332,7 +338,8 @@ candidate_search <- function(

verbose("\n\nFinished!\n\n")
verbose("Number of iterations covered: ", i, "\n")
verbose("Best score attained over ", i , " iterations: ", global_best_s, "\n")
verbose("Best score attained over ", i ,
" iterations: ", global_best_s, "\n")

if(length(global_best_s_features) == 1)
verbose("No meta-feature that improves the enrichment was found\n")
Expand Down Expand Up @@ -373,23 +380,25 @@ candidate_search <- function(
best_meta_scores <- unlist(scores_l)

# Fetch the best score with the highest value
best_score <- best_meta_scores[order(best_meta_scores, decreasing = TRUE)][1]
best_score <-best_meta_scores[order(best_meta_scores, decreasing = TRUE)][1]

return(best_score)

}

# By Default, the function returns the top N candidate search results as a list of lists
# By Default, the function returns the top N candidate search results as
# a list of lists
return(topn_l)

}


# Performance backward selection
#' @param FS a SummarizedExperiment class object from SummarizedExperiment package
#' where rows represent features of interest (e.g. genes, transcripts, exons, etc...)
#' and columns represent the samples. The assay of FS contains binary (1/0) values
#' indicating the presence/absence of ‘omics’ features.
#' @param FS a SummarizedExperiment class object from SummarizedExperiment
#' package where rows represent features of interest
#' (e.g. genes, transcripts, exons, etc...)
#' and columns represent the samples. The assay of FS contains binary (1/0)
#' values indicating the presence/absence of ‘omics’ features.
#' @param input_score a vector of continuous scores representing a phenotypic
#' readout of interest such as protein expression, pathway activity, etc.
#' The \code{input_score} object must have names or labels that match the column
Expand All @@ -405,8 +414,9 @@ candidate_search <- function(
#' @param alternative a character string specifies an alternative hypothesis
#' testing (\code{"two.sided"} or \code{"greater"} or \code{"less"}).
#' Default is \code{less} for left-skewed significance testing.
#' @param weight if method is \code{ks_pval} or \code{ks_score}, specifying a vector
#' of weights will perform a weighted-KS testing. Default is \code{NULL}.
#' @param weight if method is \code{ks_pval} or \code{ks_score}, specifying
#' a vector of weights will perform a weighted-KS testing. Default is
#' \code{NULL}.
#' @param glob_f a vector containing the features (or row names) whose
#' union gives the best score (so far) in the search.
#' Feature names should match those of the provided FS object
Expand Down Expand Up @@ -498,7 +508,8 @@ forward_backward_check <- function

# Return a set of features that gave a better score than
# the existing best score and its new best score as well
return(list(best_features=f_names[[f_best_index]], best_scores=f_best_score))
return(list(best_features=f_names[[f_best_index]],
best_scores=f_best_score))

}else{

Expand Down
9 changes: 6 additions & 3 deletions R/custom_rowscore.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,18 +133,21 @@ custom_rowscore <- function
custom_parameters[
which(!names(custom_parameters) %in% names(req_args))],
known_parameters[
which(!names(known_parameters) %in% c(names(req_args), names(custom_parameters)))]
which(!names(known_parameters) %in% c(names(req_args),
names(custom_parameters)))]
)

# Extract a list of custom_function() parameters that existed in combined variables
# Extract a list of custom_function() parameters that existed in
# combined variables
included_parameters <- combined_parameters[
which(names(combined_parameters) %in% names(custom_args))]

# Check if some parameters not existed in a list of combined variables
excluded_parameters <- custom_args[
which(!names(custom_args) %in% names(combined_parameters))]

# If some parameters are excluded, check to see if that argument has a default value
# If some parameters are excluded,
# check to see if that argument has a default value
# Finally, return all necessarily arguments to be passed to custom_function()
all_parameters <- c(
included_parameters,
Expand Down

0 comments on commit e6ba0f8

Please sign in to comment.