Skip to content

Commit

Permalink
allow data matrix or SummarizedExperiment as input to functions
Browse files Browse the repository at this point in the history
  • Loading branch information
RC-88 committed May 1, 2023
1 parent 9226dbd commit 500f4f2
Show file tree
Hide file tree
Showing 27 changed files with 366 additions and 320 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-CHECK.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: master
branches: [master, devel]
paths-ignore:
- '**.Rmd'
- 'R/**'
- '.gitignore'
pull_request:
branches: master
branches: [master, devel]
release:
types: [published]
workflow_dispatch:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/document.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: master
branches: [master, devel]
paths:
- '.github/workflows/document.yaml'
- 'R/**'
pull_request:
branches: master
branches: [master, devel]
release:
types: [published]
workflow_dispatch:
Expand All @@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: document-${{ github.event_name != 'pull_request' }}
group: document-${{ github.event_name != 'pull_request' || github.run_id }}
cancel-in-progress: true
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: master
branches: [master, devel]
paths:
- '.github/workflows/pkgdown.yaml'
- 'vignettes/**'
Expand All @@ -13,7 +13,7 @@ on:
- 'NEWS.md'
- '_pkgdown.yml'
pull_request:
branches: master
branches: [master, devel]
release:
types: [published]
workflow_dispatch:
Expand All @@ -31,7 +31,7 @@ jobs:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' }}
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
cancel-in-progress: true
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/render-news-rmd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: master
branches: [master, devel]
paths:
- '.github/workflows/render-news-rmd.yaml'
- 'NEWS.Rmd'
pull_request:
branches: master
branches: [master, devel]
release:
types: [published]
workflow_dispatch:
Expand All @@ -18,7 +18,7 @@ jobs:
render-news-rmd:
runs-on: ubuntu-latest
concurrency:
group: news-${{ github.event_name != 'pull_request' }}
group: news-${{ github.event_name != 'pull_request' || github.run_id }}
cancel-in-progress: true
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/render-readme-rmd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: master
branches: [master, devel]
paths:
- '.github/workflows/render-readme-rmd.yaml'
- 'README.Rmd'
pull_request:
branches: master
branches: [master, devel]
release:
types: [published]
workflow_dispatch:
Expand All @@ -18,7 +18,7 @@ jobs:
render-readme-rmd:
runs-on: ubuntu-latest
concurrency:
group: readme-${{ github.event_name != 'pull_request' }}
group: readme-${{ github.event_name != 'pull_request' || github.run_id }}
cancel-in-progress: true
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CaDrA
Type: Package
Title: Candidate Driver Analysis
Version: 0.99.4
Version: 0.99.5
Date: 2022-11-20
Authors@R:
c(person(given="Reina", family="Chau", role=c("aut","cre"),
Expand Down Expand Up @@ -48,7 +48,7 @@ Suggests:
devtools,
rmarkdown,
knitr,
testthat (>= 3.0.0)
testthat (>= 3.1.6)
Config/testthat/edition: 3
biocViews: Microarray, RNASeq, GeneExpression, Software, FeatureExtraction
VignetteBuilder: knitr
Expand Down
39 changes: 28 additions & 11 deletions R/cadra.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#' Perform permutation-based testings on a sample of permuted input scores
#' using \code{candidate_search} as the main iterative function for each run.
#'
#' @param FS a SummarizedExperiment object containing binary features where
#' rows represent features of interest (e.g. genes, transcripts, exons, etc...)
#' and columns represent the samples.
#' @param FS a matrix of binary features or a SummarizedExperiment class object
#' from SummarizedExperiment package where rows represent features of interest
#' (e.g. genes, transcripts, exons, etc...) and columns represent the samples.
#' The assay of FS contains binary (1/0) values indicating the presence/absence
#' of omics features.
#' @param input_score a vector of continuous scores representing a phenotypic
#' readout of interest such as protein expression, pathway activity, etc.
#'
Expand Down Expand Up @@ -67,9 +69,20 @@
#' @param verbose a logical value indicates whether or not to print the
#' diagnostic messages. Default is \code{FALSE}.
#'
#' @return a list of key parameters that are used to cache the result of
#' permutation-based testing, a vector of permuted best scores for a given
#' \code{n_perm}, an observed best score, and a permutation p-value.
#' @return a list of 4 objects: \code{key}, \code{perm_best_scores},
#' \code{obs_best_score}, \code{perm_pval}
#' \code{key}: a list of parameters that are used to cache the
#' result of the permutation-based testing. This is useful as the
#' permuted scores are recycled to save time for future loading.
#' \code{perm_best_scores}: a vector of permuted best scores obtained
#' by performing \code{candidate_search} over (\code{n_perm}) iterations of
#' permuted input scores.
#' \code{obs_best_score}: the observed best score is calculated by performing
#' \code{candidate_search} on the given dataset and parameters. This value is
#' later used to compare against the permuted best scores
#' (\code{perm_best_scores}).
#' \code{perm_pval}: a permutation-based p-value obtained by calculating
#' sum(perm_best_scores > obs_best_score)/n_perm
#'
#' @examples
#'
Expand All @@ -95,9 +108,7 @@
#' closeAllConnections()
#'
#' @export
#' @import R.cache doParallel ggplot2 plyr methods
#'
#' @author Reina Chau
#' @import R.cache doParallel ggplot2 plyr methods SummarizedExperiment
#'
CaDrA <- function(
FS,
Expand Down Expand Up @@ -147,7 +158,13 @@ CaDrA <- function(
R.cache::setCacheRootPath()
message("Setting cache root path as: ", getCacheRootPath(), "\n")
}


# Retrieve the original class object of feature set
# If FS is a SummarizedExperiment, convert it to a matrix object
# used its matrix form as a default caching key
if(is(FS, "SummarizedExperiment"))
FS <- SummarizedExperiment::assay(FS)

# Define the key for each cached result
key <- list(FS = FS,
input_score = if(method %in% c("revealer", "custom"))
Expand All @@ -161,7 +178,7 @@ CaDrA <- function(
search_start = search_start,
search_method = search_method,
max_size = max_size)

# Load perm_best_scores with the given key parameters
perm_best_scores <- R.cache::loadCache(key)

Expand Down
76 changes: 36 additions & 40 deletions R/cadra_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ verbose <- function(...){
#' prevalence of a feature across all samples in the FS object which the
#' feature will be filtered out. Default is 0.03 (feature that occur in
#' 3 percent or less of the samples will be removed)
#' @return A SummarizedExperiment object with only the filtered-in features
#' given the filtered thresholds
#' @param verbose a logical value indicates whether or not to print the
#' diagnostic messages. Default is \code{FALSE}.
#'
#' @return A SummarizedExperiment object with only the filtered-in features
#' given the filtered thresholds
#'
#' @examples
#'
#' # Load pre-computed feature set
Expand Down Expand Up @@ -82,68 +84,61 @@ prefilter_data <- function(

#' Checks if feature set and input scores are valid dataset
#'
#' @param FS a matrix of binary features or a SummarizedExperiment class object
#' from SummarizedExperiment package where rows represent features of interest
#' (e.g. genes, transcripts, exons, etc...) and columns represent the samples.
#' The assay of FS contains binary (1/0) values indicating the presence/absence
#' of omics features.
#' @param FS_mat a matrix of binary features where rows represent features of
#' interest (e.g. genes, transcripts, exons, etc...) and columns represent
#' the samples.
#' @param input_score a vector of continuous scores of a molecular phenotype of
#' interest such as protein expression, pathway activity, etc.
#' NOTE: The \code{input_score} object must have names or labels that
#' match the column names of FS object.
#' match the column names of FS_mat object.
#' @param do_check a logical value indicates whether or not to validate if the
#' given parameters (FS and input_score) are valid inputs.
#' given parameters (FS_mat and input_score) are valid inputs.
#' Default is \code{TRUE}
#'
#'
#' @noRd
#'
#' @examples
#'
#' # Load pre-computed feature set
#' data(sim_FS)
#'
#' # Load pre-computed input-score
#' data(sim_Scores)
#' # Create a feature matrix
#' FS_mat <- matrix(c(1,0,1,0,0,0,0,0,1,0,
#' 0,0,1,0,1,0,1,0,0,0,
#' 0,0,0,0,1,0,1,0,1,0), nrow=3)
#'
#' colnames(FS_mat) <- 1:10
#' row.names(FS_mat) <- c("TP_1", "TP_2", "TP_3")
#'
#' # Create a vector of observed input scores
#' set.seed(42)
#' input_score = rnorm(n = ncol(FS_mat))
#' names(input_score) <- colnames(FS_mat)
#'
#' check_data_input(
#' FS = sim_FS,
#' input_score = sim_Scores
#' FS_mat = FS_mat,
#' input_score = input_score
#' )
#'
#' @return a filtered feature set and input scores with overlapping samples
#' @import SummarizedExperiment
#' @return If do_check=FALSE, return NULL, otherwise, check if FS_mat and
#' input_score are valid inputs
check_data_input <- function(
FS,
FS_mat,
input_score,
do_check = TRUE
){

if(do_check == FALSE) return(NULL)

# Check if FS is a matrix or a SummarizedExperiment class object
if(!is(FS, "SummarizedExperiment") && !is(FS, "matrix"))
stop("'FS' must be a matrix or a SummarizedExperiment class object
from SummarizedExperiment package")

# Retrieve the feature binary matrix
if(is(FS, "SummarizedExperiment")){
mat <- as.matrix(SummarizedExperiment::assay(FS))
}else{
mat <- FS
}


# Check if the matrix has only binary 0 or 1 values
if(length(mat) == 0 || any(!mat %in% c(0,1)) || any(is.na(mat)))
stop("FS object must contain binary values 0s or 1s (no empty values).")
if(length(FS_mat) == 0 || any(!FS_mat %in% c(0,1)))
stop("FS object must contain binary values 0s or 1s.")

# Make sure the FS object has row names for features tracking
if(is.null(rownames(mat)))
if(is.null(rownames(FS_mat)))
stop("The FS object does not have row names to ",
"track features by. Please provide unique features or row names ",
"for the FS object.\n")

# Make sure the FS object has row names for features tracking
if(is.null(colnames(mat)))
if(is.null(colnames(FS_mat)))
stop("The FS object does not have column names to ",
"track samples by. Please provide unique sample names ",
"for the FS object.\n")
Expand All @@ -153,13 +148,13 @@ check_data_input <- function(
# the column names of the FS object
if(length(input_score) == 0 || any(!is.numeric(input_score)) ||
any(is.na(input_score)) || is.null(names(input_score)) ||
any(!names(input_score) %in% colnames(mat)))
any(!names(input_score) %in% colnames(FS_mat)))
stop("input_score must contain a vector of continuous scores ",
"(with no NAs), and its vector names or labels must match the column ",
"names of the FS object.\n")

# Check if the features have either all 0s or 1s values
if(any(rowSums(mat) %in% c(0, ncol(mat)) ))
if(any(rowSums(FS_mat) %in% c(0, ncol(FS_mat)) ))
stop("The FS object has features that are either all 0s or 1s. ",
"These features must be removed from the FS object as ",
"they are uninformative.")
Expand Down Expand Up @@ -219,7 +214,7 @@ check_data_input <- function(
#' the best scores will be returned and used to start the candidate_search()
#' with.
#' Otherwise, the candidate_search() will start the search with a list of
#' indices of features defined in search_start.
#' indices of starting features defined in search_start.
check_top_N <- function(
rowscore,
feature_names,
Expand Down Expand Up @@ -329,6 +324,7 @@ ks_test_double_wrap <- function(n_x, y, alt=c("less", "greater", "two.sided")) {
#' samples by.
#' @param n_perm a number of permutations to generate. This determines
#' the number of rows in the permutation matrix.
#'
#' @return a matrix of values where each row contains scores of a single
#' permuted \code{input_score}.
#'
Expand Down

0 comments on commit 500f4f2

Please sign in to comment.