allow data matrix or SummarizedExperiment as input to functions

montilab · May 1, 2023 · 500f4f2 · 500f4f2
1 parent 9226dbd
commit 500f4f2
Show file tree

Hide file tree

Showing 27 changed files with 366 additions and 320 deletions.
diff --git a/.github/workflows/R-CMD-CHECK.yaml b/.github/workflows/R-CMD-CHECK.yaml
@@ -2,13 +2,13 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: master
+    branches: [master, devel]
     paths-ignore:
       - '**.Rmd'
       - 'R/**'
       - '.gitignore'
   pull_request:
-    branches: master
+    branches: [master, devel]
   release:
     types: [published]
   workflow_dispatch:

diff --git a/.github/workflows/document.yaml b/.github/workflows/document.yaml
@@ -2,12 +2,12 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: master
+    branches: [master, devel]
     paths: 
       - '.github/workflows/document.yaml'
       - 'R/**'
   pull_request:
-    branches: master
+    branches: [master, devel]
   release:
     types: [published]
   workflow_dispatch:
@@ -19,7 +19,7 @@ jobs:
     runs-on: ubuntu-latest
     # Only restrict concurrency for non-PR jobs
     concurrency:
-      group: document-${{ github.event_name != 'pull_request' }}
+      group: document-${{ github.event_name != 'pull_request' || github.run_id }}
       cancel-in-progress: true
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -2,7 +2,7 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: master
+    branches: [master, devel]
     paths: 
       - '.github/workflows/pkgdown.yaml'
       - 'vignettes/**'
@@ -13,7 +13,7 @@ on:
       - 'NEWS.md'
       - '_pkgdown.yml'
   pull_request:
-    branches: master
+    branches: [master, devel]
   release:
     types: [published]
   workflow_dispatch:
@@ -31,7 +31,7 @@ jobs:
     runs-on: ubuntu-latest
     # Only restrict concurrency for non-PR jobs
     concurrency:
-      group: pkgdown-${{ github.event_name != 'pull_request' }}
+      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
       cancel-in-progress: true
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

diff --git a/.github/workflows/render-news-rmd.yml b/.github/workflows/render-news-rmd.yml
@@ -2,12 +2,12 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: master
+    branches: [master, devel]
     paths: 
       - '.github/workflows/render-news-rmd.yaml'
       - 'NEWS.Rmd'
   pull_request:
-    branches: master
+    branches: [master, devel]
   release:
     types: [published]
   workflow_dispatch:
@@ -18,7 +18,7 @@ jobs:
   render-news-rmd:
     runs-on: ubuntu-latest
     concurrency:
-      group: news-${{ github.event_name != 'pull_request' }}
+      group: news-${{ github.event_name != 'pull_request' || github.run_id }}
       cancel-in-progress: true
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

diff --git a/.github/workflows/render-readme-rmd.yml b/.github/workflows/render-readme-rmd.yml
@@ -2,12 +2,12 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: master
+    branches: [master, devel]
     paths: 
       - '.github/workflows/render-readme-rmd.yaml'
       - 'README.Rmd'
   pull_request:
-    branches: master
+    branches: [master, devel]
   release:
     types: [published]
   workflow_dispatch:
@@ -18,7 +18,7 @@ jobs:
   render-readme-rmd:
     runs-on: ubuntu-latest
     concurrency:
-      group: readme-${{ github.event_name != 'pull_request' }}
+      group: readme-${{ github.event_name != 'pull_request' || github.run_id }}
       cancel-in-progress: true
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: CaDrA
 Type: Package
 Title: Candidate Driver Analysis
-Version: 0.99.4
+Version: 0.99.5
 Date: 2022-11-20
 Authors@R: 
     c(person(given="Reina", family="Chau", role=c("aut","cre"), 
@@ -48,7 +48,7 @@ Suggests:
     devtools,
     rmarkdown,
     knitr,
-    testthat (>= 3.0.0)
+    testthat (>= 3.1.6)
 Config/testthat/edition: 3
 biocViews: Microarray, RNASeq, GeneExpression, Software, FeatureExtraction
 VignetteBuilder: knitr

diff --git a/R/cadra.R b/R/cadra.R
@@ -3,9 +3,11 @@
 #' Perform permutation-based testings on a sample of permuted input scores 
 #' using \code{candidate_search} as the main iterative function for each run.
 #'
-#' @param FS a SummarizedExperiment object containing binary features where
-#' rows represent features of interest (e.g. genes, transcripts, exons, etc...)
-#' and columns represent the samples.
+#' @param FS a matrix of binary features or a SummarizedExperiment class object 
+#' from SummarizedExperiment package where rows represent features of interest 
+#' (e.g. genes, transcripts, exons, etc...) and columns represent the samples. 
+#' The assay of FS contains binary (1/0) values indicating the presence/absence 
+#' of omics features.
 #' @param input_score a vector of continuous scores representing a phenotypic
 #' readout of interest such as protein expression, pathway activity, etc.
 #' 
@@ -67,9 +69,20 @@
 #' @param verbose a logical value indicates whether or not to print the
 #' diagnostic messages. Default is \code{FALSE}.
 #'
-#' @return a list of key parameters that are used to cache the result of
-#' permutation-based testing, a vector of permuted best scores for a given
-#' \code{n_perm}, an observed best score, and a permutation p-value.
+#' @return a list of 4 objects: \code{key}, \code{perm_best_scores}, 
+#' \code{obs_best_score}, \code{perm_pval}
+#' \code{key}: a list of parameters that are used to cache the 
+#' result of the permutation-based testing. This is useful as the
+#' permuted scores are recycled to save time for future loading.
+#' \code{perm_best_scores}: a vector of permuted best scores obtained 
+#' by performing \code{candidate_search} over (\code{n_perm}) iterations of 
+#' permuted input scores.
+#' \code{obs_best_score}: the observed best score is calculated by performing
+#' \code{candidate_search} on the given dataset and parameters. This value is 
+#' later used to compare against the permuted best scores 
+#' (\code{perm_best_scores}).
+#' \code{perm_pval}: a permutation-based p-value obtained by calculating
+#' sum(perm_best_scores > obs_best_score)/n_perm
 #'
 #' @examples
 #'
@@ -95,9 +108,7 @@
 #' closeAllConnections()
 #' 
 #' @export
-#' @import R.cache doParallel ggplot2 plyr methods
-#'
-#' @author Reina Chau
+#' @import R.cache doParallel ggplot2 plyr methods SummarizedExperiment
 #'
 CaDrA <- function(
     FS,
@@ -147,7 +158,13 @@ CaDrA <- function(
     R.cache::setCacheRootPath()
     message("Setting cache root path as: ", getCacheRootPath(), "\n")
   }
-
+
+  # Retrieve the original class object of feature set 
+  # If FS is a SummarizedExperiment, convert it to a matrix object
+  # used its matrix form as a default caching key
+  if(is(FS, "SummarizedExperiment"))
+    FS <- SummarizedExperiment::assay(FS)
+
   # Define the key for each cached result
   key <- list(FS = FS,
               input_score = if(method %in% c("revealer", "custom"))
@@ -161,7 +178,7 @@ CaDrA <- function(
               search_start = search_start,
               search_method = search_method,
               max_size = max_size)
-
+  
   # Load perm_best_scores with the given key parameters
   perm_best_scores <- R.cache::loadCache(key)
 

diff --git a/R/cadra_functions.R b/R/cadra_functions.R
@@ -26,10 +26,12 @@ verbose <- function(...){
 #' prevalence of a feature across all samples in the FS object which the
 #' feature will be filtered out. Default is 0.03 (feature that occur in
 #' 3 percent or less of the samples will be removed)
-#' @return A SummarizedExperiment object with only the filtered-in features
-#' given the filtered thresholds
 #' @param verbose a logical value indicates whether or not to print the
 #' diagnostic messages. Default is \code{FALSE}.
+#' 
+#' @return A SummarizedExperiment object with only the filtered-in features
+#' given the filtered thresholds
+#' 
 #' @examples
 #'
 #' # Load pre-computed feature set
@@ -82,68 +84,61 @@ prefilter_data <- function(
 
 #' Checks if feature set and input scores are valid dataset
 #'
-#' @param FS a matrix of binary features or a SummarizedExperiment class object 
-#' from SummarizedExperiment package where rows represent features of interest 
-#' (e.g. genes, transcripts, exons, etc...) and columns represent the samples. 
-#' The assay of FS contains binary (1/0) values indicating the presence/absence 
-#' of omics features.
+#' @param FS_mat a matrix of binary features where rows represent features of 
+#' interest (e.g. genes, transcripts, exons, etc...) and columns represent 
+#' the samples. 
 #' @param input_score a vector of continuous scores of a molecular phenotype of
 #' interest such as protein expression, pathway activity, etc.
 #' NOTE: The \code{input_score} object must have names or labels that 
-#' match the column names of FS object.
+#' match the column names of FS_mat object.
 #' @param do_check a logical value indicates whether or not to validate if the  
-#' given parameters (FS and input_score) are valid inputs. 
+#' given parameters (FS_mat and input_score) are valid inputs. 
 #' Default is \code{TRUE}
-#' 
+#'
 #' @noRd
 #' 
 #' @examples 
 #' 
-#' # Load pre-computed feature set
-#' data(sim_FS)
-#'
-#' # Load pre-computed input-score
-#' data(sim_Scores)
+#' # Create a feature matrix
+#' FS_mat <- matrix(c(1,0,1,0,0,0,0,0,1,0, 
+#'                 0,0,1,0,1,0,1,0,0,0,
+#'                 0,0,0,0,1,0,1,0,1,0), nrow=3)
+#' 
+#' colnames(FS_mat) <- 1:10
+#' row.names(FS_mat) <- c("TP_1", "TP_2", "TP_3")
+#' 
+#' # Create a vector of observed input scores
+#' set.seed(42)
+#' input_score = rnorm(n = ncol(FS_mat))
+#' names(input_score) <- colnames(FS_mat)
 #'
 #' check_data_input(
-#'  FS = sim_FS,
-#'  input_score = sim_Scores
+#'  FS_mat = FS_mat,
+#'  input_score = input_score
 #' )
 #' 
-#' @return a filtered feature set and input scores with overlapping samples
-#' @import SummarizedExperiment
+#' @return If do_check=FALSE, return NULL, otherwise, check if FS_mat and 
+#' input_score are valid inputs 
 check_data_input <- function(
-    FS,
+    FS_mat,
     input_score,
     do_check = TRUE
 ){
 
   if(do_check == FALSE) return(NULL)
-
-  # Check if FS is a matrix or a SummarizedExperiment class object
-  if(!is(FS, "SummarizedExperiment") && !is(FS, "matrix"))
-    stop("'FS' must be a matrix or a SummarizedExperiment class object
-         from SummarizedExperiment package")
-
-  # Retrieve the feature binary matrix
-  if(is(FS, "SummarizedExperiment")){
-    mat <- as.matrix(SummarizedExperiment::assay(FS))
-  }else{
-    mat <- FS
-  }
-
+
   # Check if the matrix has only binary 0 or 1 values
-  if(length(mat) == 0 || any(!mat %in% c(0,1)) || any(is.na(mat)))
-    stop("FS object must contain binary values 0s or 1s (no empty values).")
+  if(length(FS_mat) == 0 || any(!FS_mat %in% c(0,1)))
+    stop("FS object must contain binary values 0s or 1s.")
 
   # Make sure the FS object has row names for features tracking
-  if(is.null(rownames(mat)))
+  if(is.null(rownames(FS_mat)))
     stop("The FS object does not have row names to ",
          "track features by. Please provide unique features or row names ",
          "for the FS object.\n")
 
   # Make sure the FS object has row names for features tracking
-  if(is.null(colnames(mat)))
+  if(is.null(colnames(FS_mat)))
     stop("The FS object does not have column names to ",
          "track samples by. Please provide unique sample names ",
          "for the FS object.\n")
@@ -153,13 +148,13 @@ check_data_input <- function(
   # the column names of the FS object
   if(length(input_score) == 0 || any(!is.numeric(input_score)) || 
      any(is.na(input_score)) || is.null(names(input_score)) ||
-     any(!names(input_score) %in% colnames(mat)))
+     any(!names(input_score) %in% colnames(FS_mat)))
     stop("input_score must contain a vector of continuous scores ",
          "(with no NAs), and its vector names or labels must match the column ",
          "names of the FS object.\n")
 
   # Check if the features have either all 0s or 1s values
-  if(any(rowSums(mat) %in% c(0, ncol(mat)) ))
+  if(any(rowSums(FS_mat) %in% c(0, ncol(FS_mat)) ))
     stop("The FS object has features that are either all 0s or 1s. ",
          "These features must be removed from the FS object as ",
          "they are uninformative.")
@@ -219,7 +214,7 @@ check_data_input <- function(
 #' the best scores will be returned and used to start the candidate_search() 
 #' with. 
 #' Otherwise, the candidate_search() will start the search with a list of  
-#' indices of features defined in search_start.
+#' indices of starting features defined in search_start.
 check_top_N <- function(
     rowscore, 
     feature_names,
@@ -329,6 +324,7 @@ ks_test_double_wrap <- function(n_x, y, alt=c("less", "greater", "two.sided")) {
 #' samples by.
 #' @param n_perm a number of permutations to generate. This determines
 #' the number of rows in the permutation matrix.
+#' 
 #' @return a matrix of values where each row contains scores of a single 
 #' permuted \code{input_score}.
 #'