Merge pull request #16 from ocbe-uio/release-1.0

Bioconductor submission 0.99.8
ocbe-uio · May 8, 2020 · 196d59f · 196d59f
2 parents 753d94e + b1c85f3
commit 196d59f
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 48 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: DIscBIO
 Date: 2020-05-04
 Title: A User-Friendly Pipeline for Biomarker Discovery in Single-Cell Transcriptomics
-Version: 0.99.7
+Version: 0.99.8
 Authors@R:
     c(
         person(given = "Salim",

diff --git a/R/Jaccard.R b/R/Jaccard.R
@@ -9,6 +9,8 @@
 #'   ["K-means","MB"]. Default is "K-means"
 #' @param K A numeric value of the number of clusters
 #' @param plot if `TRUE`, plots the mean Jaccard similarities
+#' @param R number of bootstrap replicates
+#' @param ... Further arguments passed to \code{boot::boot}
 #' @importFrom philentropy distance
 #' @importFrom boot boot
 #' @importFrom graphics barplot box
@@ -17,7 +19,14 @@
 #' sc <- DISCBIO(valuesG1msReduced)
 #' sc <- Clustexp(sc, cln=3, quiet=TRUE) # K-means clustering
 #' Jaccard(sc, Clustering="K-means", K=3)
-Jaccard <- function(object, Clustering = "K-means", K, plot = TRUE) {
+Jaccard <- function(
+    object,
+    Clustering = "K-means",
+    K,
+    plot = TRUE,
+    R = 100,
+    ...
+) {
     JACCARD <- c()
 
     # Validation
@@ -29,46 +38,40 @@ Jaccard <- function(object, Clustering = "K-means", K, plot = TRUE) {
     }
 
     JS <- function(data, indices) {
-        d <- data[indices,] # allows boot to select sample
-        jac <- suppressMessages(distance(t(d), method = "jaccard"))
-        jac1 <- 1 - jac
+        d      <- data[indices,] # allows boot to select sample
+        jac    <- suppressMessages(distance(t(d), method = "jaccard"))
+        jac1   <- 1 - jac
         JSmean <- mean(jac1)
         return(JSmean)
     }
     for (i in 1:K) {
     # Optimize by avoiding if every loop. Only thing variable is data
     if (Clustering == "K-means") {
-        results <- boot(
-            data = object@fdata[, which(object@kmeans$kpart == i)],
-            statistic = JS,
-            R = 100,
-            stype = "f"
-        )
-        # to get the mean of all bootstrappings (mean of mean Jaccard values)
-        JACCARD[i] <- round(mean(results$t), digits = 3)
-    }
-    if (Clustering == "MB") {
-        results <- boot(
-            data = object@fdata[, which(object@MBclusters$clusterid == i)],
-            statistic = JS,
-            R = 100,
-            stype = "f"
-        )
-        # to get the mean of all bootstrappings (mean of mean Jaccard values)
-        JACCARD[i] <- round(mean(results$t), digits = 3)
+        target_col <- object@kmeans$kpart
+    } else if (Clustering == "MB") {
+        target_col <- object@MBclusters$clusterid
     }
+    results <- boot(
+        data      = object@fdata[, which(target_col == i)],
+        statistic = JS,
+        R         = R,
+        stype     = "f",
+        ...
+    )
+    # to get the mean of all bootstrappings (mean of mean Jaccard values)
+    JACCARD[i] <- round(mean(results$t), digits = 3)
     }
     if (plot) {
-    barplot(
-        height = JACCARD,
-        names.arg = 1:length(JACCARD),
-        ylab = "Mean Jaccard's similarity values",
-        xlab = "Clusters",
-        las = 1,
-        ylim = c(0, 1),
-        col = c("black", "blue", "green", "red", "yellow", "gray")
-    )
-    box()
+        barplot(
+            height    = JACCARD,
+            names.arg = 1: length(JACCARD),
+            ylab      = "Mean Jaccard's similarity values",
+            xlab      = "Clusters",
+            las       = 1,
+            ylim      = c(0, 1),
+            col       = c("black", "blue", "green", "red", "yellow", "gray")
+        )
+        box()
     }
     return(JACCARD)
 }
diff --git a/man/Jaccard.Rd b/man/Jaccard.Rd
diff --git a/tests/testthat/test.DIscBIO.IMP.R b/tests/testthat/test.DIscBIO.IMP.R
@@ -56,7 +56,7 @@ test_that("tSNE is computed", {
 
 test_that("Cluster plots output is as expexted", {
     expect_equivalent(
-        object = Jaccard(sc, Clustering="K-means", K=2, plot = FALSE),
+        object = Jaccard(sc, Clustering="K-means", K=2, plot=FALSE, R=5),
         expected = c(.790, .653),
         tolerance = .01
     )
@@ -111,7 +111,7 @@ cdiff2 <- DEGanalysis(
 # differential expression analysis between two particular clusters.
 cdiff3 <- DEGanalysis2clust(
     sc, Clustering="K-means", K=2, fdr=.15, name="Name", First="CL1",
-    Second="CL2", export=FALSE, quiet=TRUE, plot=FALSE
+    Second="CL2", export=FALSE, quiet=TRUE, plot=FALSE, nperms=5, nresamp=2
 )
 
 test_that("DEGs are calculated", {
@@ -150,9 +150,9 @@ test_that("Decision tree elements are defined", {
     expect_output(str(DATAforDT), "3 obs. of  30 variables")
     expect_s3_class(j48dt, "J48")
     expect_s3_class(summary(j48dt), "Weka_classifier_evaluation")
-    expect_identical(j48dt_eval, c(TP = 14, FN = 4, FP = 5, TN = 7))
+    expect_identical(j48dt_eval, c(TP = 14, FN = 4, FP = 3, TN = 9))
     expect_s3_class(rpartDT, "rpart")
-    expect_identical(rpartEVAL, c(TP = 15, FN = 3, FP = 3, TN = 9))
+    expect_identical(rpartEVAL, c(TP = 16, FN = 2, FP = 4, TN = 8))
 })
 
 # ---------------------------------------------------------------------------- #
@@ -196,7 +196,7 @@ Outliers2 <- FindOutliersMB(
 
 test_that("MB clustering and outliers work as expected", {
     expect_equivalent(
-        object = Jaccard(sc, Clustering="MB", K=2, plot = FALSE),
+        object = Jaccard(sc, Clustering="MB", K=2, plot = FALSE, R=5),
         expected = c(.819, .499),
         tolerance = 0.01
     )
@@ -229,13 +229,13 @@ cdiff1 <- MBClustDiffGenes(sc, K=2, fdr=.2, export=FALSE, quiet=TRUE)
 # DE analysis between all clusters
 cdiff2 <- DEGanalysis(
     sc, Clustering="MB", K=2, fdr=.2, name="Name", export=FALSE, quiet=TRUE,
-    plot = FALSE
+    plot = FALSE, nperms=5, nresamp=2
 )
 
 # differential expression analysis between particular clusters.
 cdiff3 <- DEGanalysis2clust(
     sc, Clustering="MB", K=2, fdr=.15, name="Name", First="CL1", Second="CL2",
-    export = FALSE, plot = FALSE, quiet = TRUE
+    export = FALSE, plot = FALSE, quiet = TRUE, nperms=5, nresamp=2
 )
 
 test_that("DEGs are calculated", {
@@ -268,10 +268,10 @@ rpartEVAL <- RpartEVAL(
 )
 
 test_that("Decision tree elements are defined", {
-    expect_output(str(DATAforDT), "23 obs. of  30 variables") # used to be 31
+    expect_output(str(DATAforDT), "29 obs. of  30 variables") # used to be 31
     expect_s3_class(j48dt, "J48")
     expect_s3_class(summary(j48dt), "Weka_classifier_evaluation")
     expect_identical(j48dt_eval, c(TP = 21, FN = 2, FP = 4, TN = 3))
     expect_s3_class(rpartDT, "rpart")
-    expect_identical(rpartEVAL, c(TP = 19, FN = 4, FP = 4, TN = 3))
+    expect_identical(rpartEVAL, c(TP = 20, FN = 3, FP = 4, TN = 3))
 })
diff --git a/tests/testthat/test.integration.R b/tests/testthat/test.integration.R
@@ -2,17 +2,21 @@
 # Testing integration with SingleCellExperiment
 # ==============================================================================
 context("Converting other formats to DISCBIO")
-
+# ------------------------------------------------------------------------------
+# Setting up datasets
+# ------------------------------------------------------------------------------
 pmbc_seurat <- Seurat::pbmc_small
 pmbc_sce <- Seurat::as.SingleCellExperiment(pmbc_seurat)
 g1_sce <- SingleCellExperiment::SingleCellExperiment(
     list(counts=as.matrix(valuesG1msReduced))
 )
-
+# ------------------------------------------------------------------------------
+# Performing unit tests
+# ------------------------------------------------------------------------------
 test_that("Pure text gets formatted as DISCBIO", {
-    expect_true(is(as.DISCBIO(pmbc_seurat), "DISCBIO"))
+    expect_s4_class(as.DISCBIO(pmbc_seurat), "DISCBIO")
 })
 test_that("SCE file gets formatted as DISCBIO", {
-    expect_true(is(as.DISCBIO(pmbc_sce), "DISCBIO"))
-    expect_true(is(as.DISCBIO(g1_sce), "DISCBIO"))
+    expect_s4_class(as.DISCBIO(pmbc_sce), "DISCBIO")
+    expect_s4_class(as.DISCBIO(g1_sce), "DISCBIO")
 })