diff --git a/DESCRIPTION b/DESCRIPTION index 5db6c3d..4fe7894 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,20 @@ Package: sparsebn Title: Learning Sparse Bayesian Networks from High-Dimensional Data Version: 0.0.5 -Date: 2017-03-15 -Authors@R: person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")) +Date: 2017-09-11 +Authors@R: c( + person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")), + person("Jiaying", "Gu", role = c("aut")), + person("Dacheng", "Zhang", role = c("aut")), + person("Qing", "Zhou", role = c("aut")) + ) Maintainer: Bryon Aragam -Description: Fast methods for learning sparse Bayesian networks from high-dimensional data using sparse regularization, as described in Aragam, Gu, and Zhou (2017) . Designed to handle mixed experimental and observational data with thousands of variables with either continuous or discrete observations. +Description: Fast methods for learning sparse Bayesian networks from high-dimensional data using sparse regularization, as described in Aragam, Gu, and Zhou (2017) . Designed to handle mixed experimental and observational data with thousands of variables with either continuous or discrete observations. Depends: R (>= 3.2.3), sparsebnUtils (>= 0.0.5), ccdrAlgorithm (>= 0.0.4), - discretecdAlgorithm (>= 0.0.3) + discretecdAlgorithm (>= 0.0.5) Suggests: knitr, rmarkdown, @@ -17,10 +22,6 @@ Suggests: igraph, graph, testthat -Remotes: - itsrainingdata/sparsebnUtils@dev, - itsrainingdata/ccdrAlgorithm@dev, - itsrainingdata/discretecdAlgorithm URL: https://github.com/itsrainingdata/sparsebn BugReports: https://github.com/itsrainingdata/sparsebn/issues License: GPL (>= 2) diff --git a/NEWS.md b/NEWS.md index d4a90ec..b563900 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# sparsebn 0.0.5 + +## Features + +* `estimate.dag` now supports white lists and black lists (#6) +* Cytoscape compatibility now available via the method `openCytoscape` (#4) +* `plotDAG` now includes labels for each subplot by default +* See NEWS files for sparsebnUtils, discretecdAlgorithm, and ccdrAlgorithm for more updates + # sparsebn 0.0.4 ## Notes diff --git a/R/data.R b/R/data.R index 4081d02..1999ea0 100644 --- a/R/data.R +++ b/R/data.R @@ -1,10 +1,13 @@ #' The pathfinder network #' -#' Simulated data and network for the pathfinder network from the \href{http://www.bnlearn.com/bnrepository/#pathfinder}{Bayesian network repository}. -#' Pathfinder is an expert system developed by Heckerman et. al (1992) +#' Simulated data and network for the pathfinder network from the +#' \href{http://www.bnlearn.com/bnrepository/#pathfinder}{Bayesian network repository}. +#' Pathfinder is an expert system developed by +#' \href{http://heckerman.com/david/HN92cbr.pdf}{Heckerman et. al (1992)} [1] #' to assist with the diagnosis of lymph-node diseases. #' -#' The data is simulated from a Gaussian SEM assuming unit edge weights and +#' This is a benchmark network used to test algorithms for learning Bayesian +#' networks. The data is simulated from a Gaussian SEM assuming unit edge weights and #' unit variances for all nodes. #' #' @format A \code{\link{list}} with four components: @@ -22,28 +25,36 @@ #' @usage #' data(pathfinder) #' +#' @references +#' [1] Heckerman, David E., and Bharat N. Nathwani. "\href{http://heckerman.com/david/HN92cbr.pdf}{An evaluation of the diagnostic accuracy of Pathfinder}." Computers and Biomedical Research 25.1 (1992): 56-74. +#' #' @examples -#' \dontrun{ -#' # Create a valid sparsebnData object from the simulated pathfinder data +#' +#' ### Create a valid sparsebnData object from the simulated pathfinder data #' data(pathfinder) #' dat <- sparsebnData(pathfinder$data, type = "c") #' -#' # If desired, change the edge weights to be randomly generated -#' coefs <- get.adjacency.matrix(pathfinder$dag) -#' coefs[coefs != 0] <- runif(n = num.edges(pathfinderDAG), min = 0.5, max = 2) -#' vars <- Matrix::Diagonal(n = num.nodes(pathfinderDAG), x = rep(1, num.nodes(pathfinderDAG))) -#' id <- vars -#' covMat <- t(solve(id - coefs)) %*% vars %*% solve(id - coefs) -#' pathfinder.data <- rmvnorm(n = 1000, sigma = as.matrix(covMat)) -#' } +#' ### Code to reproduce this dataset by randomly generating edge weights +#' coefs <- runif(n = num.edges(pathfinder$dag), min = 0.5, max = 2) # coefficients +#' vars <- rep(1, num.nodes(pathfinder$dag)) # variances +#' params <- c(coefs, vars) # parameter vector +#' pathfinder.data <- generate_mvn_data(graph = pathfinder$dag, +#' params = params, +#' n = 1000) #' "pathfinder" #' The discrete cytometry network #' #' Data and network for analyzing the flow cytometry experiment -#' from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)}. -#' The data is a cleaned and discretized version of the raw data (see \code{\link{cytometryContinuous}}) from these experiments. +#' from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)} [1]. +#' The data is a cleaned and discretized version of the raw data (see +#' \code{\link{cytometryContinuous}} for details) from these experiments. +#' +#' After cleaning and pre-processing, the raw continuous measurements have been +#' binned into one of three levels: low = 0, medium = 1, or high = 2. Due to the +#' pre-processing, the discrete data contains fewer observations (n = 5400) +#' compared to the raw, continuous data. #' #' @format A \code{\link{list}} with three components: #' @@ -57,6 +68,9 @@ #' @usage #' data(cytometryDiscrete) #' +#' @references +#' [1] Sachs, Karen, et al. "\href{http://science.sciencemag.org/content/308/5721/523.long}{Causal protein-signaling networks derived from multiparameter single-cell data}." Science 308.5721 (2005): 523-529. +#' #' @examples #' # Create a valid sparsebnData object from the cytometry data #' data(cytometryDiscrete) @@ -67,9 +81,16 @@ #' The continuous cytometry network #' #' Data and network for analyzing the flow cytometry experiment -#' from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)}. +#' from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)} [1]. #' This dataset contains the raw measurements from these experiments. #' +#' The dataset consists of n = 7466 observations of p = 11 continuous +#' variables corresponding to different proteins and phospholipids in human +#' immune system cells, and each observation indicates the measured level of +#' each biomolecule in a single cell under different experimental interventions. +#' Based on this data, a consensus network was reconstructed and validated, which +#' is included as well. +#' #' @format A \code{\link{list}} with three components: #' #' \itemize{ @@ -82,6 +103,9 @@ #' @usage #' data(cytometryContinuous) #' +#' @references +#' [1] Sachs, Karen, et al. "\href{http://science.sciencemag.org/content/308/5721/523.long}{Causal protein-signaling networks derived from multiparameter single-cell data}." Science 308.5721 (2005): 523-529. +#' #' @examples #' # Create a valid sparsebnData object from the cytometry data #' data(cytometryContinuous) diff --git a/R/sparsebn-main.R b/R/sparsebn-main.R index e3ba8d7..888aa4d 100644 --- a/R/sparsebn-main.R +++ b/R/sparsebn-main.R @@ -29,6 +29,14 @@ #' used based on a decreasing log-scale (see also \link[sparsebnUtils]{generate.lambdas}). #' @param lambdas.length Integer number of values to include in the solution path. If \code{lambdas} #' has also been specified, this value will be ignored. +#' @param whitelist A two-column matrix of edges that are guaranteed to be in each +#' estimate (a "white list"). Each row in this matrix corresponds +#' to an edge that is to be whitelisted. These edges can be +#' specified by node name (as a \code{character} matrix), or by +#' index (as a \code{numeric} matrix). +#' @param blacklist A two-column matrix of edges that are guaranteed to be absent +#' from each estimate (a "black list"). See argument +#' "\code{whitelist}" above for more details. #' @param error.tol Error tolerance for the algorithm, used to test for convergence. #' @param max.iters Maximum number of iterations for each internal sweep. #' @param edge.threshold Threshold parameter used to terminate the algorithm whenever the number of edges in the @@ -56,6 +64,8 @@ estimate.dag <- function(data, lambdas = NULL, lambdas.length = 20, + whitelist = NULL, + blacklist = NULL, error.tol = 1e-4, max.iters = NULL, edge.threshold = NULL, @@ -89,11 +99,22 @@ estimate.dag <- function(data, ### Is the data gaussian, binomial, or multinomial? (Other data not supported yet.) data_family <- sparsebnUtils::pick_family(data) + ### If intervention list contains character names, convert to indices + if("character" %in% sparsebnUtils::list_classes(data$ivn)){ + data$ivn <- lapply(data$ivn, function(x){ + idx <- match(x, names(data$data)) + if(length(idx) == 0) NULL # return NULL if no match (=> observational) + else idx + }) + } + ### Run the main algorithms if(data_family == "gaussian"){ ccdrAlgorithm::ccdr.run(data = data, lambdas = lambdas, lambdas.length = lambdas.length, + whitelist = whitelist, + blacklist = blacklist, gamma = concavity, error.tol = error.tol, max.iters = max.iters, @@ -104,6 +125,8 @@ estimate.dag <- function(data, discretecdAlgorithm::cd.run(indata = data, lambdas = lambdas, lambdas.length = lambdas.length, + whitelist = whitelist, + blacklist = blacklist, error.tol = error.tol, convLb = convLb, weight.scale = weight.scale, diff --git a/R/sparsebn-plotting.R b/R/sparsebn-plotting.R index d38104f..97bf24c 100644 --- a/R/sparsebn-plotting.R +++ b/R/sparsebn-plotting.R @@ -72,6 +72,7 @@ plotDAG.sparsebnPath <- function(x, ...){ # plot(x, + labels = TRUE, vertex.size = 4, vertex.label = NA, vertex.label.color = gray(0), diff --git a/README.Rmd b/README.Rmd index 8288528..2b5cf24 100644 --- a/README.Rmd +++ b/README.Rmd @@ -14,6 +14,7 @@ knitr::opts_chunk$set( # sparsebn +[![Project Status: Active The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) [![Travis-CI Build Status](https://travis-ci.org/itsrainingdata/sparsebn.svg?branch=master)](https://travis-ci.org/itsrainingdata/sparsebn) [![](http://www.r-pkg.org/badges/version/sparsebn)](http://www.r-pkg.org/pkg/sparsebn) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/sparsebn)](http://www.r-pkg.org/pkg/sparsebn) diff --git a/README.md b/README.md index bf1feeb..74e2782 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ sparsebn ======== -[![Travis-CI Build Status](https://travis-ci.org/itsrainingdata/sparsebn.svg?branch=master)](https://travis-ci.org/itsrainingdata/sparsebn) [![](http://www.r-pkg.org/badges/version/sparsebn)](http://www.r-pkg.org/pkg/sparsebn) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/sparsebn)](http://www.r-pkg.org/pkg/sparsebn) +[![Project Status: Active The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) [![Travis-CI Build Status](https://travis-ci.org/itsrainingdata/sparsebn.svg?branch=master)](https://travis-ci.org/itsrainingdata/sparsebn) [![](http://www.r-pkg.org/badges/version/sparsebn)](http://www.r-pkg.org/pkg/sparsebn) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/sparsebn)](http://www.r-pkg.org/pkg/sparsebn) Introducing `sparsebn`: A new R package for learning sparse Bayesian networks and other graphical models from high-dimensional data via sparse regularization. Designed from the ground up to handle: diff --git a/cran-comments.md b/cran-comments.md index d26ca9a..2c72869 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,20 +1,23 @@ ## Test environments -* local OS X install, R 3.3.3 -* ubuntu 12.04.5 (travis-ci), R 3.3.3 (oldrel, devel, and release) +* local OS X install, R 3.4.1 +* ubuntu 12.04.5 (travis-ci: oldrel, devel, and release) * win-builder (devel and release) -* r-hub (oldrel, devel, and release) +* r-hub (devel) ## R CMD check results -There were no ERRORs or WARNINGs. There was one NOTE: +There were no ERRORs, WARNINGs, or NOTEs. -* checking CRAN incoming feasibility ... NOTE -Maintainer: ‘Bryon Aragam ’ +## CRAN Package Check Results for Package sparsebn -Days since last update: 4 +From https://cran.rstudio.com/web/checks/check_results_sparsebn.html -- This is a very minor release to update the package metadata to include a reference -to a new preprint discussing this package. No DOI is available yet since it is currently -under review. +Version: 0.0.4 +Check: package dependencies +Result: NOTE + Package suggested but not available for checking: ‘graph’ +Flavor: r-release-osx-x86_64 + +This has been fixed. ## Dependencies diff --git a/data/cytometryContinuous.rda b/data/cytometryContinuous.rda index 5e1589b..e18dfa3 100644 Binary files a/data/cytometryContinuous.rda and b/data/cytometryContinuous.rda differ diff --git a/data/cytometryDiscrete.rda b/data/cytometryDiscrete.rda index dbf6ac4..596164d 100644 Binary files a/data/cytometryDiscrete.rda and b/data/cytometryDiscrete.rda differ diff --git a/man/cytometryContinuous.Rd b/man/cytometryContinuous.Rd index 424497b..b613cf8 100644 --- a/man/cytometryContinuous.Rd +++ b/man/cytometryContinuous.Rd @@ -17,13 +17,24 @@ data(cytometryContinuous) } \description{ Data and network for analyzing the flow cytometry experiment -from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)}. +from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)} [1]. This dataset contains the raw measurements from these experiments. } +\details{ +The dataset consists of n = 7466 observations of p = 11 continuous +variables corresponding to different proteins and phospholipids in human +immune system cells, and each observation indicates the measured level of +each biomolecule in a single cell under different experimental interventions. +Based on this data, a consensus network was reconstructed and validated, which +is included as well. +} \examples{ # Create a valid sparsebnData object from the cytometry data data(cytometryContinuous) dat <- sparsebnData(cytometryContinuous$data, type = "c", ivn = cytometryContinuous$ivn) +} +\references{ +[1] Sachs, Karen, et al. "\href{http://science.sciencemag.org/content/308/5721/523.long}{Causal protein-signaling networks derived from multiparameter single-cell data}." Science 308.5721 (2005): 523-529. } \keyword{datasets} diff --git a/man/cytometryDiscrete.Rd b/man/cytometryDiscrete.Rd index 09cc2ca..bb48282 100644 --- a/man/cytometryDiscrete.Rd +++ b/man/cytometryDiscrete.Rd @@ -17,13 +17,23 @@ data(cytometryDiscrete) } \description{ Data and network for analyzing the flow cytometry experiment -from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)}. -The data is a cleaned and discretized version of the raw data (see \code{\link{cytometryContinuous}}) from these experiments. +from \href{http://science.sciencemag.org/content/308/5721/523.long}{Sachs et al. (2005)} [1]. +The data is a cleaned and discretized version of the raw data (see +\code{\link{cytometryContinuous}} for details) from these experiments. +} +\details{ +After cleaning and pre-processing, the raw continuous measurements have been +binned into one of three levels: low = 0, medium = 1, or high = 2. Due to the +pre-processing, the discrete data contains fewer observations (n = 5400) +compared to the raw, continuous data. } \examples{ # Create a valid sparsebnData object from the cytometry data data(cytometryDiscrete) dat <- sparsebnData(cytometryDiscrete$data, type = "d", ivn = cytometryDiscrete$ivn) +} +\references{ +[1] Sachs, Karen, et al. "\href{http://science.sciencemag.org/content/308/5721/523.long}{Causal protein-signaling networks derived from multiparameter single-cell data}." Science 308.5721 (2005): 523-529. } \keyword{datasets} diff --git a/man/estimate.dag.Rd b/man/estimate.dag.Rd index 9bc5d46..8024ac7 100644 --- a/man/estimate.dag.Rd +++ b/man/estimate.dag.Rd @@ -4,10 +4,10 @@ \alias{estimate.dag} \title{Estimate a DAG from data} \usage{ -estimate.dag(data, lambdas = NULL, lambdas.length = 20, error.tol = 1e-04, - max.iters = NULL, edge.threshold = NULL, concavity = 2, - weight.scale = 1, convLb = 0.01, upperbound = 100, adaptive = FALSE, - verbose = FALSE) +estimate.dag(data, lambdas = NULL, lambdas.length = 20, whitelist = NULL, + blacklist = NULL, error.tol = 1e-04, max.iters = NULL, + edge.threshold = NULL, concavity = 2, weight.scale = 1, convLb = 0.01, + upperbound = 100, adaptive = FALSE, verbose = FALSE) } \arguments{ \item{data}{Data as \code{\link[sparsebnUtils]{sparsebnData}}.} @@ -19,6 +19,16 @@ used based on a decreasing log-scale (see also \link[sparsebnUtils]{generate.la \item{lambdas.length}{Integer number of values to include in the solution path. If \code{lambdas} has also been specified, this value will be ignored.} +\item{whitelist}{A two-column matrix of edges that are guaranteed to be in each +estimate (a "white list"). Each row in this matrix corresponds +to an edge that is to be whitelisted. These edges can be +specified by node name (as a \code{character} matrix), or by +index (as a \code{numeric} matrix).} + +\item{blacklist}{A two-column matrix of edges that are guaranteed to be absent +from each estimate (a "black list"). See argument +"\code{whitelist}" above for more details.} + \item{error.tol}{Error tolerance for the algorithm, used to test for convergence.} \item{max.iters}{Maximum number of iterations for each internal sweep.} diff --git a/man/pathfinder.Rd b/man/pathfinder.Rd index dcede12..dc6db8f 100644 --- a/man/pathfinder.Rd +++ b/man/pathfinder.Rd @@ -17,28 +17,33 @@ is purely observational, this is just \code{NULL}. Compatible with the input to data(pathfinder) } \description{ -Simulated data and network for the pathfinder network from the \href{http://www.bnlearn.com/bnrepository/#pathfinder}{Bayesian network repository}. -Pathfinder is an expert system developed by Heckerman et. al (1992) +Simulated data and network for the pathfinder network from the +\href{http://www.bnlearn.com/bnrepository/#pathfinder}{Bayesian network repository}. +Pathfinder is an expert system developed by +\href{http://heckerman.com/david/HN92cbr.pdf}{Heckerman et. al (1992)} [1] to assist with the diagnosis of lymph-node diseases. } \details{ -The data is simulated from a Gaussian SEM assuming unit edge weights and +This is a benchmark network used to test algorithms for learning Bayesian +networks. The data is simulated from a Gaussian SEM assuming unit edge weights and unit variances for all nodes. } \examples{ -\dontrun{ -# Create a valid sparsebnData object from the simulated pathfinder data + +### Create a valid sparsebnData object from the simulated pathfinder data data(pathfinder) dat <- sparsebnData(pathfinder$data, type = "c") -# If desired, change the edge weights to be randomly generated -coefs <- get.adjacency.matrix(pathfinder$dag) -coefs[coefs != 0] <- runif(n = num.edges(pathfinderDAG), min = 0.5, max = 2) -vars <- Matrix::Diagonal(n = num.nodes(pathfinderDAG), x = rep(1, num.nodes(pathfinderDAG))) -id <- vars -covMat <- t(solve(id - coefs)) \%*\% vars \%*\% solve(id - coefs) -pathfinder.data <- rmvnorm(n = 1000, sigma = as.matrix(covMat)) -} +### Code to reproduce this dataset by randomly generating edge weights +coefs <- runif(n = num.edges(pathfinder$dag), min = 0.5, max = 2) # coefficients +vars <- rep(1, num.nodes(pathfinder$dag)) # variances +params <- c(coefs, vars) # parameter vector +pathfinder.data <- generate_mvn_data(graph = pathfinder$dag, + params = params, + n = 1000) +} +\references{ +[1] Heckerman, David E., and Bharat N. Nathwani. "\href{http://heckerman.com/david/HN92cbr.pdf}{An evaluation of the diagnostic accuracy of Pathfinder}." Computers and Biomedical Research 25.1 (1992): 56-74. } \keyword{datasets} diff --git a/tests/testthat/helper-sparsebnUtils-generate_objects.R b/tests/testthat/helper-sparsebnUtils-generate_objects.R index 8d974b9..f7d3366 100644 --- a/tests/testthat/helper-sparsebnUtils-generate_objects.R +++ b/tests/testthat/helper-sparsebnUtils-generate_objects.R @@ -91,6 +91,12 @@ generate_fixed_edgeList <- function(){ } generate_fixed_graphNEL <- function(){ + ### This function requires the 'graph' package to be installed + if (!requireNamespace("graph", quietly = TRUE)) { + stop("graph package (from BioConductor) required!", call. = FALSE) + } + + V <- helper_node_names() edL <- vector("list", length=5) names(edL) <- V diff --git a/tests/testthat/test-bwlist.R b/tests/testthat/test-bwlist.R new file mode 100644 index 0000000..5bb70e6 --- /dev/null +++ b/tests/testthat/test-bwlist.R @@ -0,0 +1,95 @@ +context("black/white lists") + +dat_cts <- generate_continuous_sparsebnData() +dat_disc <- generate_discrete_sparsebnData() + +### Get all blocks at once +nodes <- names(dat_cts$data) # same names for continuous / discrete +blocks <- lapply(nodes, function(x){ + # Allow all off-diagonal entries since we are no longer using the block decomposition + row <- (nodes)[nodes != x] + col <- rep(x, length(col)) + cbind(row, col) +}) +blocks <- do.call("rbind", blocks) + +pp <- ncol(dat_cts$data) +node_names <- names(dat_cts$data) +len_saturate <- pp*(pp-1)/2 +nlambda <- 20 + +test_that("White lists work OK", { + dags <- estimate.dag(dat_cts, lambdas.length = nlambda, whitelist = blocks) + expect_equal(num.edges(dags), rep(len_saturate, nlambda)) + + dags <- estimate.dag(dat_disc, lambdas.length = nlambda, whitelist = blocks) + expect_equal(num.edges(dags), rep(len_saturate, nlambda)) + + # One edge + from <- "x" + to <- "y" + from.idx <- match(from, node_names) + to.idx <- match(to, node_names) + white <- rbind(c(from, to)) + + dags <- estimate.dag(dat_cts, lambdas.length = nlambda, whitelist = white) + check_edge <- all(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_true(check_edge) + + dags <- estimate.dag(dat_disc, lambdas.length = nlambda, whitelist = white) + check_edge <- all(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_true(check_edge) + + # Two edges + from <- "b" + to <- "z" + from.idx <- match(from, node_names) + to.idx <- match(to, node_names) + white <- rbind(c("x", "y"), c(from, to)) + + dags <- estimate.dag(dat_cts, lambdas.length = nlambda, whitelist = white) + check_edge <- all(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_true(check_edge) + + dags <- estimate.dag(dat_disc, lambdas.length = nlambda, whitelist = white) + check_edge <- all(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_true(check_edge) +}) + +test_that("Black lists work OK", { + dags <- ccdr.run(dat_cts, lambdas.length = nlambda, blacklist = blocks) + expect_equal(num.edges(dags), rep(0, nlambda)) + + dags <- ccdr.run(dat_disc, lambdas.length = nlambda, blacklist = blocks) + expect_equal(num.edges(dags), rep(0, nlambda)) + + # One edge + from <- "x" + to <- "y" + from.idx <- match(from, node_names) + to.idx <- match(to, node_names) + black <- rbind(c(from, to)) + + dags <- estimate.dag(dat_cts, lambdas.length = nlambda, blacklist = black) + check_edge <- any(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_false(check_edge) + + dags <- estimate.dag(dat_disc, lambdas.length = nlambda, blacklist = black) + check_edge <- any(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_false(check_edge) + + # Two edges + from <- "b" + to <- "z" + from.idx <- match(from, node_names) + to.idx <- match(to, node_names) + black <- rbind(c("x", "y"), c(from, to)) + + dags <- estimate.dag(dat_cts, lambdas.length = nlambda, blacklist = black) + check_edge <- any(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_false(check_edge) + + dags <- estimate.dag(dat_disc, lambdas.length = nlambda, blacklist = black) + check_edge <- any(unlist(lapply(dags, function(x) from.idx %in% as.list(x$edges)[[to]]))) + expect_false(check_edge) +}) diff --git a/tests/testthat/test-cytometry.R b/tests/testthat/test-cytometry.R new file mode 100644 index 0000000..204a6fc --- /dev/null +++ b/tests/testthat/test-cytometry.R @@ -0,0 +1,28 @@ +context("datasets") + +### continuous +test_that("Continuous cytometry data runs OK", { + expect_error(data(cytometryContinuous), NA) + expect_error(dat <- sparsebnData(cytometryContinuous$data, type = "c"), NA) + expect_error(out <- estimate.dag(dat), NA) + expect_error(estimate.parameters(out, dat), NA) + expect_error(select.parameter(out, dat), NA) +}) + +## discrete +test_that("Discrete cytometry data runs OK", { + expect_error(data(cytometryDiscrete), NA) + expect_error(dat <- sparsebnData(cytometryDiscrete$data, type = "c"), NA) + expect_error(out <- estimate.dag(dat), NA) + expect_error(estimate.parameters(out, dat), NA) + expect_error(select.parameter(out, dat), NA) +}) + +### pathfinder +test_that("Pathfinder data runs OK", { + expect_error(data(pathfinder), NA) + expect_error(dat <- sparsebnData(pathfinder$data, type = "c"), NA) + expect_error(out <- estimate.dag(dat), NA) + expect_error(estimate.parameters(out, dat), NA) + expect_error(select.parameter(out, dat), NA) +}) diff --git a/tests/testthat/test-dag.R b/tests/testthat/test-dag.R index 7c0b893..2335ff5 100644 --- a/tests/testthat/test-dag.R +++ b/tests/testthat/test-dag.R @@ -3,19 +3,62 @@ context("estimate.dag") dat_cts <- generate_continuous_sparsebnData() dat_disc <- generate_discrete_sparsebnData() +### Helper functions for unit tests +check_edgelist_integer <- function(out){ + sapply(out, function(x) all(sapply(x$edges, function(e) is.integer(e)))) +} + +check_edgelist_names <- function(out){ + sapply(out, function(x) !is.null(names(x$edges))) +} + +edgelist_names_equal <- function(out, compare){ + sapply(out, function(x) isTRUE(all.equal(names(x$edges), compare))) +} + test_that("", { ### More tests added here }) test_that("DAG estimation runs without errors", { + ### Test main algorithms expect_error(estimate.dag(dat_cts), NA) # continuous data expect_error(estimate.dag(dat_disc), NA) # discrete data expect_error(estimate.dag(dat_disc, adaptive = TRUE), NA) # adaptive algorithm + + ### White and black lists + expect_error(estimate.dag(dat_cts, whitelist = rbind(c(1,2))), NA) # non-NULL whitelist + expect_error(estimate.dag(dat_cts, blacklist = rbind(c(1,2))), NA) # non-NULL blacklist + expect_error(estimate.dag(dat_cts, whitelist = rbind(c(1,2)), blacklist = rbind(c(2,3))), NA) # non-NULL black- AND whitelist + expect_error(estimate.dag(dat_disc, whitelist = rbind(c(1,2))), NA) # non-NULL whitelist + expect_error(estimate.dag(dat_disc, blacklist = rbind(c(1,2))), NA) # non-NULL blacklist + expect_error(estimate.dag(dat_disc, whitelist = rbind(c(1,2)), blacklist = rbind(c(2,3))), NA) # non-NULL black- AND whitelist }) -test_that("estimate.dag returns expected output", { +test_that("estimate.dag returns expected output: Continuous input", { + ### Continuous data out <- estimate.dag(dat_cts) expect_is(out, "sparsebnPath") expect_true(check_list_class(as.list(out), "sparsebnFit")) + expect_true(check_list_class(lapply(out, function(x) x$edges), "edgeList")) expect_true(is.zero(out[[1]]$edges)) # first estimate should always be null + + # Check that edgeLists have proper format + expect_true(all(check_edgelist_integer(out))) # contents are indices, not characters + expect_true(all(check_edgelist_names(out))) # names is not NULL + expect_true(all(edgelist_names_equal(out, names(dat_cts$data)))) # names is correct +}) + +test_that("estimate.dag returns expected output: Discrete input", { + ### Discrete data + out <- estimate.dag(dat_disc) + expect_is(out, "sparsebnPath") + expect_true(check_list_class(as.list(out), "sparsebnFit")) + expect_true(check_list_class(lapply(out, function(x) x$edges), "edgeList")) + expect_true(is.zero(out[[1]]$edges)) # first estimate should always be null + + # Check that edgeLists have proper format + expect_true(all(check_edgelist_integer(out))) # contents are indices, not characters + expect_true(all(check_edgelist_names(out))) # names is not NULL + expect_true(all(edgelist_names_equal(out, names(dat_disc$data)))) # names is correct })