Commit made by the Bioconductor Git-SVN bridge.

Consists of 4 commit(s). Commit information: Commit id: c1f953e Commit message: roxygen changed its mind about indentation again, i guess Committed by Kevin Ushey <kevinushey at gmail.com> Commit date: 2014-04-04T11:22:43-07:00 Commit id: effa131 Commit message: Support '*' expansion in CellCounts (#30) Committed by Kevin Ushey <kevinushey at gmail.com> Commit date: 2014-04-04T11:23:54-07:00 Commit id: d095642 Commit message: Merge branch 'master' of https://github.com/RGLab/COMPASS Committed by Kevin Ushey <kevinushey at gmail.com> Commit date: 2014-04-04T11:24:12-07:00 Commit id: 1ffd1bb Commit message: Update version Committed by Kevin Ushey <kevinushey at gmail.com> Commit date: 2014-04-04T11:25:00-07:00 From: Bioconductor Git-SVN Bridge <bioc-sync@bioconductor.org> git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/COMPASS@88433 bc3139a8-67e5-0310-9ffc-ced21a209358
RGLab · Apr 4, 2014 · ded53e6 · ded53e6
2 parents b50cb2b + 1ffd1bb
commit ded53e6
Show file tree

Hide file tree

Showing 30 changed files with 403 additions and 439 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: COMPASS
 Type: Package
 Title: Combinatorial Polyfunctionality Analysis of Single Cells
-Version: 1.1.7
+Version: 1.1.8
 Date: 2014-02-11
 Authors@R: c( person("Lynn", "Lin", role="aut", email="llin@fhcrc.org"),
     person("Kevin", "Ushey", role=c("aut", "cre"), email="kushey@fhcrc.org"),

diff --git a/R/CellCounts.R b/R/CellCounts.R
@@ -75,12 +75,61 @@ CellCounts.COMPASSContainer <- function(data, combinations) {
 
 .CellCounts_character <- function(data, combinations) {
 
+  ## Pre-parse the combinations by expanding entries of the form
+  ## "A*B*C" to
+  ##
+  ## A & B & C
+  ## A & B & !C
+  ## A & !B & C
+  ## ...
+  ##
+  ## TODO: Handle things like A&(B*C)
+  combos <- lapply(combinations, function(x) {
+
+    ## Bail if no '*'
+    if (!grepl("*", x, fixed=TRUE)) return(x)
+
+    ## Bail if unsupported combination seen
+    if (grepl("*", x, fixed=TRUE) && grepl("[&|]", x, perl=TRUE)) {
+      stop("currently cannot combine '*' expander with '&' or '|'",
+        call.=FALSE)
+    }
+
+    ## Generate a matrix of 0s and 1s that forms the same 'structure'
+    splat <- unlist(strsplit(x, "*", fixed = TRUE))
+    n <- length(splat)
+    values <- do.call( 
+      function(...) {
+        expand.grid(..., KEEP.OUT.ATTRS = FALSE)
+      },
+      replicate(n, c(0, 1), simplify = FALSE)
+    )
+
+    ## Replace the 0s and 1s with appropriate names
+    for (i in seq_along(values)) {
+      values[, i] <- swap(values[, i],
+        c(0, 1),
+        c(splat[i], paste0("!", splat[i]))
+      )
+    }
+
+    ## Paste and return the output
+    do.call(
+      function(...) paste(..., sep = "&"),
+      values,
+
+    )
+
+  })
+
+  combos <- unlist(combos)
+
   output <- .Call(C_COMPASS_CellCounts_character, 
     data, 
-    lapply(combinations, function(x) parse(text=x))
+    lapply(combos, function(x) parse(text=x))
   )
   rownames(output) <- names(data)
-  colnames(output) <- unlist(combinations)
+  colnames(output) <- combos
   return(output)
 }
 

diff --git a/man/COMPASS.Rd b/man/COMPASS.Rd
@@ -10,61 +10,50 @@ COMPASS(data, treatment, control, subset = NULL,
   keep_original_data = TRUE, verbose = TRUE, ...)
 }
 \arguments{
-  \item{data}{An object of class \code{COMPASSContainer}.}
-
-  \item{treatment}{An \R expression, evaluated within the
-  metadata, that returns \code{TRUE} for those samples that
-  should belong to the treatment group. For example, if the
-  samples that received a positive stimulation were named
-  \code{"92TH023 Env"} within a variable in \code{meta}
-  called \code{Stim}, you could write \code{Stim ==
-  "92TH023 Env"}.  The expression should have the name of
-  the stimulation vector on the left hand side.}
-
-  \item{control}{An \R expression, evaluated within the
-  metadata, that returns \code{TRUE} for those samples that
-  should belong to the control group. See above for
-  details.}
-
-  \item{subset}{An expression used to subset the data. We
-  keep only the samples for which the expression evaluates
-  to \code{TRUE} in the metadata.}
-
-  \item{category_filter}{A filter for the categories that
-  are generated. This is a function that will be applied to
-  the \emph{treatment counts} matrix generated from the
-  intensities. Only categories meeting the
-  \code{category_filter} criteria will be kept.}
-
-  \item{filter_lowest_frequency}{A number specifying how
-  many of the least expressed markers should be removed.}
-
-  \item{filter_specific_markers}{Similar to
-  \code{filter_lowest_frequency}, but lets you explicitly
-  exclude markers.}
-
-  \item{model}{A string denoting which model to fit;
-  currently, only the discrete model (\code{"discrete"}) is
-  available.}
-
-  \item{iterations}{The number of iterations (per
-  'replication') to perform.}
-
-  \item{replications}{The number of 'replications' to
-  perform. In order to conserve memory, we only keep the
-  model estimates from the last replication.}
-
-  \item{keep_original_data}{Keep the original
-  \code{COMPASSContainer} as part of the \code{COMPASS}
-  output? Note that if you want to run \code{shinyCOMPASS}
-  later, it is necessary that you set this as \code{TRUE}.
-  If memory or disk space is an issue, you may set this to
-  \code{FALSE}.}
-
-  \item{verbose}{Boolean; if \code{TRUE} we output progress
-  information.}
-
-  \item{...}{Other arguments; currently unused.}
+\item{data}{An object of class \code{COMPASSContainer}.}
+
+\item{treatment}{An \R expression, evaluated within the metadata, that
+returns \code{TRUE} for those samples that should belong to the
+treatment group. For example, if the samples that received a positive
+stimulation were named \code{"92TH023 Env"} within a variable in
+\code{meta} called \code{Stim}, you could write \code{Stim == "92TH023 Env"}.
+The expression should have the name of the stimulation vector on the
+left hand side.}
+
+\item{control}{An \R expression, evaluated within the metadata, that
+returns \code{TRUE} for those samples that should belong to the
+control group. See above for details.}
+
+\item{subset}{An expression used to subset the data. We keep only the samples
+for which the expression evaluates to \code{TRUE} in the metadata.}
+
+\item{category_filter}{A filter for the categories that are generated. This is a
+function that will be applied to the \emph{treatment counts} matrix generated from
+the intensities. Only categories meeting the \code{category_filter} criteria will
+be kept.}
+
+\item{filter_lowest_frequency}{A number specifying how many of the least
+expressed markers should be removed.}
+
+\item{filter_specific_markers}{Similar to \code{filter_lowest_frequency},
+but lets you explicitly exclude markers.}
+
+\item{model}{A string denoting which model to fit; currently, only
+the discrete model (\code{"discrete"}) is available.}
+
+\item{iterations}{The number of iterations (per 'replication') to perform.}
+
+\item{replications}{The number of 'replications' to perform. In order to
+conserve memory, we only keep the model estimates from the last replication.}
+
+\item{keep_original_data}{Keep the original \code{COMPASSContainer}
+as part of the \code{COMPASS} output? Note that if you want to run
+\code{shinyCOMPASS} later, it is necessary that you set this as \code{TRUE}.
+If memory or disk space is an issue, you may set this to \code{FALSE}.}
+
+\item{verbose}{Boolean; if \code{TRUE} we output progress information.}
+
+\item{...}{Other arguments; currently unused.}
 }
 \value{
 A \code{COMPASSResult} is a list with the following components:
@@ -130,19 +119,18 @@ fit.
 This function fits the \code{COMPASS} model.
 }
 \section{Category Filter}{
-  The category filter is used to exclude categories
-  (combinations of markers expressed for a particular cell)
-  that are expressed very rarely. It is applied to the
-  \code{treatment} \emph{counts} matrix, which is a
-  \code{N} samples by \code{K} categories matrix. Those
-  categories which are mostly unexpressed can be excluded
-  here. For example, the default criteria,
-
-  \code{category_filter=function(x) colSums(x > 5) > 2}
-
-  indicates that we should only retain categories for which
-  at least two samples had at least 5 cells expressing that
-  particular combination of markers.
+
+The category filter is used to exclude categories (combinations of
+markers expressed for a particular cell) that are expressed very rarely.
+It is applied to the \code{treatment} \emph{counts} matrix, which is a
+\code{N} samples by \code{K} categories matrix. Those categories which
+are mostly unexpressed can be excluded here. For example, the default
+criteria,
+
+\code{category_filter=function(x) colSums(x > 5) > 2}
+
+indicates that we should only retain categories for which at least two samples
+had at least 5 cells expressing that particular combination of markers.
 }
 \examples{
 data(COMPASS) ## loads the COMPASSContainer 'CC'

diff --git a/man/COMPASSContainer.Rd b/man/COMPASSContainer.Rd
@@ -6,29 +6,25 @@
 COMPASSContainer(data, counts, meta, individual_id, sample_id)
 }
 \arguments{
-  \item{data}{A list of matrices. Each matrix \code{M_i} is
-  made up of \code{N_i} cells by \code{K} markers; for
-  example, it could be the intensity information from an
-  intracellular cytokine experiment.  Each element of the
-  list should be named; this name denotes which sample the
-  cell intensities were measured from.}
+\item{data}{A list of matrices. Each matrix \code{M_i} is made up of
+\code{N_i} cells by \code{K} markers; for example, it could be the
+intensity information from an intracellular cytokine experiment.
+Each element of the list should be named; this name denotes which
+sample the cell intensities were measured from.}
 
-  \item{counts}{A named integer vector of the cell counts
-  for each sample in \code{data}.}
+\item{counts}{A named integer vector of the cell counts for each
+sample in \code{data}.}
 
-  \item{meta}{A \code{data.frame} of metadata, describing
-  the individuals in the experiment. Each row in
-  \code{meta} should correspond to a row in \code{data}.
-  There should be one row for each sample; i.e., one row
-  for each element of \code{data}.}
+\item{meta}{A \code{data.frame} of metadata, describing the individuals
+in the experiment. Each row in \code{meta} should correspond to a row
+in \code{data}. There should be one row for each sample;
+i.e., one row for each element of \code{data}.}
 
-  \item{individual_id}{The name of the vector in
-  \code{meta} that denotes the individuals from which
-  samples were drawn.}
+\item{individual_id}{The name of the vector in \code{meta} that denotes the
+individuals from which samples were drawn.}
 
-  \item{sample_id}{The name of the vector in \code{meta}
-  that denotes the samples.  This vector should contain all
-  of the names in the \code{data} input.}
+\item{sample_id}{The name of the vector in \code{meta} that denotes the samples.
+  This vector should contain all of the names in the \code{data} input.}
 }
 \value{
 A \code{COMPASSContainer} returns a list made up of the same

diff --git a/man/COMPASSContainerFromGatingSet.Rd b/man/COMPASSContainerFromGatingSet.Rd
@@ -9,37 +9,26 @@ COMPASSContainerFromGatingSet(gs = NULL, node = NULL, filter.fun = NULL,
   markers = NA)
 }
 \arguments{
-  \item{gs}{a \code{GatingSet} or \code{GatingSetList}}
+\item{gs}{a \code{GatingSet} or \code{GatingSetList}}
 
-  \item{node}{a \code{regular expression} to match a single
-  node in the gating tree. If more than one node is
-  matched, an error is thrown.}
+\item{node}{a \code{regular expression} to match a single node in the gating tree. If more than one node is matched, an error is thrown.}
 
-  \item{filter.fun}{a \code{function} that does string
-  substitution to clean up node names, i.e. turns a 'CD4+'
-  into a 'CD4' to try and match against the
-  \code{parameters} slot of the \code{flowFrames} in
-  \code{gs}}
+\item{filter.fun}{a \code{function} that does string substitution to clean up node names, i.e. turns a 'CD4+' into a 'CD4' to try and
+match against the \code{parameters} slot of the \code{flowFrames} in \code{gs}}
 
-  \item{individual_id}{a \code{character} identifying the
-  subject id column in the \code{gs} metadata}
+\item{individual_id}{a \code{character} identifying the subject id column in the \code{gs} metadata}
 
-  \item{sample_id}{a \code{character} idetifying the sample
-  id column in the \code{gs} metadata.}
+\item{sample_id}{a \code{character} identifying the sample id column in the \code{gs} metadata.}
 
-  \item{mp}{a \code{list} mapping node names to markers.
-  This function tries to guess, but may fail. The user can
-  override the guesswork.}
+\item{mp}{a \code{list} mapping node names to markers. This function tries to guess, but may fail. The user can override the guesswork.}
 
-  \item{countFilterThreshold}{Numeric; if the number of
-  cells expressing at least one marker of interest is less
-  than this threshold, we remove that file.}
+\item{countFilterThreshold}{Numeric; if the number of cells expressing at
+least one marker of interest is less than this threshold, we remove that
+file.}
 
-  \item{matchmethod}{a \code{character} either 'regex' or
-  'Levenshtein' for matching nodes to markers.}
+\item{matchmethod}{a \code{character} either 'regex' or 'Levenshtein' for matching nodes to markers.}
 
-  \item{markers}{a \code{character} vector of marker names
-  to include.}
+\item{markers}{a \code{character} vector of marker names to include.}
 }
 \description{
 This code expects a \code{GatingSet} or \code{GatingSetList}.

diff --git a/man/COMPASSDescription.Rd b/man/COMPASSDescription.Rd
@@ -9,10 +9,10 @@ COMPASSDescription(x)
 COMPASSDescription(x) <- value
 }
 \arguments{
-  \item{x}{A \code{COMPASS} fit.}
+\item{x}{A \code{COMPASS} fit.}
 
-  \item{value}{A set of paragraphs describing the
-  experiment, as a character vector.}
+\item{value}{A set of paragraphs describing the experiment, as a character
+vector.}
 }
 \description{
 This is used for setting an informative description used in the Shiny

diff --git a/man/CellCounts.Rd b/man/CellCounts.Rd
@@ -6,14 +6,12 @@
 CellCounts(data, combinations)
 }
 \arguments{
-  \item{data}{Either a \code{COMPASSContainer}, or a list
-  of matrices.  Each matrix \code{i} is of dimension
-  \code{N_i} cells (rows) by \code{K} common markers
-  (columns).}
+\item{data}{Either a \code{COMPASSContainer}, or a list of matrices.
+Each matrix \code{i} is of dimension \code{N_i} cells (rows) by
+\code{K} common markers (columns).}
 
-  \item{combinations}{A list of 'combinations', used to
-  denote the subsets of interest. See the examples for
-  usage.}
+\item{combinations}{A list of 'combinations', used to denote the
+subsets of interest. See the examples for usage.}
 }
 \description{
 Compute the number of cells expressing a particular

diff --git a/man/Combinations.Rd b/man/Combinations.Rd
@@ -6,7 +6,7 @@
 Combinations(n)
 }
 \arguments{
-  \item{n}{An integer.}
+\item{n}{An integer.}
 }
 \description{
 Given an integer \code{n}, generate all binary combinations of

diff --git a/man/FunctionalityScore.Rd b/man/FunctionalityScore.Rd
@@ -12,10 +12,9 @@ FunctionalityScore(x)
 \method{FunctionalityScore}{default}(x)
 }
 \arguments{
-  \item{x}{An object of class \code{COMPASSResult}, as
-  returned by \code{\link{COMPASS}}. Alternatively, a
-  matrix of functionality scores, used under the assumption
-  that the 'null' category has been dropped.}
+\item{x}{An object of class \code{COMPASSResult}, as returned by
+\code{\link{COMPASS}}. Alternatively, a matrix of functionality scores,
+used under the assumption that the 'null' category has been dropped.}
 }
 \value{
 A numeric vector of functionality scores.

diff --git a/man/GetThresholdedIntensities.Rd b/man/GetThresholdedIntensities.Rd
@@ -6,12 +6,12 @@
 GetThresholdedIntensities(gs, node, map)
 }
 \arguments{
-  \item{gs}{A \code{GatingSet} or \code{GatingSetList}.}
+\item{gs}{A \code{GatingSet} or \code{GatingSetList}.}
 
-  \item{node}{The name, or path, of a single node in a
-  \code{GatingSet} / \code{GatingSetList}.}
+\item{node}{The name, or path, of a single node in a
+\code{GatingSet} / \code{GatingSetList}.}
 
-  \item{map}{A \code{list}, mapping node names to markers.}
+\item{map}{A \code{list}, mapping node names to markers.}
 }
 \value{
 A \code{list} with two components: