Skip to content

Commit

Permalink
STAAR v0.9.6
Browse files Browse the repository at this point in the history
  • Loading branch information
xihaoli committed Aug 16, 2021
2 parents 08472ca + 0e02430 commit c0ddb15
Show file tree
Hide file tree
Showing 15 changed files with 196 additions and 82 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: STAAR
Type: Package
Title: STAAR Procedure for Dynamic Incorporation of Multiple Functional Annotations in Whole-Genome Sequencing Studies
Version: 0.9.5
Date: 2020-09-14
Version: 0.9.6
Date: 2021-08-16
Author: Xihao Li [aut, cre], Zilin Li [aut, cre], Han Chen [aut]
Maintainer: Xihao Li <xihaoli@g.harvard.edu>, Zilin Li <li@hsph.harvard.edu>
Description: An R package for performing STAAR procedure in whole-genome sequencing studies.
Expand All @@ -13,6 +13,6 @@ Encoding: UTF-8
LazyData: true
Depends: R (>= 3.2.0)
LinkingTo: Rcpp, RcppArmadillo
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
Suggests: knitr, rmarkdown
VignetteBuilder: knitr
14 changes: 9 additions & 5 deletions R/CCT.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@
#' @param pvals a numeric vector of p-values, where each of the element is
#' between 0 to 1, to be combined.
#' @param weights a numeric vector of non-negative weights. If \code{NULL}, the
#' equal weights are assumed.
#' equal weights are assumed (default = NULL).
#' @return the aggregated p-value combining p-values from the vector \code{pvals}.
#' @examples pvalues <- c(2e-02,4e-04,0.2,0.1,0.8)
#' @examples CCT(pvals=pvalues)
#' @examples pvalues <- c(2e-02, 4e-04, 0.2, 0.1, 0.8)
#' @examples CCT(pvals = pvalues)
#' @references Liu, Y., & Xie, J. (2020). Cauchy combination test: a powerful test
#' with analytic p-value calculation under arbitrary dependency structures.
#' \emph{Journal of the American Statistical Association 115}(529), 393-402.
#' (\href{https://www.tandfonline.com/doi/full/10.1080/01621459.2018.1554485}{pub})
#' \emph{Journal of the American Statistical Association}, \emph{115}(529), 393-402.
#' (\href{https://doi.org/10.1080/01621459.2018.1554485}{pub})
#' @references Liu, Y., et al. (2019). Acat: A fast and powerful p value combination
#' method for rare-variant analysis in sequencing studies.
#' \emph{The American Journal of Human Genetics}, \emph{104}(3), 410-421.
#' (\href{https://doi.org/10.1016/j.ajhg.2019.01.002}{pub})
#' @export

CCT <- function(pvals, weights=NULL){
Expand Down
4 changes: 2 additions & 2 deletions R/Indiv_Score_Test_Region.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#' \code{\link{fit_null_glmmkin}} function for related samples. Note that \code{\link{fit_null_glmmkin}}
#' is a wrapper of \code{\link{glmmkin}} function from the \code{\link{GMMAT}} package.
#' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
#' defining rare variants (default is 0.01).
#' defining rare variants (default = 0.01).
#' @param rv_num_cutoff the cutoff of minimum number of variants of analyzing
#' a given variant-set (default is 2).
#' a given variant-set (default = 2).
#' @return a data frame with p rows corresponding to the p genetic variants in the given variant-set
#' and three columns: \code{Score} (the score test statistic), \code{SE} (the standard error associated
#' with the score test statistic), and \code{pvalue} (the score test p-value).
Expand Down
43 changes: 34 additions & 9 deletions R/Indiv_Score_Test_Region_cond.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,31 @@
#' \code{\link{fit_null_glmmkin}} function for related samples. Note that \code{\link{fit_null_glmmkin}}
#' is a wrapper of \code{\link{glmmkin}} function from the \code{\link{GMMAT}} package.
#' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
#' defining rare variants (default is 0.01).
#' defining rare variants (default = 0.01).
#' @param rv_num_cutoff the cutoff of minimum number of variants of analyzing
#' a given variant-set (default is 2).
#' a given variant-set (default = 2).
#' @param method_cond a character value indicating the method for conditional analysis.
#' \code{optimal} refers to regressing residuals from the null model on \code{genotype_adj}
#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
#' \code{naive} refers to regressing residuals from the null model on \code{genotype_adj}
#' and taking the residuals (default = \code{optimal}).
#' @return a data frame with p rows corresponding to the p genetic variants in the given variant-set
#' and three columns: \code{Score_cond} (the conditional score test statistic adjusting for variants
#' in \code{genotype_adj}), \code{SE_cond} (the standard error associated with the
#' conditional score test statistic), and \code{pvalue_cond} (the conditional score test p-value).
#' If a variant in the given variant-set has minor allele frequency = 0 or
#' greater than \code{rare_maf_cutoff}, the corresponding row will be \code{NA}. If a variant in
#' the given variant-set has standard error equal to 0, the p-value will be set as 1.
#' @references Sofer, T., et al. (2019). A fully adjusted two-stage procedure for rank-normalization
#' in genetic association studies. \emph{Genetic Epidemiology}, \emph{43}(3), 263-275.
#' (\href{https://doi.org/10.1002/gepi.22188}{pub})
#' @export

Indiv_Score_Test_Region_cond <- function(genotype,genotype_adj,obj_nullmodel,
rare_maf_cutoff=0.01,rv_num_cutoff=2){
rare_maf_cutoff=0.01,rv_num_cutoff=2,
method_cond=c("optimal","naive")){

method_cond <- match.arg(method_cond) # evaluate choices
if(class(genotype) != "matrix" && !(!is.null(attr(class(genotype), "package")) && attr(class(genotype), "package") == "Matrix")){
stop("genotype is not a matrix!")
}
Expand Down Expand Up @@ -75,8 +85,13 @@ Indiv_Score_Test_Region_cond <- function(genotype,genotype_adj,obj_nullmodel,

residuals.phenotype <- obj_nullmodel$scaled.residuals
residuals.phenotype <- residuals.phenotype*sqrt(P_scalar)
residuals.phenotype <- lm(residuals.phenotype~genotype_adj)$residuals
X_adj <- cbind(rep(1,length(residuals.phenotype)),genotype_adj)
if(method_cond == "optimal"){
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj+obj_nullmodel$X-1)
}else{
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj)
}
residuals.phenotype <- residuals.phenotype.fit$residuals
X_adj <- model.matrix(residuals.phenotype.fit)
PX_adj <- P%*%X_adj
P_cond <- P - X_adj%*%solve(t(X_adj)%*%X_adj)%*%t(PX_adj) -
PX_adj%*%solve(t(X_adj)%*%X_adj)%*%t(X_adj) +
Expand All @@ -91,8 +106,13 @@ Indiv_Score_Test_Region_cond <- function(genotype,genotype_adj,obj_nullmodel,
cov <- obj_nullmodel$cov

residuals.phenotype <- obj_nullmodel$scaled.residuals
residuals.phenotype <- lm(residuals.phenotype~genotype_adj)$residuals
X_adj <- cbind(rep(1,length(residuals.phenotype)),genotype_adj)
if(method_cond == "optimal"){
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj+obj_nullmodel$X-1)
}else{
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj)
}
residuals.phenotype <- residuals.phenotype.fit$residuals
X_adj <- model.matrix(residuals.phenotype.fit)

results[RV_label,] <- do.call(cbind,Indiv_Score_Test_SMMAT_sparse_cond(G,Sigma_i,Sigma_iX,cov,X_adj,residuals.phenotype))
}
Expand All @@ -107,8 +127,13 @@ Indiv_Score_Test_Region_cond <- function(genotype,genotype_adj,obj_nullmodel,
}

residuals.phenotype <- obj_nullmodel$y - obj_nullmodel$fitted.values
residuals.phenotype <- lm(residuals.phenotype~genotype_adj)$residuals
X_adj <- cbind(rep(1,length(residuals.phenotype)),genotype_adj)
if(method_cond == "optimal"){
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj+model.matrix(obj_nullmodel)-1)
}else{
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj)
}
residuals.phenotype <- residuals.phenotype.fit$residuals
X_adj <- model.matrix(residuals.phenotype.fit)
PX_adj <- P%*%X_adj
P_cond <- P - X_adj%*%solve(t(X_adj)%*%X_adj)%*%t(PX_adj) -
PX_adj%*%solve(t(X_adj)%*%X_adj)%*%t(X_adj) +
Expand Down
22 changes: 15 additions & 7 deletions R/STAAR.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@
#' SKAT(1,25), SKAT(1,1), Burden(1,25), Burden(1,1), ACAT-V(1,25), ACAT-V(1,1)
#' and ACAT-O tests (default = NULL).
#' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
#' defining rare variants (default is 0.01).
#' defining rare variants (default = 0.01).
#' @param rv_num_cutoff the cutoff of minimum number of variants of analyzing
#' a given variant-set (default is 2).
#' a given variant-set (default = 2).
#' @return a list with the following members:
#' @return \code{num_variant}: the number of variants with minor allele frequency > 0 and less than
#' \code{rare_maf_cutoff} in the given variant-set that are used for performing the
#' variant-set using STAAR.
#' @return \code{cMAC}: the cumulative minor allele count of variants with
#' minor allele frequency > 0 and less than \code{rare_maf_cutoff} in the given variant-set.
#' @return \code{RV_label}: the boolean vector indicating whether each variant in the given
#' variant-set has minor allele frequency > 0 and less than \code{rare_maf_cutoff}.
#' @return \code{results_STAAR_O}: the STAAR-O p-value that aggregated SKAT(1,25),
Expand Down Expand Up @@ -60,14 +62,18 @@
#' including ACAT-V(1,1) p-value weighted by MAF, the ACAT-V(1,1)
#' p-values weighted by each annotation, and a STAAR-A(1,1)
#' p-value by aggregating these p-values using Cauchy method.
#' @references Li, X., Li, Z. et al. (2020). Dynamic incorporation of multiple
#' @references Li, X., Li, Z., et al. (2020). Dynamic incorporation of multiple
#' in silico functional annotations empowers rare variant association analysis of
#' large whole-genome sequencing studies at scale. \emph{Nature Genetics}.
#' (\href{https://www.nature.com/articles/s41588-020-0676-4}{pub})
#' large whole-genome sequencing studies at scale. \emph{Nature Genetics}, \emph{52}(9), 969-983.
#' (\href{https://doi.org/10.1038/s41588-020-0676-4}{pub})
#' @references Liu, Y., et al. (2019). Acat: A fast and powerful p value combination
#' method for rare-variant analysis in sequencing studies.
#' \emph{The American Journal of Humann Genetics 104}(3), 410-421.
#' (\href{https://www.sciencedirect.com/science/article/pii/S0002929719300023}{pub})
#' \emph{The American Journal of Human Genetics}, \emph{104}(3), 410-421.
#' (\href{https://doi.org/10.1016/j.ajhg.2019.01.002}{pub})
#' @references Li, Z., Li, X., et al. (2020). Dynamic scan procedure for
#' detecting rare-variant association regions in whole-genome sequencing studies.
#' \emph{The American Journal of Human Genetics}, \emph{104}(5), 802-814.
#' (\href{https://doi.org/10.1016/j.ajhg.2019.03.002}{pub})
#' @export

STAAR <- function(genotype,obj_nullmodel,annotation_phred=NULL,
Expand Down Expand Up @@ -181,6 +187,7 @@ STAAR <- function(genotype,obj_nullmodel,annotation_phred=NULL,
}

num_variant <- sum(RV_label) #dim(G)[2]
cMAC <- sum(G)
num_annotation <- dim(annotation_phred)[2]+1
results_STAAR_O <- CCT(pvalues)
results_ACAT_O <- CCT(pvalues[c(1,num_annotation+1,2*num_annotation+1,3*num_annotation+1,4*num_annotation+1,5*num_annotation+1)])
Expand Down Expand Up @@ -238,6 +245,7 @@ STAAR <- function(genotype,obj_nullmodel,annotation_phred=NULL,
}

return(list(num_variant = num_variant,
cMAC = cMAC,
RV_label = RV_label,
results_STAAR_O = results_STAAR_O,
results_ACAT_O = results_ACAT_O,
Expand Down
61 changes: 47 additions & 14 deletions R/STAAR_cond.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@
#' SKAT(1,25), SKAT(1,1), Burden(1,25), Burden(1,1), ACAT-V(1,25), ACAT-V(1,1)
#' and ACAT-O tests (default = NULL).
#' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
#' defining rare variants (default is 0.01).
#' defining rare variants (default = 0.01).
#' @param rv_num_cutoff the cutoff of minimum number of variants of analyzing
#' a given variant-set (default is 2).
#' a given variant-set (default = 2).
#' @param method_cond a character value indicating the method for conditional analysis.
#' \code{optimal} refers to regressing residuals from the null model on \code{genotype_adj}
#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
#' \code{naive} refers to regressing residuals from the null model on \code{genotype_adj}
#' and taking the residuals (default = \code{optimal}).
#' @return a list with the following members:
#' @return \code{num_variant}: the number of variants with minor allele frequency > 0 and less than
#' \code{rare_maf_cutoff} in the given variant-set that are used for performing the
#' variant-set using STAAR.
#' @return \code{cMAC}: the cumulative minor allele count of variants with
#' minor allele frequency > 0 and less than \code{rare_maf_cutoff} in the given variant-set.
#' @return \code{RV_label}: the boolean vector indicating whether each variant in the given
#' variant-set has minor allele frequency > 0 and less than \code{rare_maf_cutoff}.
#' @return \code{results_STAAR_O_cond}: the conditional STAAR-O p-value that aggregated conditional
Expand Down Expand Up @@ -66,19 +73,28 @@
#' including conditional ACAT-V(1,1) p-value weighted by MAF, the conditional ACAT-V(1,1)
#' p-values weighted by each annotation, and a conditional STAAR-A(1,1)
#' p-value by aggregating these p-values using Cauchy method.
#' @references Li, X., Li, Z. et al. (2020). Dynamic incorporation of multiple
#' @references Li, X., Li, Z., et al. (2020). Dynamic incorporation of multiple
#' in silico functional annotations empowers rare variant association analysis of
#' large whole-genome sequencing studies at scale. \emph{Nature Genetics}.
#' (\href{https://www.nature.com/articles/s41588-020-0676-4}{pub})
#' large whole-genome sequencing studies at scale. \emph{Nature Genetics}, \emph{52}(9), 969-983.
#' (\href{https://doi.org/10.1038/s41588-020-0676-4}{pub})
#' @references Liu, Y., et al. (2019). Acat: A fast and powerful p value combination
#' method for rare-variant analysis in sequencing studies.
#' \emph{The American Journal of Humann Genetics 104}(3), 410-421.
#' (\href{https://www.sciencedirect.com/science/article/pii/S0002929719300023}{pub})
#' \emph{The American Journal of Human Genetics}, \emph{104}(3), 410-421.
#' (\href{https://doi.org/10.1016/j.ajhg.2019.01.002}{pub})
#' @references Li, Z., Li, X., et al. (2020). Dynamic scan procedure for
#' detecting rare-variant association regions in whole-genome sequencing studies.
#' \emph{The American Journal of Human Genetics}, \emph{104}(5), 802-814.
#' (\href{https://doi.org/10.1016/j.ajhg.2019.03.002}{pub})
#' @references Sofer, T., et al. (2019). A fully adjusted two-stage procedure for rank-normalization
#' in genetic association studies. \emph{Genetic Epidemiology}, \emph{43}(3), 263-275.
#' (\href{https://doi.org/10.1002/gepi.22188}{pub})
#' @export

STAAR_cond <- function(genotype,genotype_adj,obj_nullmodel,annotation_phred=NULL,
rare_maf_cutoff=0.01,rv_num_cutoff=2){
rare_maf_cutoff=0.01,rv_num_cutoff=2,
method_cond=c("optimal","naive")){

method_cond <- match.arg(method_cond) # evaluate choices
if(class(genotype) != "matrix" && !(!is.null(attr(class(genotype), "package")) && attr(class(genotype), "package") == "Matrix")){
stop("genotype is not a matrix!")
}
Expand Down Expand Up @@ -162,8 +178,13 @@ STAAR_cond <- function(genotype,genotype_adj,obj_nullmodel,annotation_phred=NULL

residuals.phenotype <- obj_nullmodel$scaled.residuals
residuals.phenotype <- residuals.phenotype*sqrt(P_scalar)
residuals.phenotype <- lm(residuals.phenotype~genotype_adj)$residuals
X_adj <- cbind(rep(1,length(residuals.phenotype)),genotype_adj)
if(method_cond == "optimal"){
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj+obj_nullmodel$X-1)
}else{
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj)
}
residuals.phenotype <- residuals.phenotype.fit$residuals
X_adj <- model.matrix(residuals.phenotype.fit)
PX_adj <- P%*%X_adj
P_cond <- P - X_adj%*%solve(t(X_adj)%*%X_adj)%*%t(PX_adj) -
PX_adj%*%solve(t(X_adj)%*%X_adj)%*%t(X_adj) +
Expand All @@ -180,8 +201,13 @@ STAAR_cond <- function(genotype,genotype_adj,obj_nullmodel,annotation_phred=NULL
cov <- obj_nullmodel$cov

residuals.phenotype <- obj_nullmodel$scaled.residuals
residuals.phenotype <- lm(residuals.phenotype~genotype_adj)$residuals
X_adj <- cbind(rep(1,length(residuals.phenotype)),genotype_adj)
if(method_cond == "optimal"){
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj+obj_nullmodel$X-1)
}else{
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj)
}
residuals.phenotype <- residuals.phenotype.fit$residuals
X_adj <- model.matrix(residuals.phenotype.fit)

pvalues <- STAAR_O_SMMAT_sparse_cond(G,Sigma_i,Sigma_iX,cov,X_adj,residuals.phenotype,
weights_B=w_B,weights_S=w_S,weights_A=w_A,
Expand All @@ -198,8 +224,13 @@ STAAR_cond <- function(genotype,genotype_adj,obj_nullmodel,annotation_phred=NULL
}

residuals.phenotype <- obj_nullmodel$y - obj_nullmodel$fitted.values
residuals.phenotype <- lm(residuals.phenotype~genotype_adj)$residuals
X_adj <- cbind(rep(1,length(residuals.phenotype)),genotype_adj)
if(method_cond == "optimal"){
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj+model.matrix(obj_nullmodel)-1)
}else{
residuals.phenotype.fit <- lm(residuals.phenotype~genotype_adj)
}
residuals.phenotype <- residuals.phenotype.fit$residuals
X_adj <- model.matrix(residuals.phenotype.fit)
PX_adj <- P%*%X_adj
P_cond <- P - X_adj%*%solve(t(X_adj)%*%X_adj)%*%t(PX_adj) -
PX_adj%*%solve(t(X_adj)%*%X_adj)%*%t(X_adj) +
Expand All @@ -213,6 +244,7 @@ STAAR_cond <- function(genotype,genotype_adj,obj_nullmodel,annotation_phred=NULL
}

num_variant <- sum(RV_label) #dim(G)[2]
cMAC <- sum(G)
num_annotation <- dim(annotation_phred)[2]+1
results_STAAR_O <- CCT(pvalues)
results_ACAT_O <- CCT(pvalues[c(1,num_annotation+1,2*num_annotation+1,3*num_annotation+1,4*num_annotation+1,5*num_annotation+1)])
Expand Down Expand Up @@ -270,6 +302,7 @@ STAAR_cond <- function(genotype,genotype_adj,obj_nullmodel,annotation_phred=NULL
}

return(list(num_variant = num_variant,
cMAC = cMAC,
RV_label = RV_label,
results_STAAR_O_cond = results_STAAR_O,
results_ACAT_O_cond = results_ACAT_O,
Expand Down

0 comments on commit c0ddb15

Please sign in to comment.