STAARpipelineSummary v0.9.7

xihaoli · Mar 24, 2024 · 1bd1a62 · 1bd1a62
1 parent cd63287
commit 1bd1a62
Show file tree

Hide file tree

Showing 15 changed files with 49 additions and 65 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: STAARpipelineSummary
 Type: Package
 Title: Summarization and Visualization of Analysis Results Generated by STAARpipeline
 Version: 0.9.7
-Date: 2023-11-11
+Date: 2024-03-23
 Author: Xihao Li [aut, cre], Zilin Li [aut, cre]
 Maintainer: Xihao Li <xihaoli@unc.edu>, Zilin Li <li@nenu.edu.cn>
 Description: An R package for summarizing analysis results generated by STAARpipeline.

diff --git a/R/Gene_Centric_Coding_Results_Summary.R b/R/Gene_Centric_Coding_Results_Summary.R
@@ -24,16 +24,11 @@
 #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
 #' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
 #' and taking the residuals (default = \code{optimal}).
-#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
-#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
-#' @param method_cond a character value indicating the method for conditional analysis.
-#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci}
-#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
-#' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
-#' and taking the residuals (default = \code{optimal}).
 #' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
 #' defining rare variants (default = 0.01).
 #' @param QC_label channel name of the QC label in the GDS/aGDS file  (default = "annotation/filter").
+#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
+#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param Annotation_dir channel name of the annotations in the aGDS file \cr (default = "annotation/info/FunctionalAnnotation").
 #' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
 #' @param Use_annotation_weights use annotations as weights or not (default = FALSE).
@@ -79,7 +74,7 @@
 Gene_Centric_Coding_Results_Summary <- function(agds_dir,gene_centric_coding_jobs_num,input_path,output_path,gene_centric_results_name,
                                                 obj_nullmodel,known_loci=NULL,cMAC_cutoff=0,
                                                 method_cond=c("optimal","naive"),rare_maf_cutoff=0.01,
-                                                QC_label="annotation/filter",geno_missing_imputation=c("mean","minor"),variant_type=c("SNV","Indel","variant"),
+                                                QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
                                                 Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
                                                 Use_annotation_weights=FALSE,Annotation_name=NULL,
                                                 alpha=2.5E-06,manhattan_plot=FALSE,QQ_plot=FALSE,

diff --git a/R/Gene_Centric_Coding_Results_Summary_incl_ptv.R b/R/Gene_Centric_Coding_Results_Summary_incl_ptv.R
@@ -24,16 +24,11 @@
 #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
 #' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
 #' and taking the residuals (default = \code{optimal}).
-#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
-#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
-#' @param method_cond a character value indicating the method for conditional analysis.
-#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci}
-#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
-#' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
-#' and taking the residuals (default = \code{optimal}).
 #' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
 #' defining rare variants (default = 0.01).
 #' @param QC_label channel name of the QC label in the GDS/aGDS file  (default = "annotation/filter").
+#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
+#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param Annotation_dir channel name of the annotations in the aGDS file \cr (default = "annotation/info/FunctionalAnnotation").
 #' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
 #' @param Use_annotation_weights use annotations as weights or not (default = FALSE).
@@ -83,14 +78,14 @@
 #' @export
 
 Gene_Centric_Coding_Results_Summary_incl_ptv <- function(agds_dir,gene_centric_coding_jobs_num,input_path,output_path,gene_centric_results_name,
-                                                obj_nullmodel,known_loci=NULL,cMAC_cutoff=0,
-                                                method_cond=c("optimal","naive"),rare_maf_cutoff=0.01,
-                                                QC_label="annotation/filter",geno_missing_imputation=c("mean","minor"),variant_type=c("SNV","Indel","variant"),
-                                                Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
-                                                Use_annotation_weights=FALSE,Annotation_name=NULL,
-                                                alpha=2.5E-06,manhattan_plot=FALSE,QQ_plot=FALSE,
-                                                cond_null_model_name=NULL,cond_null_model_dir=NULL,
-                                                SPA_p_filter=FALSE,p_filter_cutoff=0.05){
+                                                         obj_nullmodel,known_loci=NULL,cMAC_cutoff=0,
+                                                         method_cond=c("optimal","naive"),rare_maf_cutoff=0.01,
+                                                         QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
+                                                         Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
+                                                         Use_annotation_weights=FALSE,Annotation_name=NULL,
+                                                         alpha=2.5E-06,manhattan_plot=FALSE,QQ_plot=FALSE,
+                                                         cond_null_model_name=NULL,cond_null_model_dir=NULL,
+                                                         SPA_p_filter=FALSE,p_filter_cutoff=0.05){
 
 	## evaluate choices
 	method_cond <- match.arg(method_cond)

diff --git a/R/Gene_Centric_Noncoding_Results_Summary.R b/R/Gene_Centric_Noncoding_Results_Summary.R
@@ -27,16 +27,11 @@
 #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
 #' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
 #' and taking the residuals (default = \code{optimal}).
-#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
-#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
-#' @param method_cond a character value indicating the method for conditional analysis.
-#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci}
-#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
-#' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
-#' and taking the residuals (default = \code{optimal}).
 #' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
 #' defining rare variants (default = 0.01).
 #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").
+#' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").
+#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param Annotation_dir channel name of the annotations in the aGDS file \cr (default = "annotation/info/FunctionalAnnotation").
 #' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
 #' @param Use_annotation_weights use annotations as weights or not (default = FALSE).
@@ -92,7 +87,7 @@ Gene_Centric_Noncoding_Results_Summary <- function(agds_dir,gene_centric_noncodi
                                                    ncRNA_jobs_num,ncRNA_input_path,ncRNA_output_path,ncRNA_results_name,
                                                    obj_nullmodel,known_loci=NULL,cMAC_cutoff=0,
                                                    method_cond=c("optimal","naive"),rare_maf_cutoff=0.01,
-                                                   QC_label="annotation/filter",geno_missing_imputation=c("mean","minor"),variant_type=c("SNV","Indel","variant"),
+                                                   QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
                                                    Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
                                                    Use_annotation_weights=FALSE,Annotation_name=NULL,
                                                    alpha=2.5E-06,alpha_ncRNA=2.5E-06,

diff --git a/R/Individual_Analysis_Results_Summary.R b/R/Individual_Analysis_Results_Summary.R
@@ -21,8 +21,9 @@
 #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
 #' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
 #' and taking the residuals (default = \code{optimal}).
-#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param QC_label channel name of the QC label in the GDS/aGDS file.
+#' @param variant_type type of variant included in the analysis. Choices include "variant", "SNV", or "Indel" (default = "variant").
+#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param alpha p-value threshold of significant results (default = 5E-09).
 #' @param manhattan_plot output manhattan plot or not (default = FALSE).
 #' @param QQ_plot output Q-Q plot or not (default = FALSE).
@@ -44,7 +45,7 @@
 Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,output_path,individual_results_name,
                                                 obj_nullmodel,known_loci=NULL,
                                                 method_cond=c("optimal","naive"),
-                                                QC_label="annotation/filter",geno_missing_imputation=c("mean","minor"),
+                                                QC_label="annotation/filter",variant_type=c("variant","SNV","Indel"),geno_missing_imputation=c("mean","minor"),
                                                 alpha=5E-09,manhattan_plot=FALSE,QQ_plot=FALSE,
                                                 SPA_p_filter=FALSE,p_filter_cutoff=0.05,
                                                 cond_null_model_name=NULL,cond_null_model_dir=NULL){
@@ -183,9 +184,9 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out
 					gds.path <- agds_dir[chr]
 					genofile <- seqOpen(gds.path)
 
-					results_sig_cond_chr <- Individual_Analysis_cond(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel,
-					                                                 known_loci=known_loci,variant_type="variant",
-					                                                 QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,method_cond=method_cond)
+					results_sig_cond_chr <- Individual_Analysis_cond(chr=chr,individual_results=results_sig_chr,genofile=genofile,obj_nullmodel=obj_nullmodel,
+					                                                 known_loci=known_loci,method_cond=method_cond,QC_label=QC_label,
+					                                                 variant_type=variant_type,geno_missing_imputation=geno_missing_imputation)
 
 					results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)
 
@@ -210,8 +211,8 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out
 
 						obj_nullmodel_cond <- get(load(paste0(cond_null_model_dir,cond_null_model_name,".chr",chr,".Rdata")))
 
-						results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel_cond,
-						                                                     variant_type="variant",QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,
+						results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile=genofile,obj_nullmodel=obj_nullmodel_cond,
+						                                                     QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
 						                                                     SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
 
 						results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)
@@ -224,8 +225,8 @@ Individual_Analysis_Results_Summary <- function(agds_dir,jobs_num,input_path,out
 						gds.path <- agds_dir[chr]
 						genofile <- seqOpen(gds.path)
 
-						results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile,obj_nullmodel=obj_nullmodel,
-						                                                     variant_type="variant",QC_label=QC_label,geno_missing_imputation=geno_missing_imputation,
+						results_sig_cond_chr <- Individual_Analysis_cond_spa(chr=chr,individual_results=results_sig_chr,genofile=genofile,obj_nullmodel=obj_nullmodel,
+						                                                     QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation,
 						                                                     SPA_p_filter=SPA_p_filter,p_filter_cutoff=p_filter_cutoff)
 
 						results_sig_cond <- rbind(results_sig_cond,results_sig_cond_chr)

diff --git a/R/Single_Variants_List_Analysis.R b/R/Single_Variants_List_Analysis.R
@@ -8,8 +8,8 @@
 #' the following names: "CHR" (chromosome number), "POS" (position), "REF" (reference allele), and "ALT" (alternative allele).
 #' @param obj_nullmodel an object from fitting the null model, which is either the output from \code{fit_nullmodel} function in the \code{STAARpipeline} package,
 #' or the output from \code{fitNullModel} function in the \code{GENESIS} package and transformed using the \code{genesis2staar_nullmodel} function in the \code{STAARpipeline} package.
-#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").
+#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param p_filter_cutoff threshold for the p-value recalculation using the SPA method (default = 0.05)
 #' @param tol a positive number specifying tolerance, the difference threshold for parameter
 #' estimates in saddlepoint approximation algorithm below which iterations should be stopped (default = ".Machine$double.eps^0.25").

diff --git a/R/Sliding_Window_Results_Summary.R b/R/Sliding_Window_Results_Summary.R
@@ -23,16 +23,11 @@
 #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
 #' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
 #' and taking the residuals (default = \code{optimal}).
-#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
-#' @param variant_type variants include in the conditional analysis. Choices include "variant", "SNV", or "Indel" (default = "SNV").
-#' @param method_cond a character value indicating the method for conditional analysis.
-#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci}
-#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals;
-#' \code{naive} refers to regressing residuals from the null model on \code{known_loci}
-#' and taking the residuals (default = \code{optimal}).
 #' @param rare_maf_cutoff the cutoff of maximum minor allele frequency in
 #' defining rare variants (default = 0.01).
 #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").
+#' @param variant_type variants included in the conditional analysis. Choices include "variant", "SNV", or "Indel" (default = "SNV").
+#' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean").
 #' @param Annotation_dir channel name of the annotations in the aGDS file \cr (default = "annotation/info/FunctionalAnnotation").
 #' @param Annotation_name_catalog a data frame containing the name and the corresponding channel name in the aGDS file.
 #' @param Use_annotation_weights use annotations as weights or not (default = FALSE).
@@ -58,7 +53,7 @@
 Sliding_Window_Results_Summary <- function(agds_dir,jobs_num,input_path,output_path,sliding_window_results_name,
                                            obj_nullmodel,known_loci=NULL,cMAC_cutoff=0,
                                            method_cond=c("optimal","naive"),rare_maf_cutoff=0.01,
-                                           QC_label="annotation/filter",geno_missing_imputation=c("mean","minor"),variant_type=c("SNV","Indel","variant"),
+                                           QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"),
                                            Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog,
                                            Use_annotation_weights=FALSE,Annotation_name=NULL,
                                            alpha=0.05,manhattan_plot=FALSE,QQ_plot=FALSE,

diff --git a/R/manhattan_plot.R b/R/manhattan_plot.R
@@ -2,8 +2,8 @@ manhattan_plot<-function(chr, pos, pvalue,
                          sig.level=NA, annotate=NULL, ann.default=list(),
                          should.thin=T, thin.pos.places=2, thin.logp.places=2,
                          xlab="Chromosome", ylab=expression(-log[10](p-value)),
-                         col=c("gray","darkgray"), panel.extra=NULL, pch=20, 
-						 use_logp=FALSE,cex=0.8,...) {
+                         col=c("gray","darkgray"), panel.extra=NULL, pch=20,
+                         use_logp=FALSE,cex=0.8,...) {
 
 	if (length(chr)==0) stop("chromosome vector is empty")
 	if (length(pos)==0) stop("position vector is empty")

diff --git a/README.md b/README.md
@@ -31,7 +31,7 @@ Please see the <a href="docs/STAARpipelineSummary_manual.pdf">**STAARpipelineSum
 ## Data Availability
 The whole-genome functional annotation data assembled from a variety of sources and the precomputed annotation principal components are available at the [Functional Annotation of Variant - Online Resource (FAVOR)](https://favor.genohub.org) site and [FAVOR Essential Database](https://doi.org/10.7910/DVN/1VGTJI).
 ## Version
-The current version is 0.9.7 (January 29, 2024).
+The current version is 0.9.7 (March 23, 2024).
 ## Citation
 If you use **STAARpipeline** and **STAARpipelineSummary** for your work, please cite:
 

diff --git a/docs/STAARpipelineSummary_manual.pdf b/docs/STAARpipelineSummary_manual.pdf