LMSstat

Automation of statistical tests with an identical data input, aiming to reduce the arduous work of searching for packages and changing the data input format.

The package includes

Simple Statistics: u-test, t-test, post hocs of ANOVA and Kruskal-Wallis with FDR-adjusted values
Bar, Box, Dot, Violin plots with significance (u-test, t-test, post hocs of Anova and Kruskal Wallis)
Scaling & Transformation
Normality check (Shapiro Wilk test)
Scheirer–Ray–Hare Test
Volcano plot
Heatmap
PERMANOVA
NMDS
PCA
PCoA

Contribution acknowledgement

Oct.01/2021 Daehwan Kim

Allstats_new optimization for faster processing
bug fix of Allstats (regarding LETTERS210729)

Instructions

Installation

Download package in R

install.packages("devtools")

devtools::install_github("CHKim5/LMSstat")

library(LMSstat)

Basic structure of the Data

Used in

Simple statistics
Barplot, Boxplot, Dotplot
Volcano plot
Scheirer–Ray–Hare Test
PERMANOVA
NMDS
PCA
Scaling & Transformation
Normality check (Shapiro Wilk test)
Heatmap

#Sample Data provided within the package

data("Data")

# Uploading your own Data

setwd("C:/Users/82102/Desktop")

Data<-read.csv("statT.csv",header = F)

The column "Multilevel" is mandatory for the code to run flawlessly.

If Multilevel is not used, fill the column with random characters

Datafile needs to follow the following format

Mind the capitalization of the column names: Sample, Multilevel, Group

statT.csv

Used in

PERMANOVA

#Sample Data provided within the package
data("Classification")

# Uploading your own Data
Classification<-read.csv("statT_G.csv",header = F)

statT_G.csv

Univariate statistics

Statfile<-Allstats_new(Data,Adjust_p_value = T, Adjust_method = "BH") # Optimized code using lapply / data.table for faster processing contributed by Daehwan Kim

Statfile<-Allstats(Data,Adjust_p_value = T, Adjust_method = "BH") # Previous version using for-loop

Adjustable parameters

Adjust_p_value = T # Set True if adjustment is needed
Adjust_method = "BH" # Adjustment methods frequently used. c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none")

head(Statfile[["Result"]]) # includes all statistical results

write.csv(Statfile[["Result"]],"p_value_result.csv")  # Write csv with all the p-value included

Plots

# Makes a subdirectory and saves box plots for all the variables
AS_boxplot(Statfile,asterisk = "u_test") 

# Makes a subdirectory and saves dot plots for all the variables
AS_dotplot(Statfile,asterisk = "t_test") 

# Makes a subdirectory and saves bar plots for all the variables
AS_barplot(Statfile,asterisk = "Scheffe")

# Makes a subdirectory and saves violin plots for all the variables
AS_violinplot(Statfile,asterisk = "Scheffe")

AS_boxplot(Statfile) AS_dotplot(Statfile)

AS_barplot(Statfile) AS_violinplot(Statfile)

Adjustable parameters

asterisk = "t_test" #c("Dunn","Scheffe","u_test","t_test")
significant_variable_only = F # If set to TRUE, insignificant results will not be plotted
color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
legend_position = "none" # "none","left","right","bottom","top"
order = NULL # Order of the groups c("LAC","LUE","WEI","SDF","HGH","ASH")
tip_length = 0.01 # significance tip length
label_size = 2.88 # significance label size
step_increase = 0.05 #significance step increase
width = 0.3 # box width ; size = 3 # dot size
fig_width = NA #figure size
fig_height = NA #figure size
Y_text = 12 # Y title size
X_text = 10 # X text size
Y_lab = 10 #y axis text size
T_size = 15 # Title size
sig_int = c(0.1,0.05) # significance interval

Scaling & Transformation

scaled_data<-D_tran(Data,param = "Auto")

Raw_Data Scaled_Data

Adjustable parameters

param = "None" # "None","Auto","log10","Pareto"
save = F #Set true if datafile is to be saved

Normality check

#Shapiro Wilk test

Result<-Norm_test(Data)

write.csv(Result,"Normality_test_Result.csv")

Scheirer–Ray–Hare Test

# csv files including significant variables (Multilevel, Group, interaction) and a Venn diagram are downloaded
SRH(Data)

Adjustable parameters

Adjust_p_value = T # Set True if adjustment is needed
Adjust_method = "BH" # Adjustment methods frequently used. c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none")

Volcano plot

# Makes a subdirectory and saves Volcano plots for different combination of groups
Test<-Allstats(Data)
Volcano(Test,asterisk = "t-test")

Adjustable parameters

asterisk = "t-test" #statistics inheriting from Allstats "Scheffe", "t-test", "u-test", "Dunn"
reverse = T # T, F reverse the direction of fold change
fig_width = NA #figure size
fig_height = NA #figure size
FC_log = 2 # Fold change log transformation value
pval_log = 10 #p_value log transformation value
dotsize = 3 #dotsize
x_limit = c(-2,2) #x axis limit
y_limit =c(0,6) #y axis limit
pval_intercept = 0.05 # intercept for identification
sig_label = T # T,F label significant variables
color=c("#FF3300","#FF6600","#FFCC00") #colors used for ggplots.
fixed_limit = F #whether the limit should be fixed or not T, F
max_overlap = 20 #maximum overlap for labels
FC_range = c(-1.5,1.5) #significant fold change range

Heatmap

# Makes a subdirectory and saves Heatmap

scaled_data<-D_tran(Data,param = "Auto")

AS_heatmap(scaled_data) #data inheriting from D_tran

dev.off() # Saved as PDF

Adjustable parameters

col =c("green", "white", "red") # colors for heatmap
col_lim = c(-3, 0, 3) # color boundaries
reverse = T # T,F Reverse column and rows
distance = "pearson" # Distance matrix for HCA "pearson", "manhattan","euclidean","spearman","kendall"
rownames = T # T,F
colnames = T # T,F
Hsize = c(3,6) # Width & Height c(a,b)
g_legend = "Group" # Annotation legend title
h_legend = "Color Key" # Heatmap legend title
Title ="Title" # Title
T_size = 10 # Title text size
R_size = 3 # row text size
C_size = 3 # column text size
Gcol =c("ASD" = "black","HGH"="red","LAC"="blue","LUE" ="grey","SDF" = "yellow","WEI"="green") # Color for top_annotation bar
dend_h = 0.5 #dendrite height
a_h = 0.2 # top annotation height

Multivariate statistics

PERMANOVA

data("Data")

data("Classification")

Single factor

PERMANOVA done with the Group column

Indiv_Perm(Data) # The group information is treated as a factor

Multiple Factors

Loops PERMANOVA over different classes provided by Classification

Result<-Multi_Perm(Data,Classification) # The group information is treated as factors

Adjustable parameters

method = Dissimilarity index c("manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord")

NMDS

# Makes a subdirectory and saves NMDS plots for all of the distance metrics
NMDS(Data,methods = c("manhattan","bray","euclidean"))

NMDS plot with bray distance and p-value from PERMANOVA

Adjustable parameters

methods = Dissimilarity index c("manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord")
color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
legend_position = "none" # "none","left","right","bottom","top"
fig_width = NA #figure size
fig_height = NA #figure size
names = F # used to indicate sample names
dotsize = 3 # dotsize
labsize = 3 # label size

PCA

# Makes a subdirectory and saves PCA plot
PCA(Data,components = c(1,2),legend_position = "none")

PCA plot with selected components

Adjustable parameters

color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
legend_position = "none" # "none","left","right","bottom","top"
fig_width = NA #figure size
fig_height = NA #figure size
components = c(1,2) # selected components
names = F # used to indicate sample names
dotsize = 3 # dotsize
labsize = 3 # label size
ellipse = T # T or F to show ellipse

PCoA

# Makes a subdirectory and saves PCoA plot
PCoA(Data,components = c(1,2),methods = c("bray", "manhattan"))

PCoA plot with selected components

Adjustable parameters

color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
legend_position = "none" # "none","left","right","bottom","top"
fig_width = NA #figure size
fig_height = NA #figure size
components = c(1,2) # selected components
names = F # used to indicate sample names
dotsize = 3 # dotsize
labsize = 3 # label size
ellipse = T # T or F to show ellipse
methods = Dissimilarity index c("manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord")

Name		Name	Last commit message	Last commit date
Latest commit History 111 Commits
R		R
data		data
man		man
renv		renv
.Rbuildignore		.Rbuildignore
.Rprofile		.Rprofile
.gitignore		.gitignore
CODE_OF_CONDUCT.md		CODE_OF_CONDUCT.md
DESCRIPTION		DESCRIPTION
LICENSE		LICENSE
LMSstat.Rproj		LMSstat.Rproj
NAMESPACE		NAMESPACE
README.md		README.md

License

CHKim5/LMSstat

Folders and files

Latest commit

History

Repository files navigation

LMSstat

Contribution acknowledgement

Oct.01/2021 Daehwan Kim

Instructions

Installation

Download R

Download R Studio

Download Rtools

Download package in R

Basic structure of the Data

Used in

Datafile needs to follow the following format

Care for Capitals: Sample, Multilevel, Group

Used in

Univariate statistics

Adjustable parameters

Plots

Adjustable parameters

Scaling & Transformation

Adjustable parameters

Normality check

Scheirer–Ray–Hare Test

Adjustable parameters

Volcano plot

Adjustable parameters

Heatmap

Adjustable parameters

Multivariate statistics

PERMANOVA

Single factor

Multiple Factors

Adjustable parameters

NMDS

Adjustable parameters

PCA

Adjustable parameters

PCoA

Adjustable parameters

About

Topics

Resources

License

Code of conduct

Stars

Watchers

Forks

Languages