Skip to content

Commit

Permalink
Merge pull request #14 from Corey-Bryant/Corey-Dev
Browse files Browse the repository at this point in the history
Added new HTML out style and new CSV output style
  • Loading branch information
Corey-Bryant committed Oct 22, 2021
2 parents 700766b + 73e86a7 commit 37bd9f8
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 31 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: approxmapR
Type: Package
Title: ApproxMAP (APPROXimate Multiple Alignment Pattern)
Version: 1.6.5
Version: 1.6.7
Author: Corey Bryant, Gurudev Ilngovan, and Hye Chung Kum.
Maintainer: The package maintainer <yourself@somewhere.net>
Description: ApproxMAP (APPROXimate Multiple Alignment Pattern). Is an efficient and effective mining algorithm which finds maximal approximate sequential patterns.
Expand Down
203 changes: 174 additions & 29 deletions R/reports.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,25 @@ file_check <- function(dir = ".", file_name) {

#' @export
generate_reports <- function(w_sequence_dataframe,
sil_table = NULL,
html_format = TRUE,
# truncate_patterns = FALSE,
output_directory = "~",
end_filename_with = "",
sequence_analysis_details = NULL,
sequence_analysis_details_definitions = NULL,
algorithm_comparison = FALSE) {
stopifnot("W_Sequence_Dataframe" %in% class(w_sequence_dataframe))

if (!is.null(sil_table)) {

if (!identical(names(sil_table), c("id", "cluster", "neighbor", "sil_width"))) {

stop("Error: Columns must be in the following order 'id', 'cluster', 'neighbor', 'sil_width'")

}
}

if (!is.null(sequence_analysis_details)) {
if (!"list" %in% class(sequence_analysis_details)) {
stop("Error: sequence_analysis_details must be a list class.")
Expand Down Expand Up @@ -80,10 +91,18 @@ generate_reports <- function(w_sequence_dataframe,


# Checking which report file structure to be used
if (!is.null(sequence_analysis_details)) {
if (!is.null(sequence_analysis_details) & is.null(sequence_analysis_details_definitions)) {

report_rmd <- system.file("rmd_w_sequence_analysis_details.Rmd", package = "approxmapR")

} else if (!is.null(sequence_analysis_details) & !is.null(sequence_analysis_details_definitions)) {

report_rmd <- system.file("rmd_w_sequence_analysis_details_definitions.Rmd", package = "approxmapR")

} else {

report_rmd <- system.file("rmd_w_sequence.Rmd", package = "approxmapR")

}


Expand Down Expand Up @@ -125,14 +144,20 @@ generate_reports <- function(w_sequence_dataframe,
) %>%
select(-ends_with("_pattern"), -weighted_sequence, -df_sequences)


if (!is.null(sequence_analysis_details_definitions)) {
event_defs <- subset(sequence_analysis_details_definitions, event %in% unique(str_split(df_unique_items$sequence, ", ") %>% unlist()))
}


formatted <-
formatted %>%
bind_rows(formatted_trunc) %>%
arrange(cluster, pattern) %>%
bind_rows(df_unique_items) %>%
arrange(cluster)

if (!is.null(sequence_analysis_details)) {
if (!is.null(sequence_analysis_details) & is.null(sequence_analysis_details_definitions)) {
rmarkdown::render(
report_rmd,
params = append(list(input = formatted,
Expand All @@ -141,6 +166,18 @@ generate_reports <- function(w_sequence_dataframe,
output_file = file_check(output_directory_private, paste0("all_sequences", end_filename_with, ".html")),
output_dir = output_directory_private
)

} else if (!is.null(sequence_analysis_details) & !is.null(sequence_analysis_details_definitions)) {

rmarkdown::render(
report_rmd,
params = append(list(input = formatted,
title = "All Sequences",
event_definitions = event_defs), sequence_analysis_details),
#output_file = file_check(output_directory_private, "all_sequences.html"),
output_file = file_check(output_directory_private, paste0("all_sequences", end_filename_with, ".html")),
output_dir = output_directory_private
)
} else {
rmarkdown::render(
report_rmd,
Expand Down Expand Up @@ -207,23 +244,70 @@ generate_reports <- function(w_sequence_dataframe,

message("saving alignments...")

w_sequence_dataframe %>%
save_alignment(save_date=FALSE, algorithm_comparison = algorithm_comparison) %>%
write_file(paste0(
output_directory_private,
"/",
#file_check(output_directory_private, "alignments.csv")
file_check(output_directory_private, paste0("alignments", end_filename_with, ".csv"))
))
if (!is.null(sil_table)) {

sil_table <- sil_table %>% group_by(cluster) %>% mutate(cluster_average = mean(sil_width))
sil_table$grand_avg_sil <- mean(sil_table$sil_width)
names(sil_table) <- c("id2", "cluster", "neighbor", "sil_width", "cluster_average", "grand_avg_sil")

}


if (!is.null(sil_table)) {

paste0("Grand Silhouette Width", ", ", sil_table$grand_avg_sil[[1]], "\n", "\n",
"Cluster", ", ", "Cluster Silhoutte Width", ", ", "Neighbor Cluster", ", ", "id", ", ", "id Silhouette Width", ", ", "Sequence", "\n",
w_sequence_dataframe %>% save_alignment(save_date = FALSE, sil_table = sil_table, algorithm_comparison = algorithm_comparison)
) %>%
write_file(paste0(
output_directory_private,
"/",
#file_check(output_directory_private, "alignments_with_date.csv")
file_check(output_directory_private, paste0("alignments", end_filename_with, ".csv"))
))

} else {

w_sequence_dataframe %>%
save_alignment(save_date = FALSE, sil_table =sil_table, algorithm_comparison = algorithm_comparison) %>%
write_file(paste0(
output_directory_private,
"/",
#file_check(output_directory_private, "alignments.csv")
file_check(output_directory_private, paste0("alignments", end_filename_with, ".csv"))
))

}


if (!is.null(sil_table)) {

paste0("Grand Silhouette Width", ", ", sil_table$grand_avg_sil[[1]], "\n", "\n",
"Cluster", ", ", "Cluster Silhoutte Width", ", ", "Neighbor Cluster", ", ", "id", ", ", "id Silhouette Width", ", ", "Sequence", "\n",
w_sequence_dataframe %>% save_alignment(save_date = TRUE, sil_table = sil_table, algorithm_comparison = algorithm_comparison)
) %>%
write_file(paste0(
output_directory_private,
"/",
#file_check(output_directory_private, "alignments_with_date.csv")
file_check(output_directory_private, paste0("alignments_with_date", end_filename_with, ".csv"))
))

} else {

w_sequence_dataframe %>%
save_alignment(save_date = TRUE, sil_table = sil_table, algorithm_comparison = algorithm_comparison) %>%
write_file(paste0(
output_directory_private,
"/",
#file_check(output_directory_private, "alignments_with_date.csv")
file_check(output_directory_private, paste0("alignments_with_date", end_filename_with, ".csv"))
))

}



w_sequence_dataframe %>%
save_alignment(save_date=TRUE, algorithm_comparison = algorithm_comparison) %>%
write_file(paste0(
output_directory_private,
"/",
#file_check(output_directory_private, "alignments_with_date.csv")
file_check(output_directory_private, paste0("alignments_with_date", end_filename_with, ".csv"))
))



Expand Down Expand Up @@ -476,7 +560,7 @@ save_alignment.Sequence <- function(sequence) {
paste0(collapse = ", ")
}

save_alignment.Sequence_List <- function(alignment, save_date=TRUE, algorithm_comparison = algorithm_comparison) {
save_alignment.Sequence_List <- function(alignment, save_date=TRUE, sil_table = NULL, algorithm_comparison = algorithm_comparison) {
map2_chr(alignment, names(alignment), function(seq, id) {

if (algorithm_comparison) {
Expand All @@ -487,14 +571,41 @@ save_alignment.Sequence_List <- function(alignment, save_date=TRUE, algorithm_co

} else {

seqs <- str_c(id, ", ", save_alignment(seq), "\n")
if (!is.null(sil_table)) {

seqs <- str_c((filter(sil_table, id2 == id))$cluster, ", ",
(filter(sil_table, id2 == id))$cluster_average, ", ",
(filter(sil_table, id2 == id))$neighbor, ", ",
id, ", ",
(filter(sil_table, id2 == id))$sil_width, ", ",
save_alignment(seq), "\n")

} else {

seqs <- str_c(id, ", ", save_alignment(seq), "\n")

}

}

if(save_date) {
if (save_date == TRUE) {

if (!is.null(sil_table)) {

dates <- str_c((filter(sil_table, id2 == id))$cluster, ", ",
(filter(sil_table, id2 == id))$cluster_average, ", ",
(filter(sil_table, id2 == id))$neighbor, ", ",
id, ", ",
(filter(sil_table, id2 == id))$sil_width, ", ",
align_date_to_seq(id, seq), "\n")
paste0(seqs, dates)

} else {

dates <- str_c(id, ", ", align_date_to_seq(id, seq), "\n")
paste0(seqs, dates)
dates <- str_c(id, ", ", align_date_to_seq(id, seq), "\n")
paste0(seqs, dates)

}

} else {

Expand All @@ -506,11 +617,46 @@ save_alignment.Sequence_List <- function(alignment, save_date=TRUE, algorithm_co
str_c(collapse = "")
}

save_alignment.W_Sequence_Dataframe <- function(df, ...) {
map2_chr(df$cluster, df$weighted_sequence, function(c, seq) {
str_c("Cluster ", c, "\n", save_alignment(attr(seq, "alignments"), ...))
}) %>%
str_c(collapse = "\n")



save_alignment.W_Sequence_Dataframe <- function(df, sil_table, ...) {

if (!is.null(sil_table)) {

map2_chr(df$cluster, df$weighted_sequence, function(c, seq) {

str_c(save_alignment(attr(seq, "alignments"), sil_table = sil_table, ...))
}) %>% str_c(collapse = "")

} else {

map2_chr(df$cluster, df$weighted_sequence, function(c, seq) {
str_c("Cluster ", c, "\n", save_alignment(attr(seq, "alignments"), ...))
}) %>% str_c(collapse = "\n")
}
}



save_alignment.W_Sequence_Dataframe2 <- function(df, sil_table, ...) {

if (!is.null(sil_table)) {

map2_chr(df$cluster, df$weighted_sequence, function(c, seq) {


str_c("Grand Silhouette Width", ", ", sil_table$grand_avg_sil[[1]], "\n", "\n",
"Cluster", ", ", "Cluster Silhoutte Width", ", ", "Neighbor Cluster", ", ", "id", ", ", "id Silhouette Width", ", ", "Sequence", "\n",
save_alignment(attr(seq, "alignments"), sil_table = sil_table, ...))
}) %>% str_c(collapse = "")

} else {

map2_chr(df$cluster, df$weighted_sequence, function(c, seq) {
str_c("Cluster ", c, "\n", save_alignment(attr(seq, "alignments"), ...))
}) %>% str_c(collapse = "\n")
}
}


Expand Down Expand Up @@ -545,5 +691,4 @@ algorithm_comparison <- function(formatted1, formatted1_pars = "No Pars1",


)

}
93 changes: 93 additions & 0 deletions inst/rmd_w_sequence_analysis_details_definitions.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
output:
html_document:
css: priority.css
params:
input: NA
title: NA
algorithm: NA
k_value: NA
time_period: NA
consensus_threshold: NA
cluster_n: NA
notes: NA
event_definitions: NA
---

---
title: `r params$title`
---

The current sequential analysis was conducted using the ApproxMAP algorithm with the following parameters:

* Clustering algorithm: `r params$algorithm`
* K value: `r params$k_value`
* Number of clusters: `r params$cluster_n`
* Grouping time period: `r params$time_period`
* Consensus pattern threshold: `r params$consensus_threshold`
* Any special notes: `r params$notes`






```{r, echo=FALSE, out.width='100%'}
library(knitr)
library(kableExtra)
kable(params$event_definitions, "html", escape = F, align = c("c","c")) %>%
kable_styling(bootstrap_options = "responsive")
perc <- c(1, (1:10)*10)
wseq <-
perc %>%
map(function(x){
a <- list()
a$elements <- paste0(x,"%")
a$element_weights <- x
a$itemset_weights <- x
class_it(a, "W_Sequence_Itemset")
}) %>%
class_it("percentage_bar")
attr(wseq, "n") <- 100
wseq %>% view_formatted_sequence()
df <- params$input
rn <-
df %>%
mutate(rn = row_number()) %>%
filter(cluster%%2 == 0) %>%
pull(rn)
if(nrow(df) > 0){
if(ncol(df) == 5){
kable(df, "html", escape = F, align = c("c","c","c","c","l")) %>%
kable_styling(bootstrap_options = "responsive") %>%
row_spec(rn, background = "#f2f2f2") %>%
column_spec(1, width = "3%") %>%
column_spec(2, width = "3%") %>%
column_spec(3, width = "4%") %>%
column_spec(4, width = "10%") %>%
column_spec(5, width = "80%")
} else {
kable(df, "html", escape = F, align = c("c","c","c","l")) %>%
kable_styling(position = "left") %>%
row_spec(rn, background = "#f2f2f2") %>%
column_spec(1, width = "3%") %>%
column_spec(2, width = "3%") %>%
column_spec(3, width = "4%") %>%
column_spec(4, width = "90%")
}
} else {
cat("Empty Dataframe")
}
```

0 comments on commit 37bd9f8

Please sign in to comment.