# pipeline.R
# Forked from 5harad/fasttt.
library(tidyverse)
# S4 class pairing a dataset with a log of the decisions applied to it.
#
# Slots:
#   data     - tibble holding the current state of the dataset.
#   metadata - tibble with one row per logged decision and the columns
#              action, reason, result, details, nrows, prop and prop_prev.
#
# The metadata prototype is a zero-row tibble with explicitly typed columns.
# A zero-row `tribble()` produces logical columns, which do not share a
# common type with the character, list and numeric values that
# add_decision() appends, so row-binding onto it is not type-stable.
setClass(
  "Pipeline",
  slots = c(
    data = "tbl_df",
    metadata = "tbl_df"
  ),
  prototype = prototype(
    data = tibble(),
    metadata = tibble(
      action = character(),
      reason = character(),
      result = character(),
      details = list(),
      nrows = integer(),
      prop = double(),
      prop_prev = double()
    )
  )
)
# Generic: append one decision record to a pipeline's log.
#
# Arguments:
#   pipeline - object to dispatch on.
#   action   - what was done.
#   reason   - why it was done.
#   result   - what the outcome was.
#   details  - optional extra information; defaults to an empty list.
setGeneric(
  name = "add_decision",
  def = function(pipeline, action, reason, result, details = list()) {
    standardGeneric("add_decision")
  }
)
# Generic: attach `data` to `pipeline`; methods dispatch on `pipeline`.
setGeneric(
  name = "init",
  def = function(pipeline, data) {
    standardGeneric("init")
  }
)
# Pipeline method for add_decision(): log one decision together with the
# current size of the data.
#
# Arguments:
#   pipeline - a Pipeline object.
#   action, reason, result - values stored in the matching log columns.
#   details  - extra information stored in the `details` list-column;
#              defaults to an empty list.
#
# Returns the pipeline with one additional metadata row holding:
#   nrows     - nrow(pipeline@data) at the time of the call,
#   prop      - nrows relative to the first logged row count,
#   prop_prev - nrows relative to the most recently logged row count.
# Both proportions are 1.0 for the very first entry, and 0.0 when the row
# count they are measured against is zero.
setMethod(
  "add_decision",
  signature("Pipeline"),
  function(pipeline, action, reason, result, details = list()) {
    nrows <- nrow(pipeline@data)
    prop <- 1.0
    prop_prev <- 1.0

    logged_nrows <- pipeline@metadata$nrows
    n <- length(logged_nrows)
    # Any existing entry (including the single one written by init()) gives a
    # baseline to compare against, so the test is `n > 0`, not `n > 1`.
    if (n > 0) {
      starting_nrows <- logged_nrows[[1L]]
      prev_nrows <- logged_nrows[[n]]
      # Guard both denominators so an empty baseline yields 0 rather than
      # NaN/Inf.
      prop <- if (starting_nrows == 0) 0.0 else nrows / starting_nrows
      prop_prev <- if (prev_nrows == 0) 0.0 else nrows / prev_nrows
    }

    # `details` is wrapped in list() so the column is always a list-column
    # with exactly one cell per decision, whatever `details` contains.
    entry <- tibble(
      action = action,
      reason = reason,
      result = result,
      details = list(details),
      nrows = nrows,
      prop = prop,
      prop_prev = prop_prev
    )

    # Plain assignment instead of magrittr's `%<>%`, which is not attached by
    # library(tidyverse). The first entry simply becomes the log, which avoids
    # combining it with untyped zero-row columns.
    pipeline@metadata <- if (n > 0) {
      bind_rows(pipeline@metadata, entry)
    } else {
      entry
    }
    pipeline
  }
)
# Pipeline method for init(): store `data` in the pipeline and log the
# initial "initialize" decision via add_decision().
#
# Arguments:
#   pipeline - a Pipeline object.
#   data     - value assigned to the pipeline's `data` slot.
#
# Returns whatever add_decision() returns for the updated pipeline.
setMethod(
  f = "init",
  signature = signature("Pipeline"),
  definition = function(pipeline, data) {
    pipeline@data <- data
    add_decision(
      pipeline,
      action = "initialize",
      reason = "none",
      result = "no change"
    )
  }
)
# Example
# p <- init(new("Pipeline"), as_tibble(iris))
# p@data <- select(p@data, -Species)
# p <-
# add_decision(
# p,
# "remove Species",
# "because that feature is poorly recorded",
# "resulting in 1 fewer predictor",
# list(
# remaining_columns = colnames(p@data),
# dropped_column = "Species",
# other = "for fun"
# )
# )
# library(knitr)
# library(kableExtra)
# kable(p@metadata, "latex") %>% column_spec(1:4, width = "10em")