/
boncat_activity_evaluation.Rmd
157 lines (135 loc) · 6.21 KB
/
boncat_activity_evaluation.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
---
title: "BONCAT Activity Evaluation"
author: "A. Pasulka, S. Kopf"
date: "`r Sys.Date()`"
output: html_document
---
```{r "load libraries", echo = FALSE, message=FALSE, warning=FALSE}
library(tidyverse)
library(knitr)
source("boncat_activity_functions.R")
opts_chunk$set(dev=c("png", "pdf"), dev.args=list(pdf = list(encoding="WinAnsi", useDingbats=FALSE)),
fig.keep="all", fig.path = file.path("plot", "BONCAT_activity_"))
```
# Activity evaluation
What is the probability that sample from pos. > sample from neg? Use function `calculate_active_cells` in *boncat_activity_functions.R* file.
## Testing with simulated data
```{r}
# based on actual virus data parameters
load(file = file.path("data", "boncat_params.RData"))
```
Model data with a specific level of activity and see how well the activity metric does in terms of getting back out the original activity.
```{r}
# negative data set to start with
load(file = file.path("data", "boncat_data.RData"))
neg_data <- filter(boncat_all, dataset == "T7", grepl("Neg", variable))$value
# simulated result
eval_result <- expand.grid(
approx_sim_active = c(5, 25, 50, 75, 99),
n_particles = c(100, 1000, 10000),
dRG_per_sub = c(0.01, 0.03, 0.05), # single substitution dRG increase, estimate from T7 (0.049)
dRG_per_sub_range = c(0, 20, 60), # what is the +- width of the R/G signal (in percentage)
dRG_eval_noise = c(0, 0.01), # dRG random noise (uniform distribution) for evaluating actual measurements
avg_subs = c(1, 5, 12) # avg number substitutions
) %>%
group_by_(.dots = names(.)) %>%
do(with(., {
neg <- sample(neg_data, size = n_particles)
pos <- sample(neg_data, size = n_particles)
n_active <- floor(approx_sim_active/100*n_particles)
# simulation data
sim_data <- generate_activity_distribution(pos, n_active = n_active, avg_subs = avg_subs,
dRG_per_sub = dRG_per_sub, dRG_per_sub_range = dRG_per_sub_range)
# evaluate outcome
active_cells <- calculate_active_cells(sim_data, neg_data,
a.noise = function(n) runif(n, -dRG_eval_noise, +dRG_eval_noise),
b.noise = function(n) runif(n, -dRG_eval_noise, +dRG_eval_noise))*100
data_frame(
true_sim_active = n_active/n_particles*100,
calc_active = active_cells['active'],
lci = active_cells['lci'],
uci = active_cells['uci']
)
})) %>% ungroup()
eval_result %>% knitr::kable(d=3)
```
#### Visualize evaluation result
```{r "activity_estimate_accuracy_precision", fig.width = 10, fig.height = 8}
# plot outcome
eval_plot_df <- eval_result %>%
arrange(n_particles) %>%
mutate(
dRG_per_sub = paste0("dRG/sub: ", dRG_per_sub),
dRG_per_sub_range = paste0(dRG_per_sub_range, "%"),
dRG_eval_noise = paste0("dRG noise: ", dRG_eval_noise),
avg_subs = sprintf("# subs: %.2d", avg_subs),
active = sprintf("%.2d%% active",approx_sim_active))
base_plot <-
ggplot() +
aes(x = factor(approx_sim_active),
y = calc_active, shape = factor(n_particles), fill = dRG_per_sub_range, color = dRG_per_sub_range) +
geom_hline(data = function(df)
df %>% select(dRG_per_sub, avg_subs, active, approx_sim_active, dRG_per_sub_range) %>% unique(),
map = aes(yintercept = approx_sim_active, color = NULL)) +
geom_errorbar(position=position_dodge(width = 1), map = aes(ymin = lci, ymax = uci), width = 0.5) +
geom_point(position=position_dodge(width = 1), size = 3, color = "black") +
labs(x = "simulated % active", y = "calculated % active", shape = "# particles measured",
fill = "dRG variation [+/-]", color = "dRG variation [+/-]") +
scale_shape_manual("# particles", values = 21:25) +
coord_cartesian(ylim = c(0, 100)) +
facet_grid(avg_subs~dRG_per_sub, scales = "free_x") +
theme_bw() +
guides(fill = guide_legend(override.aes = list(colour = "white", size=8, shape = 22)),
shape = guide_legend(override.aes = list(fill = "gray")))
base_plot %+% filter(eval_plot_df, grepl("0$", dRG_eval_noise)) + labs(title = "no R/G noise")
```
```{r "activity_estimate_accuracy_precision_with_noise", fig.width = 10, fig.height = 8}
base_plot %+% filter(eval_plot_df, grepl("0.01$", dRG_eval_noise)) + labs(title = "systematic R/G noise up to 0.01")
```
## Application to real data
```{r}
dRG_eval_noise <- 0.01
boncat_all %>% group_by(dataset) %>%
do({
active_cells <- calculate_active_cells(filter(., grepl("Pos", variable))$value, filter(., grepl("Neg", variable))$value)*100
active_cells_with_noise <- calculate_active_cells(
filter(., grepl("Pos", variable))$value, filter(., grepl("Neg", variable))$value,
a.noise = function(n) runif(n, -dRG_eval_noise, +dRG_eval_noise),
b.noise = function(n) runif(n, -dRG_eval_noise, +dRG_eval_noise))*100
data_frame(
est_active = active_cells['active'],
lci = active_cells['lci'],
uci = active_cells['uci'],
est_active_w_noise = active_cells_with_noise['active'],
lci_w_noise = active_cells_with_noise['lci'],
uci_w_noise = active_cells_with_noise['uci']
)
}) %>% knitr::kable(d = 2)
```
#### Further breakdown (all different replicate conditions)
```{r}
dRG_eval_noise <- 0.01
full_join(
boncat_all %>% select(dataset, variable) %>% unique() %>% rename(var_a = variable),
boncat_all %>% select(dataset, variable) %>% unique() %>% rename(var_b = variable),
by = "dataset"
) %>%
filter(var_a != var_b) %>%
group_by(dataset, var_a, var_b) %>%
do({
cond_a <- filter(boncat_all, dataset == .$dataset[1], variable == .$var_a[1])$value
cond_b <- filter(boncat_all, dataset == .$dataset[1], variable == .$var_b[1])$value
active_cells <- calculate_active_cells(cond_a, cond_b)*100
active_cells_with_noise <- calculate_active_cells(cond_a, cond_b,
a.noise = function(n) runif(n, -dRG_eval_noise, +dRG_eval_noise),
b.noise = function(n) runif(n, -dRG_eval_noise, +dRG_eval_noise))*100
data_frame(
est_active = active_cells['active'],
lci = active_cells['lci'],
uci = active_cells['uci'],
est_active_w_noise = active_cells_with_noise['active'],
lci_w_noise = active_cells_with_noise['lci'],
uci_w_noise = active_cells_with_noise['uci']
)
})%>% knitr::kable(d = 4)
```