/
isotope_analysis.Rmd
268 lines (239 loc) · 10.2 KB
/
isotope_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
---
title: "Analysis: Isotope Fractionation"
author: "Silverman et al."
date: "`r format(Sys.Date(), '%d %b %Y')`"
output:
html_document:
css: stylesheet.css
fig_caption: yes
number_sections: yes
toc: yes
toc_float: true
toc_depth: 3
code_folding: show
df_print: paged
subtitle: "Source file: [SI GitHub Repository](http://www.github.com/KopfLab/2018_Silverman_et_al/) / [heterocyst_analysis.Rmd](http://www.github.com/KopfLab/2018_Silverman_et_al/blob/master/isotope_analysis.Rmd)"
editor_options:
chunk_output_type: inline
---
```{r setup, echo = FALSE, message=FALSE, warning=FALSE}
library(tidyverse) # dplyr, tidyr, ggplot
library(readxl) # reading excel file
library(openxlsx) # writing excel file
library(knitr) # generating reports
library(latex2exp) # for latex plot labels
library(boot) # bootstrap sample means and key calculations
library(broom) # efficient retrieval of model parameters
source(file.path("lib", "functions.R"))
opts_chunk$set(
dev=c("png", "pdf"), dev.args=list(pdf = list(encoding="WinAnsi", useDingbats=FALSE)),
fig.keep="all", fig.path=file.path("figures", "2018_Silverman_et_al-"))
```
# Load Data
```{r}
isotope_data <- read_excel(file.path("data", "2018_Silverman_et_al-isotope_data.xlsx"))
growth_rate_data <- read_excel(file.path("data", "2018_Silverman_et_al-growth_rate_data.xlsx"))
heterocyst_data_summary <- read_excel(file.path("data", "2018_Silverman_et_al-heterocyst_data_summary.xlsx"))
```
# Isotope data
## SI plot with epsilons from all biological replicates
```{r SI_all_epsilons, fig.width = 8, fig.height = 6, warning=FALSE}
isotope_data %>%
# overall plot aesthetics
ggplot(aes(x = pN2, shape = organism, fill = organism, y = eps)) +
# all data points with analytical error
geom_errorbar(map = aes(ymin = eps - eps_sigma_analytical, ymax = eps + eps_sigma_analytical, color = organism), width = 0.01, alpha = 0.4) +
geom_point(map = aes(y = eps), alpha = 0.4, size = 3, color = "black") +
# averages
geom_point(data = function(df) group_by(df, organism, pN2) %>% summarize(eps = mean(eps)), color = "black", size = 6) +
# scales
scale_shape_manual("Species", values = c(21:26)) +
scale_fill_manual("Species", values = cbPalette) +
scale_color_manual("Species", values = cbPalette) +
# labels
labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{org}}{N_{2,gas}}}$ \\[\U2030\\]"), x = TeX("$pN_{2}$ \\[bar\\]")) +
# theme
theme_figure() +
theme(legend.position = c(0.825, 0.830), legend.background = element_rect(size = 0.8, color = "black"))
```
## Main plot with bootstrapped means
```{r}
n_bootstrap <- 1000
isotope_data_summary <- isotope_data %>%
nest(-organism, -pN2) %>%
mutate(
# analytical precision
sigma_analytical = map_dbl(data, ~mean(.x$eps_sigma_analytical)),
# bootstrap mean and standard error of the mean
n_bio_reps = map_int(data, nrow),
bootstrap = map(data, ~boot(data = .x$eps, statistic = function(x, idx) mean(x[idx]), R = n_bootstrap)),
eps_mean = map_dbl(bootstrap, ~mean(.x$t)),
eps_mean_se = map_dbl(bootstrap, ~sd(.x$t))
)
# save data summary and data table
isotope_data_summary %>% select(-data, -bootstrap) %>%
write.xlsx(file.path("data", "2018_Silverman_et_al-isotope_data_summary.xlsx"))
isotope_data_table <-
isotope_data_summary %>%
mutate(
pN2 = table_round(pN2, n_decs = 1),
n_bio_reps = table_round(n_bio_reps, n_decs = 0),
eps_mean = table_format_with_err(eps_mean, eps_mean_se, sig = 2, max = 2),
sigma_analytical = table_format_err(sigma_analytical, sig = 1)
) %>%
select(organism, pN2, n_bio_reps, eps_mean, sigma_analytical)
isotope_data_table %>% export_data_table("isotope_numbers.xlsx")
isotope_data_table
```
```{r figure_epsilons, fig.width = 8, fig.height = 6, warning=FALSE}
isotope_data_summary %>%
# overall plot aesthetics
ggplot(aes(x = pN2, shape = organism)) +
# 1 sigma error bars for the analytical precision
geom_errorbar(map = aes(y = eps_mean, ymin = eps_mean - sigma_analytical, ymax = eps_mean + sigma_analytical), width = 0, linetype = 4) +
# 1 S.E. error bars of the bootstrapped biological means
geom_errorbar(map = aes(y = eps_mean, ymin = eps_mean - eps_mean_se, ymax = eps_mean + eps_mean_se), width = 0.02) +
# bootstrapped means
geom_point(map = aes(y = eps_mean), fill = "#999999", color = "black", size = 6) +
# scales
scale_shape_manual("Species", values = c(21:26)) +
# labels
labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{org}}{N_{2,gas}}}$ \\[\U2030\\]"), x = TeX("$pN_{2}$ \\[bar\\]")) +
# theme
theme_figure() +
theme(legend.position = c(0.815, 0.88), legend.background = element_rect(size = 0.8, color = "black"))
```
# Isotope model
## Calculate relevant aqueous/gas fractionation
```{r SI_gas_aq_equilibrium, fig.width = 8, fig.height = 6, warning=FALSE}
# literature data
eq_data <- read_excel(file.path("data", "2018_Silverman_et_al-isotope_literature_data.xlsx"),
sheet = "gas liquid equilibrium")
# calculate regression parameters and estimate eps aq/gas at exp. temperature
eq_regression <- eq_data %>%
nest() %>%
mutate(
# bootstrap slope and intercept from regression model
bootstrap = map(data, ~boot(data = .x, R = n_bootstrap,
statistic = function (x, idx) {
# model fit for resampled data
m <- lm(eps_aq_gas ~ T.C, data = x[idx,])
intercept <- tidy(m)$estimate[1]
slope <- tidy(m)$estimate[2]
return(c(intercept, slope))
})),
intercept = map_dbl(bootstrap, ~mean(.x$t[,1])),
intercept_se = map_dbl(bootstrap, ~sd(.x$t[,1])),
slope = map_dbl(bootstrap, ~mean(.x$t[,2])),
slope_se = map_dbl(bootstrap, ~sd(.x$t[,2]))
) %>%
# calculate at experimental temperature
mutate(
T.C = 27,
eps_aq_gas = intercept + T.C * slope,
eps_aq_gas_se = sqrt(intercept_se^2 + (T.C*slope_se)^2)
)
# display
eq_regression %>% select(-data, -bootstrap)
# visualize data
eq_data %>%
ggplot(aes(x = T.C, y = eps_aq_gas, color = source)) +
# linear fit
geom_smooth(method = "lm", map = aes(color = NULL), color = "black", se = FALSE, fullrange = TRUE) +
# data points
geom_point(size = 4) +
# point at experimental conditions
geom_errorbar(data = eq_regression, color = "black",
map = aes(ymin = eps_aq_gas - eps_aq_gas_se, ymax = eps_aq_gas + eps_aq_gas_se)) +
geom_point(data = eq_regression, size = 6, shape = 18, color = "black") +
# scales
scale_x_continuous(expand = c(0, 0)) +
expand_limits(y = 0, x = c(0, 30)) +
# labels
labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{2,aq}}{N_{2,gas}}}$ \\[\U2030\\]"), x = "Temperature [C]") +
# theme
theme_figure(legend = FALSE, grid = TRUE)
```
## Determine model parameters
```{r}
# model data
model_data <-
isotope_data_summary %>%
filter(organism == "A. cylindrica") %>%
unnest(data) %>%
# add in growth rate information
left_join(growth_rate_data, by = c("pN2", "organism")) %>%
# add in mean heterocyst distances from corresponding data points
left_join(
heterocyst_data_summary %>% filter(growth_phase == "stationary"),
by = c("pN2", "organism")) %>%
mutate(
# calculate the combination of dependent variables for the regression
x = growth_rate.day * n_cbh_mean / pN2,
# standard error propagation for plotting x errorbars (assuming pN2 error to be minimal)
x_se = abs(x) * sqrt((growth_rate_se.day/growth_rate.day)^2 + (n_cbh_mean_se/n_cbh_mean)^2)
)
# bootstrap model
model <-
model_data %>%
nest(-organism) %>%
# add gas/aqueous equilibrium fractionation
merge(select(eq_regression, eps_aq_gas, eps_aq_gas_se)) %>%
as_data_frame() %>%
mutate(
# bootstrap intercept from regression model
bootstrap = map(data, ~boot(data = .x, R = n_bootstrap,
statistic = function (data, idx) {
# model fit for resampled data
m <- lm(eps ~ x, data = data[idx, ])
r2 <- glance(m)$r.squared
intercept <- tidy(m)$estimate[1]
return(c(intercept, r2))
})),
# intercept
intercept = map_dbl(bootstrap, ~mean(.x$t[,1])),
intercept_se = map_dbl(bootstrap, ~sd(.x$t[,1])),
# nitrogenase fractionation factor (eps_fix)
eps_fix = -intercept + eps_aq_gas,
eps_fix_se = sqrt(intercept_se^2 + eps_aq_gas_se^2),
r2 = map_dbl(bootstrap, ~mean(.x$t[,2]))
) %>% select(-bootstrap)
# save and display
model %>% select(-data) %>% write.xlsx(file.path("data", "2018_Silverman_et_al-isotope_model.xlsx"))
model %>% select(-data)
```
## Visualize isotope model
```{r figure_isotope_model, fig.width = 6, fig.height = 6, warning=FALSE}
model %>%
# unnest
unnest(data) %>%
# select averages for plotting
select(x, x_se, eps_mean, eps_mean_se, intercept, intercept_se) %>% unique() %>%
ggplot(aes(x = x, y = eps_mean)) +
# intercept (eps ag/g - eps fix estimate)
geom_rect(map = aes(ymin = intercept - intercept_se,
ymax = intercept + intercept_se,
xmin = -Inf, xmax = Inf), fill = "light grey") +
geom_hline(map = aes(yintercept = intercept), linetype = 2) +
# intercept text label
geom_text(data = function(df) select(df, intercept, intercept_se) %>% unique(),
map = aes(x = 63, y = intercept + intercept_se,
label = TeX("$=\\epsilon_{\\,\\frac{aq}{g}}-\\epsilon_{fix}$") %>%
as.character()),
parse = TRUE, hjust = 1.05, vjust = -0.2, size = 8) +
# linear fit
geom_smooth(method = "lm", map = aes(color = NULL), color = "black", se = FALSE, fullrange = TRUE) +
# 1 sigma error bars for averages
geom_errorbarh(map = aes(y = eps_mean, xmin = x - x_se, xmax = x + x_se), height = 0.03, alpha = 0.8) +
geom_errorbar(map = aes(ymin = eps_mean - eps_mean_se, ymax = eps_mean + eps_mean_se), width = 1, alpha = 0.8) +
# data points for average
geom_point(shape = 21, fill = "#999999", color = "black", size = 5) +
# scales
scale_x_continuous(expand = c(0, 0), breaks = (0:6)*10) + expand_limits(x=0) +
scale_y_continuous(breaks = c(0:20)*-0.2) +
# labels
labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{org}}{N_{2,gas}}}$ \\[\U2030\\]"),
x = TeX("$\\frac{\\mu \\cdot n_{cbh}}{pN_2}\\,\\left[\\frac{cells}{day\\cdot bar}\\right]$")) +
# theme
theme_figure(legend = FALSE, grid = TRUE)
```