isotope_analysis.Rmd

---
title: "Analysis: Isotope Fractionation"
author: "Silverman et al."
date: "`r format(Sys.Date(), '%d %b %Y')`"
output:
  html_document: 
    css: stylesheet.css
    fig_caption: yes
    number_sections: yes
    toc: yes
    toc_float: true
    toc_depth: 3
    code_folding: show
    df_print: paged
subtitle: "Source file: [SI GitHub Repository](http://www.github.com/KopfLab/2018_Silverman_et_al/) / [heterocyst_analysis.Rmd](http://www.github.com/KopfLab/2018_Silverman_et_al/blob/master/isotope_analysis.Rmd)"
editor_options:
  chunk_output_type: inline
---


```{r setup, echo = FALSE, message=FALSE, warning=FALSE}
library(tidyverse) # dplyr, tidyr, ggplot
library(readxl) # reading excel file
library(openxlsx) # writing excel file
library(knitr) # generating reports
library(latex2exp) # for latex plot labels
library(boot) # bootstrap sample means and key calculations
library(broom) # efficient retrieval of model parameters
source(file.path("lib", "functions.R"))
opts_chunk$set(
  dev=c("png", "pdf"), dev.args=list(pdf = list(encoding="WinAnsi", useDingbats=FALSE)),
  fig.keep="all", fig.path=file.path("figures", "2018_Silverman_et_al-"))
```

# Load Data

```{r}
isotope_data <- read_excel(file.path("data", "2018_Silverman_et_al-isotope_data.xlsx"))
growth_rate_data <- read_excel(file.path("data", "2018_Silverman_et_al-growth_rate_data.xlsx"))
heterocyst_data_summary <- read_excel(file.path("data", "2018_Silverman_et_al-heterocyst_data_summary.xlsx"))
```

# Isotope data

## SI plot with epsilons from all biological replicates

```{r SI_all_epsilons, fig.width = 8, fig.height = 6, warning=FALSE}
isotope_data %>% 
  # overall plot aesthetics
  ggplot(aes(x = pN2, shape = organism, fill = organism, y = eps)) + 
  # all data points with analytical error
  geom_errorbar(map = aes(ymin = eps - eps_sigma_analytical, ymax = eps + eps_sigma_analytical, color = organism), width = 0.01, alpha = 0.4) +
  geom_point(map = aes(y = eps), alpha = 0.4, size = 3, color = "black") + 
  # averages
  geom_point(data = function(df) group_by(df, organism, pN2) %>% summarize(eps = mean(eps)), color = "black", size = 6) +
  # scales
  scale_shape_manual("Species", values = c(21:26)) +
  scale_fill_manual("Species", values = cbPalette) +
  scale_color_manual("Species", values = cbPalette) +
  # labels
  labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{org}}{N_{2,gas}}}$ \\[\U2030\\]"), x = TeX("$pN_{2}$ \\[bar\\]")) + 
  # theme
  theme_figure() +
  theme(legend.position = c(0.825, 0.830), legend.background = element_rect(size = 0.8, color = "black"))
```

## Main plot with bootstrapped means

```{r}
n_bootstrap <- 1000
isotope_data_summary <- isotope_data %>% 
  nest(-organism, -pN2) %>% 
  mutate(
    # analytical precision
    sigma_analytical = map_dbl(data, ~mean(.x$eps_sigma_analytical)),
    # bootstrap mean and standard error of the mean
    n_bio_reps = map_int(data, nrow),
    bootstrap = map(data, ~boot(data = .x$eps, statistic = function(x, idx) mean(x[idx]), R = n_bootstrap)),
    eps_mean = map_dbl(bootstrap, ~mean(.x$t)),
    eps_mean_se = map_dbl(bootstrap, ~sd(.x$t))
  )

# save data summary and data table
isotope_data_summary %>% select(-data, -bootstrap) %>%
  write.xlsx(file.path("data", "2018_Silverman_et_al-isotope_data_summary.xlsx"))

isotope_data_table <-
  isotope_data_summary %>% 
  mutate(
    pN2 = table_round(pN2, n_decs = 1),
    n_bio_reps = table_round(n_bio_reps, n_decs = 0),
    eps_mean = table_format_with_err(eps_mean, eps_mean_se, sig = 2, max = 2),
    sigma_analytical = table_format_err(sigma_analytical, sig = 1)
  ) %>% 
  select(organism, pN2, n_bio_reps, eps_mean, sigma_analytical)
isotope_data_table %>% export_data_table("isotope_numbers.xlsx")

isotope_data_table
```


```{r figure_epsilons, fig.width = 8, fig.height = 6, warning=FALSE}
isotope_data_summary %>% 
  # overall plot aesthetics
  ggplot(aes(x = pN2, shape = organism)) + 
  # 1 sigma error bars for the analytical precision
  geom_errorbar(map = aes(y = eps_mean, ymin = eps_mean - sigma_analytical, ymax = eps_mean + sigma_analytical), width = 0, linetype = 4) +
  # 1 S.E. error bars of the bootstrapped biological means
  geom_errorbar(map = aes(y = eps_mean, ymin = eps_mean - eps_mean_se, ymax = eps_mean + eps_mean_se), width = 0.02) +
  # bootstrapped means
  geom_point(map = aes(y = eps_mean), fill = "#999999", color = "black", size = 6) +
  # scales
  scale_shape_manual("Species", values = c(21:26)) +
  # labels
  labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{org}}{N_{2,gas}}}$ \\[\U2030\\]"), x = TeX("$pN_{2}$ \\[bar\\]")) + 
  # theme
  theme_figure() +
  theme(legend.position = c(0.815, 0.88), legend.background = element_rect(size = 0.8, color = "black"))
```

# Isotope model

## Calculate relevant aqueous/gas fractionation

```{r SI_gas_aq_equilibrium, fig.width = 8, fig.height = 6, warning=FALSE}
# literature data
eq_data <- read_excel(file.path("data", "2018_Silverman_et_al-isotope_literature_data.xlsx"), 
                      sheet = "gas liquid equilibrium")

# calculate regression parameters and estimate eps aq/gas at exp. temperature
eq_regression <- eq_data %>% 
  nest() %>%
  mutate(
    # bootstrap slope and intercept from regression model
    bootstrap = map(data, ~boot(data = .x, R = n_bootstrap,
      statistic = function (x, idx) {
        # model fit for resampled data
        m <- lm(eps_aq_gas ~ T.C, data = x[idx,])
        intercept <- tidy(m)$estimate[1]
        slope <- tidy(m)$estimate[2]
        return(c(intercept, slope))
      })),
    intercept = map_dbl(bootstrap, ~mean(.x$t[,1])),
    intercept_se = map_dbl(bootstrap, ~sd(.x$t[,1])),
    slope = map_dbl(bootstrap, ~mean(.x$t[,2])),
    slope_se = map_dbl(bootstrap, ~sd(.x$t[,2]))
  ) %>% 
  # calculate at experimental temperature
  mutate(
    T.C = 27,
    eps_aq_gas = intercept + T.C * slope,
    eps_aq_gas_se = sqrt(intercept_se^2 + (T.C*slope_se)^2)
  )

# display
eq_regression %>% select(-data, -bootstrap)

# visualize data
eq_data %>% 
  ggplot(aes(x = T.C, y = eps_aq_gas, color = source)) +
  # linear fit
  geom_smooth(method = "lm", map = aes(color = NULL), color = "black", se = FALSE, fullrange = TRUE) +
  # data points
  geom_point(size = 4) +
  # point at experimental conditions
  geom_errorbar(data = eq_regression, color = "black", 
                map = aes(ymin = eps_aq_gas - eps_aq_gas_se, ymax = eps_aq_gas + eps_aq_gas_se)) +
  geom_point(data = eq_regression, size = 6, shape = 18, color = "black") +
  # scales
  scale_x_continuous(expand = c(0, 0)) +
  expand_limits(y = 0, x = c(0, 30)) +
  # labels
  labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{2,aq}}{N_{2,gas}}}$ \\[\U2030\\]"), x = "Temperature [C]") + 
  # theme
  theme_figure(legend = FALSE, grid = TRUE)
```


## Determine model parameters

```{r}
# model data
model_data <- 
  isotope_data_summary %>% 
  filter(organism == "A. cylindrica") %>% 
  unnest(data) %>% 
  # add in growth rate information
  left_join(growth_rate_data, by = c("pN2", "organism")) %>% 
  # add in mean heterocyst distances from corresponding data points
  left_join(
    heterocyst_data_summary %>% filter(growth_phase == "stationary"), 
    by = c("pN2", "organism")) %>% 
  mutate(
    # calculate the combination of dependent variables for the regression
    x = growth_rate.day * n_cbh_mean / pN2,
    # standard error propagation for plotting x errorbars (assuming pN2 error to be minimal)
    x_se = abs(x) * sqrt((growth_rate_se.day/growth_rate.day)^2 + (n_cbh_mean_se/n_cbh_mean)^2)
  )

# bootstrap model
model <-
  model_data %>% 
  nest(-organism) %>% 
  # add gas/aqueous equilibrium fractionation
  merge(select(eq_regression, eps_aq_gas, eps_aq_gas_se)) %>% 
  as_data_frame() %>% 
  mutate(
    # bootstrap intercept from regression model
    bootstrap = map(data, ~boot(data = .x, R = n_bootstrap,
      statistic = function (data, idx) {
        # model fit for resampled data
        m <- lm(eps ~ x, data = data[idx, ])
        r2 <- glance(m)$r.squared
        intercept <- tidy(m)$estimate[1]
        return(c(intercept, r2))
      })),
    # intercept
    intercept = map_dbl(bootstrap, ~mean(.x$t[,1])),
    intercept_se = map_dbl(bootstrap, ~sd(.x$t[,1])),
    # nitrogenase fractionation factor (eps_fix)
    eps_fix = -intercept + eps_aq_gas,
    eps_fix_se = sqrt(intercept_se^2 + eps_aq_gas_se^2),
    r2 = map_dbl(bootstrap, ~mean(.x$t[,2]))
  ) %>% select(-bootstrap)

# save and display
model %>% select(-data) %>% write.xlsx(file.path("data", "2018_Silverman_et_al-isotope_model.xlsx"))
model %>% select(-data)
```

## Visualize isotope model

```{r figure_isotope_model, fig.width = 6, fig.height = 6, warning=FALSE}
model %>% 
  # unnest 
  unnest(data) %>% 
  # select averages for plotting
  select(x, x_se, eps_mean, eps_mean_se, intercept, intercept_se) %>% unique() %>% 
  ggplot(aes(x = x, y = eps_mean)) +
  # intercept (eps ag/g - eps fix estimate)
  geom_rect(map = aes(ymin = intercept - intercept_se,
                      ymax = intercept + intercept_se,
                      xmin = -Inf, xmax = Inf), fill = "light grey") +
  geom_hline(map = aes(yintercept = intercept), linetype = 2) +
  # intercept text label
  geom_text(data = function(df) select(df, intercept, intercept_se) %>% unique(), 
            map = aes(x = 63, y = intercept + intercept_se, 
                      label = TeX("$=\\epsilon_{\\,\\frac{aq}{g}}-\\epsilon_{fix}$") %>% 
                        as.character()),
            parse = TRUE, hjust = 1.05, vjust = -0.2, size = 8) +
  # linear fit
  geom_smooth(method = "lm", map = aes(color = NULL), color = "black", se = FALSE, fullrange = TRUE) +
  # 1 sigma error bars for averages
  geom_errorbarh(map = aes(y = eps_mean, xmin = x - x_se, xmax = x + x_se), height = 0.03, alpha = 0.8) +
  geom_errorbar(map = aes(ymin = eps_mean - eps_mean_se, ymax = eps_mean + eps_mean_se), width = 1, alpha = 0.8) +
  # data points for average
  geom_point(shape = 21, fill = "#999999", color = "black", size = 5) + 
  # scales
  scale_x_continuous(expand = c(0, 0), breaks = (0:6)*10) + expand_limits(x=0) + 
  scale_y_continuous(breaks = c(0:20)*-0.2) + 
  # labels
  labs(y = TeX("$^{15}\\epsilon_{\\,\\frac{N_{org}}{N_{2,gas}}}$ \\[\U2030\\]"), 
       x = TeX("$\\frac{\\mu \\cdot n_{cbh}}{pN_2}\\,\\left[\\frac{cells}{day\\cdot bar}\\right]$")) + 
  # theme
  theme_figure(legend = FALSE, grid = TRUE)
```