/
ED50_analysis.Rmd
715 lines (611 loc) · 28.5 KB
/
ED50_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
---
title: "Thermal tolerance of *A. cervicornis* across 6 Florida coral nurseries"
author: Ross Cunning
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
html_document:
code_folding: hide
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = TRUE)
library(readxl)
library(janitor)
library(parzer)
library(broom)
library(lsmeans)
library(drc)
library(tidyverse)
library(ggthemes)
library(multcompView)
library(ggpubr)
library(mcr)
library(cowplot)
library(geodist)
```
This document analyzes thermal tolerance metrics (ED50) derived from 229 coral colonies measured at 6 coral nurseries on the R/V *Coral Reef II* in summer/fall 2020. ED50 values are analyzed for differences among nurseries, and effects of source colony location and temperature regime. For a subset of corals, ED50 values are compared to values obtained from the same genets at the same nurseries in June 2020, in a separate, independent set of CBASS assays.
```{r import_data}
# Import cleaned datasets from each nursery
clean_data_files <- list.files(path = "data/processed",
pattern = "*_clean.csv",
full.names = TRUE)
clean_data <- clean_data_files %>% map_dfr(read_csv)
# Import genotype metadata
gmd <- read_csv("data/genotype_metadata_raw.csv") %>%
mutate(nursery = tolower(nursery),
across(everything(), str_squish),
across(ends_with("lat"), ~signif(parse_lat(.), 6)), # parse latitude
across(ends_with("lon"), ~signif(parse_lon(.), 6))) %>% # parse longitude
select(nursery, name = geno, source_lat, source_lon, MMM_5km)
# Import names key
gkey <- read_csv("data/genotype_name_key.csv") %>%
mutate(alt0 = geno) %>%
pivot_longer(starts_with("alt"), names_to = "alt", values_to = "name") %>%
drop_na() %>%
select(geno, name)
# Combine cleaned data with genotype metadata
df.all <- clean_data %>%
rename(name = geno) %>%
left_join(gkey) %>%
select(nursery, geno, name, max_temp, fvfm, date) %>%
left_join(gmd) %>%
# Order nurseries from south to north
mutate(nursery = factor(nursery, levels = c("mote", "fwc", "rrt", "crf", "um", "nsu"))) %>%
select(nursery, name, geno, date, max_temp, fvfm, source_lat, source_lon, MMM_5km)
# Define dose-response curves using same constraints
ll3 <- function(data) {
drm(fvfm ~ max_temp, data = data,
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(120, 0.72, 40),
lowerl = c(10, 0.55, 30))}
# Fit model to each coral, get parameters, fitted values, and residuals
mods <- df.all %>%
nest(data = c(max_temp, fvfm)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, ll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
# Extract parameter values from model fits
res.all <- mods %>%
unnest(pars) %>%
pivot_wider(names_from = term,
values_from = c(estimate, std.error, statistic, p.value)) %>%
clean_names() %>%
select(!where(is.list), -curve)
# Filter to analyze only CBASS runs from Coral Reef II in August and October
res <- res.all %>%
filter(date %in% c("2020-08", "2020-10"))
# Recode nursery abbreviations
res <- res %>%
mutate(nursery = recode(nursery, mote = "MML", fwc = "FWC", rrt = "RR", crf = "CRF", um = "UM", nsu = "NSU"))
# Import symbiont qPCR data
sh <- read_csv("data/processed/acer_SH_data.csv") %>%
clean_names() %>%
filter(species == "Acer") %>%
rename(name = genotype) %>%
mutate(a_acer = replace_na(a_acer, 0),
d_acer = replace_na(d_acer, 0),
totSH = a_acer + d_acer,
logSH = log10(totSH))
```
# How many coral colonies and unique genotypes were tested?
```{r, fig.height = 7, fig.width = 7, fig.asp = 1}
res %>%
summarize(n_colonies = n_distinct(nursery, name),
n_genotypes = n_distinct(geno)) %>%
knitr::kable()
```
# Number of unique genotypes shared across nurseries
```{r eval = FALSE}
library(venn) # install.packages("venn")
df <- res %>%
distinct(nursery, geno) %>%
nest(geno)
df2 <- df$data %>%
map(., ~c(.$geno))
names(df2) <- df$nursery
venn(df2[c("UM", "CRF", "RR", "FWC", "MML")],
zcolor = c("#619CFF", "#00BFC4", "#00BA38", "#B79F00", "#F8766D"))
png(filename = "output/figS1.png", width = 90, height = 90, units = "mm", res = 300)
venn(df2[c("UM", "CRF", "RR", "FWC", "MML")],
zcolor = c("#619CFF", "#00BFC4", "#00BA38", "#B79F00", "#F8766D"))
dev.off()
```
```{r test_geno_effect, eval = FALSE, include = FALSE}
# Do different genotypes have different thermal tolerance?
# Fit single curve without genotype as a factor
mod_nogeno <- drm(fvfm ~ max_temp, data = df,
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(120, 0.72, 40),
lowerl = c(10, 0.55, 30))
# Fit curves separately for each genotype
mod_geno <- drm(fvfm ~ max_temp, curveid = geno, data = df,
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(120, 0.72, 40),
lowerl = c(10, 0.55, 30))
# Compare model fit with vs. without genotype as factor
anova(mod_nogeno, mod_geno)
## Genotype is highly significant
```
# What is the range in thermal tolerance across all colonies?
```{r ed50_histograms, fig.width = 11}
# Histogram colored by nursery with density plot
hist1 <- res %>%
ggplot(aes(x = estimate_ed50)) +
geom_histogram(aes(fill = nursery), binwidth = 0.1, alpha = 0.7, position = position_stack(reverse = TRUE)) +
geom_density(aes(y = ..count.. * 0.1)) +
theme_few() +
theme(text = element_text(size = 10)) +
theme(legend.position = "none", legend.key.size = unit(0.25, "cm")) +
labs(x = "ED50 (°C)", y = "Number of colonies") +
guides(fill = guide_legend(reverse = TRUE)) +
xlim(qnorm(c(0.005, 0.9999), mean(res$estimate_ed50), sd(res$estimate_ed50)))
# Remove nursery effect
mod <- lm(estimate_ed50 ~ nursery, data = res)
res.adj <- augment(mod, data = res) %>%
mutate(ed50_adj = mean(res$estimate_ed50) + .resid)
# Get mean and sd from adjusted distribution
meanadj <- mean(res.adj$ed50_adj)
sdadj <- sd(res.adj$ed50_adj)
hist2 <- ggplot(res.adj, aes(x = ed50_adj)) +
geom_histogram(aes(y = ..count../sum(..count..)), binwidth = 0.1, alpha = 0.4) +
stat_function(fun = ~ dnorm(.x, meanadj, sdadj) * 0.1) +
geom_vline(aes(xintercept = meanadj)) +
geom_segment(aes(x = meanadj, xend = meanadj + sdadj,
y = dnorm(meanadj + sdadj, meanadj, sdadj) * 0.1,
yend = dnorm(meanadj + sdadj, meanadj, sdadj) * 0.1),
arrow = arrow(length = unit(0.2,"cm"), ends = "both"), lwd = 0.1) +
xlim(qnorm(c(0.00001, 0.99999), meanadj, sdadj)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
annotate("text", x = meanadj - 0.1, y = 0.125, adj = 1,
label = paste0("mean = ", round(meanadj, 2)), size = 2) +
annotate("text", x = meanadj + sdadj + 0.1,
y = dnorm(meanadj + sdadj, meanadj, sdadj) * 0.1,
adj = 0, label = paste0("s.d. = ", round(sdadj, 2)), size = 2) +
theme_few() +
theme(text = element_text(size = 10)) +
labs(x = "ED50 (°C) adjusted", y = "Percentage")
# Combine histograms into multi-panel figure
fig2 <- cowplot::plot_grid(
hist1,
hist2, labels = "AUTO")
fig2
# Summarize original distribution
tidy(summary(res$estimate_ed50)) %>%
knitr::kable(caption = "Summary of unadjusted ED50 values")
# Is adjusted distribution normal?
tidy(shapiro.test(res.adj$ed50_adj)) %>%
knitr::kable(caption = "Normality test for adjusted ED50 values")
```
After adjusting for variation due to nursery location (i.e., environment), ED50 values are approximately normally distributed, with a mean of 35.98°C and standard deviation of 0.38°C. This is considered the variance due to genetic effects (and measurement error).
# ED50 ± standard error for all colonies
```{r}
res %>%
mutate(nursname = interaction(nursery, name),
nursname = fct_reorder(nursname, estimate_ed50)) %>%
ggplot(aes(x = nursname, y = estimate_ed50, color = nursery)) +
geom_errorbar(aes(ymin = estimate_ed50 - std_error_ed50, ymax = estimate_ed50 + std_error_ed50)) +
geom_point() +
facet_wrap(~nursery, scales = "free_x") +
theme_few() +
theme(legend.position = "none",
axis.text.x = element_blank()) +
labs(x = "Colony", y = "ED50 ± SE (°C)") +
coord_cartesian(ylim = c(34.75, 37.75))
```
# How does thermal tolerance vary across nurseries?
```{r nursery_effects, fig.width = 5}
# Test for equal variance across nurseries
car::leveneTest(estimate_ed50 ~ nursery, data = res) %>%
tidy() %>%
knitr::kable(caption = "Levene's test for homogeneity of variance")
## Unequal variance among nurseries -- use Welch ANOVA
welch <- oneway.test(estimate_ed50 ~ nursery, data = res)
tidy(welch) %>% knitr::kable(caption = "Welch ANOVA table")
# Variance explained by nursery
omega.squared <- function(WelchF, df, N) df * (WelchF - 1) / (df * (WelchF - 1) + N)
tibble(omega.squared = omega.squared(
WelchF = welch$statistic[[1]], df = welch$parameter[[1]], N = nrow(res))) %>%
knitr::kable(caption = "Variance explained by nursery")
# Get mean and sd by nursery for plotting
nurs.stats <- res %>%
group_by(nursery) %>%
summarise(mean = mean(estimate_ed50), sd = sd(estimate_ed50), n = n()) %>%
ungroup()
# Median pairwise difference between nurseries
mpwdiff <- median(dist(nurs.stats$mean))
mpwdiff %>%
knitr::kable(caption = "Median pairwise difference between nurseries (°C)")
# Use pairwise wilcox to test for differences in ED50 among nurseries
test <- pairwise.wilcox.test(res$estimate_ed50, res$nursery, p.adjust.method = "bonf")
pvals <- tibble(reshape2::melt(test$p.value)) %>% drop_na() %>%
mutate(Var1 = factor(Var1, levels = levels(fct_reorder(nurs.stats$nursery, nurs.stats$mean)))) %>%
arrange(Var1)
stats <- setNames(pull(pvals[,3]), paste(pvals$Var1, pvals$Var2, sep = "-")) %>%
multcompLetters(threshold = 0.01)
lett <- enframe(stats$monospacedLetters, name = "nursery", value = "group")
nurs.stats <- left_join(nurs.stats, lett)
# Raincloud plot
source("https://raw.githubusercontent.com/datavizpyr/data/master/half_flat_violinplot.R")
res2 <- res %>%
full_join(sh, by = c("nursery", "name")) %>%
mutate(dominant = case_when(d_acer > a_acer ~ "D", a_acer > d_acer ~ "A", TRUE ~ "A")) %>%
mutate(nursery = factor(nursery, levels = c("MML", "FWC", "RR", "CRF", "UM", "NSU")))
nurs.fig <- res2 %>%
ggplot(aes(x = nursery, y = estimate_ed50, fill = nursery, group = nursery)) +
geom_hline(aes(yintercept = mean(estimate_ed50)), lty = 2, col = "gray") +
geom_flat_violin(position = position_nudge(x = 0.2, y = 0),
adjust = 1, width = 2, alpha = 0.7, lwd = 0) +
geom_boxplot(position = position_nudge(x = 0.2, y = 0), width = 0.1,
outlier.shape = NA, alpha = 1, lwd = 0.25) +
geom_point(data = filter(res2, dominant == "A"), aes(color = nursery), alpha = 0.7, size = 0.5,
position = position_jitter(width = 0.075)) +
geom_point(data = filter(res2, dominant == "D"), size = 0.9, shape = 3,
color = "black", fill = "black", position = position_nudge(x = -0.08)) +
geom_point(data = nurs.stats, aes(x = nursery, y = mean), inherit.aes = FALSE,
position = position_nudge(x = 0, y = 0), size = 1, pch = 5) +
geom_errorbar(data = nurs.stats, inherit.aes = FALSE, width = 0, lwd = 0.25,
aes(x = nursery, y = mean, ymin = mean - sd, ymax = mean + sd),
position = position_nudge(x = 0, y = 0)) +
geom_text(data = nurs.stats, aes(y = 37.5, label = group), size = 2) +
geom_text(data = nurs.stats, aes(y = 37.5, label = paste("n=", n)), nudge_x = 0.2, size = 2) +
coord_flip() +
theme_few() +
theme(text = element_text(size = 10)) + #,
#axis.title.y = element_blank()) +
theme(legend.position = "none") +
labs(y = "ED50 (°C)", x = "Nurseries")
nurs.fig
```
There is variation among nurseries in average ED50 values. This could be due to different environmental conditions, and/or different sets of genotypes present, and/or confounding experimental effects. However, most of the variation in thermal tolerance is found *within* nurseries rather than between them. This means that each nursery tends to have genets with a wide range of thermal tolerance phenotypes.
```{r}
# 3-panel Figure 2
fig2 <- cowplot::plot_grid(hist1, nurs.fig, hist2, ncol = 3, labels = "AUTO")
ggsave("output/fig2.png", plot = fig2, width = 180, height = 60, units = "mm")
```
# How does thermal tolerance vary for the same genotype grown at different nurseries?
Plot ED50s for each genotype
```{r, include = T, eval = T}
# Get ED50s (adjusted for nursery effect) for genotypes present at multiple nurseries
multi.adj <- res.adj %>%
# Filter genotypes present at more than one nursery
group_by(geno) %>%
filter(n_distinct(nursery) > 1) %>%
# If genotype run more than once at a nursery, use mean ed50 within nursery
group_by(geno, nursery) %>%
summarize(ed50_adj = mean(ed50_adj)) %>%
ungroup()
# Reorder genotypes based on mean ed50 across nurseries
multi.adj <- multi.adj %>%
group_by(geno) %>%
summarize(geno_mean_ed50 = mean(ed50_adj)) %>%
right_join(multi.adj) %>%
mutate(geno = fct_reorder(geno, geno_mean_ed50))
# Count number of genotypes present at 2, 3, or 4 nurseries
multi.adj %>%
count(geno, name = "Present at N nurseries") %>%
count(`Present at N nurseries`, name = "Number of genotypes")
# Plot ED50adj by genotype
ggplot(multi.adj, aes(x = geno, y = ed50_adj, color = nursery, shape = nursery, group = geno)) +
geom_point(size = 1.5, alpha = 0.75) +
geom_point(aes(y = geno_mean_ed50), pch = 1, color = "black", stroke = 0.25, size = 2)+
stat_summary(fun.data = mean_se, geom = "errorbar", color = "black", lwd = 0.25, width = 0) +
theme_few() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.text = element_text(size = 8),
legend.position = c(0.15, 0.75),
legend.text = element_text(size = 6),
legend.title = element_text(size = 8),
legend.key.size = unit(0.3, "cm")) +
scale_color_manual(values = c("#F8766D", "#B79F00", "#00BA38", "#00BFC4", "#619CFF")) +
labs(x = "Genotype", y = "Adjusted ED50 (°C)")
ggsave(filename = "output/figS3.png", width = 120, height = 87, units = "mm")
tidy(anova(lm(ed50_adj ~ geno, data = multi.adj))) %>%
knitr::kable(caption = "ANOVA: Effect of genotype on ED50")
```
Plot correlations across nurseries
```{r, fig.height = 7}
gatherpairs <- function(data, ...,
xkey = '.xkey', xvalue = '.xvalue',
ykey = '.ykey', yvalue = '.yvalue',
na.rm = FALSE, convert = FALSE, factor_key = FALSE) {
vars <- quos(...)
xkey <- enquo(xkey)
xvalue <- enquo(xvalue)
ykey <- enquo(ykey)
yvalue <- enquo(yvalue)
data %>% {
cbind(gather(., key = !!xkey, value = !!xvalue, !!!vars,
na.rm = na.rm, convert = convert, factor_key = factor_key),
select(., !!!vars))
} %>% gather(., key = !!ykey, value = !!yvalue, !!!vars,
na.rm = na.rm, convert = convert, factor_key = factor_key)
}
cors <- multi.adj %>%
select(nursery, geno, ed50_adj) %>%
pivot_wider(names_from = nursery, values_from = ed50_adj, values_fn = mean) %>%
gatherpairs(FWC, CRF, RR, MML, UM)
corfig <- ggplot(cors, aes(x = .xvalue, y = .yvalue)) +
geom_point() +
geom_abline(a = 0, b = 1, lty = 2) +
stat_cor(size = 3) +
facet_grid(.ykey ~ .xkey) +
theme_few() +
labs(x = "Adjusted ED50 (°C)", y = "Adjusted ED50 (°C)")
corfig
#ggsave(plot = corfig, filename = "output/figS4.png", width = 190, height = 190, units = "mm")
```
Genotype is not a significant predictor of ED50 among coral genotypes tested at multiple nurseries. This could indicate that: 1) ED50 is plastic and genotype by environment interactions result in genotypes having different ED50 values at different nurseries, and/or 2) small sample sizes for each genotype limit statistical power to detect differences among genotypes. The 33 genotypes in this subset were present at only two (n=18), three (n=13), or four (n=1) nurseries, so the low replication for each genotype is also a limitation.
# Is the average ED50 of corals at each nursery related to the nursery's MMM temperature?
```{r, fig.width = 4, fig.height = 4}
# Relationship between nursery MMM and average nursery ED50?
nursmmm <- res.adj %>%
mutate(nursmmm = case_when(nursery == "MML" ~ 30.2694,
nursery == "UM" ~ 29.5412,
nursery == "NSU" ~ 29.5344,
nursery == "CRF" ~ 29.7307,
nursery == "RR" ~ 29.7307,
nursery == "FWC" ~ 29.9965))
nursmmm %>%
group_by(nursery) %>%
summarize(nursmmm = mean(nursmmm), meaned50 = mean(estimate_ed50)) %>%
ggplot(aes(x = nursmmm, y = meaned50)) +
geom_label(aes(label = nursery))
```
There is no relationship between nursery MMM and average ED50.
# Is thermal tolerance related to symbiont density?
```{r, fig.width = 4.5, fig.height = 3}
sh <- full_join(sh, res.adj)
ggplot(sh, aes(x = logSH, y = ed50_adj)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
theme_few() +
labs(x = "log10 symbiont to host cell ratio", y = "Adjusted ED50 (°C)")
mod <- lm(ed50_adj ~ logSH, data = sh)
anova(mod) %>% knitr::kable()
```
# Is thermal tolerance related to source colony latitude and longitude?
```{r, fig.height = 3}
mod <- lm(ed50_adj ~ source_lat + source_lon, data = res.adj)
anova(mod) %>%
tidy() %>%
knitr::kable()
lat.plot <- ggplot(res.adj, aes(x = source_lat, y = ed50_adj)) +
geom_point(size = 0.25) +
geom_smooth(method = "lm", se = FALSE) +
scale_x_continuous(limits = c(24.45, 26.2), expand = c(0.08,0.08),
breaks = c(25, 26), labels = c(25, 26)) +
theme_few() +
theme(text = element_text(size = 10)) +
labs(x = "Latitude", y = expression(paste(ED50[adj], " (°C)")))
lon.plot <- ggplot(res.adj, aes(x = source_lon, y = ed50_adj)) +
geom_point(size = 0.25) +
scale_x_continuous(limits = c(-82.35, -79.75), expand = c(0, 0),
breaks = c(-82, -81, -80), labels = c(-82, -81, -80)) +
geom_smooth(method = "lm", se = FALSE) +
theme_few() +
theme(text = element_text(size = 10)) +
labs(x = "Longitude", y = expression(paste(ED50[adj], " (°C)")))
cowplot::plot_grid(lat.plot, lon.plot, ncol = 2)
```
There is no relationship between ED50 and source colony latitude and longitude.
# Is thermal tolerance related to source colony temperature regime?
To test this hypothesis, we calculate maximum monthly mean (MMM) temperatures corresponding to each source colony location, using satellite SST data with 5km resolution.
```{r, eval = TRUE, fig.height = 3, fig.width = 3.5}
# Test effect of MMM from 5km data
mod5km <- lm(ed50_adj ~ mmm_5km, data = res.adj)
anova(mod5km) %>%
tidy() %>%
knitr::kable()
# Plot
mmm5.plot <- ggplot(res.adj, aes(x = as.numeric(mmm_5km), y = ed50_adj)) +
geom_point(size = 0.25) +
geom_smooth(method = "lm", se = FALSE) +
theme_few() +
theme(text = element_text(size = 10)) +
scale_x_continuous(labels = function(x) str_trim(x),
expand = c(0.075, 0.075)) +
labs(x = "MMM (°C)", y = expression(paste(ED50[adj], " (°C)")))
mmm5.plot
```
There are no relationships between ED50 and source colony latitude, longitude, or MMM temperature. This suggests that thermally tolerant colonies are not restricted to specific locations, but can be found throughout the Florida reef tract.
# Map the source locations of the most thermally tolerant genets
```{r, eval = TRUE, include = TRUE, fig.width = 3.5, fig.height = 3.5}
top10 <- res.adj %>%
filter(ed50_adj > quantile(ed50_adj, 0.9))
library("rnaturalearth")
library("rnaturalearthdata")
library("sf")
world <- ne_countries(scale = "large", returnclass = "sf")
top10plot <- ggplot(data = world) +
geom_sf(lwd = 0, fill = "gray70") +
coord_sf(xlim = c(-82.35, -79.75), ylim = c(24.3, 26.4), expand = FALSE) +
geom_point(aes(x = source_lon, y = source_lat), data = res.adj, size = 0.1, color = "black", alpha = 0.5) +
geom_point(aes(x = source_lon, y = source_lat), data = top10, color = "red", size = 0.1, alpha = 1) +
scale_x_continuous(breaks = seq(-82, -80, 1), labels = seq(-82, -80, 1)) +
scale_y_continuous(breaks = seq(25, 26, 1), labels = seq(25, 26, 1)) +
theme_few() +
theme(text = element_text(size = 10),
plot.margin = margin(6,3,6,6)) +
labs(y = "Latitude", x = "Longitude") +
geom_segment(aes(x = -81.475, y = 25.325, xend = source_lon, yend = source_lat), data = top10, lwd = 0.1, col = "red", alpha = 0.5)
insetplot <- tibble(x = seq(-3, 3, length.out = 1001), y = dnorm(x, 0, 1)) %>%
mutate(area = x >= qnorm(0.9)) %>%
ggplot(aes(x = x, y = y)) +
geom_line(lwd = 0.1) +
geom_ribbon(aes(ymin = 0, ymax = y, fill = area)) +
scale_fill_manual(values = c(NA, "red")) +
theme_void() +
theme(plot.margin = margin(0,0,0,0)) +
annotate("text", x = -1, y = -0.1, label = expression("ED50" [adj]), size = 2, angle = 0) + #
coord_cartesian(clip = "off") +
theme(legend.position = "none")
plot.with.inset <- ggdraw() +
draw_plot(top10plot) +
draw_plot(insetplot, x = 0.3, y = .55, width = .25, height = .2)
newfig3 <- cowplot::plot_grid(lat.plot, lon.plot, mmm5.plot, plot.with.inset, nrow = 2,
labels = "AUTO")
newfig3
ggsave("output/fig3.png", plot = newfig3, width = 90, height = 80, units = "mm")
```
# Map each nursery's collection sources
```{r, fig.width = 4, fig.height = 8}
nurs <- tribble(
~nursery, ~nurs_lat, ~nurs_lon,
"MML", 24.56, -81.40,
"NSU", 26.12, -80.09,
"UM", 25.67, -80.09,
"CRF", 24.98, -80.43,
"RR", 24.98, -80.43,
"FWC", 24.66, -81.02
)
df <- res.adj %>%
select(source_lat, source_lon, nursery, name) %>%
full_join(nurs) %>%
mutate(nursery = factor(nursery, levels = levels(res.adj$nursery)))
maps <- ggplot(data = world) +
geom_sf(lwd = 0, fill = "gray70") +
coord_sf(xlim = c(-82.35, -79.75), ylim = c(24.3, 26.4), expand = FALSE) +
geom_point(aes(x = source_lon, y = source_lat, color = nursery),
data = df, size = 0.5, alpha = 0.5) +
geom_point(aes(x = nurs_lon, y = nurs_lat, fill = nursery),
data = df, size = 1, pch = 21) +
geom_segment(aes(x = nurs_lon, xend = source_lon, y = nurs_lat, yend = source_lat, color = nursery),
data = df, lwd = 0.2, alpha = 0.5) +
scale_x_continuous(breaks = seq(-82, -80, 1), labels = seq(-82, -80, 1)) +
scale_y_continuous(breaks = seq(25, 26, 1), labels = seq(25, 26, 1)) +
facet_wrap(~nursery, ncol = 2) +
theme_few() +
theme(text = element_text(size = 10),
legend.position = "none") +
labs(y = "Latitude", x = "Longitude")
maps
ggsave("output/figS2.png", plot = maps, width = 80, height = 120, units = "mm")
```
# Thermal tolerance phenotype rarefaction curves
```{r, fig.height = 4}
# Number of population ED50adj deciles captured in sample
adjed50dist <- rnorm(100000, meanadj, sdadj)
deciles <- quantile(adjed50dist, probs = seq(0, 1, 0.1))
sample_ed50s <- function(ed50s, n) {
s <- sample(ed50s, n)
prop_deciles <- length(unique(cut(s, deciles))) / 10
return(prop_deciles)
}
rc <- res.adj %>%
select(nursery, ed50_adj) %>%
nest(data = ed50_adj) %>%
crossing(n = 1:25) %>%
mutate(prop = map2(data, n, ~ replicate(10000, sample_ed50s(.x$ed50_adj, .y))),
mean = map_dbl(prop, mean))
rc1fig <- ggplot(rc, aes(x = n, y = mean, color = nursery)) +
geom_line(lwd = 0.3) +
geom_point(size = 0.5) +
scale_y_continuous(limits = c(0, 1)) +
theme_few() +
theme(text = element_text(size = 8),
legend.position = "none") +
labs(x = "Genets sampled", y = "Proportion of population\nED50 deciles captured")
# Prob of any genos in top 25% of population
top25pop <- quantile(rnorm(10000, mean(res.adj$ed50_adj), sd(res.adj$ed50_adj)), prob = 0.75)
sample_ed50s2 <- function(ed50s, n) {
s <- sample(ed50s, n)
any_top25 <- any(s >= top25pop)
return(any_top25)
}
rc2 <- res.adj %>%
select(nursery, ed50_adj) %>%
nest(data = ed50_adj) %>%
crossing(n = 1:25) %>%
mutate(ntop25 = map2(data, n, ~ replicate(10000, sample_ed50s2(.x$ed50_adj, .y))),
mean = map_dbl(ntop25, mean))
rc2fig <- ggplot(rc2, aes(x = n, y = mean, color = nursery)) +
geom_line(lwd = 0.3) +
geom_point(size = 0.5) +
scale_y_continuous(limits = c(0, 1)) +
theme_few() +
theme(text = element_text(size = 8),
legend.position = c(0.7, 0.4),
legend.key.size = unit(3, 'mm'),
legend.spacing.y = unit(1, "mm")) +
labs(x = "Genets sampled",
y = "Probability top quartile\nof population captured") +
guides(color = guide_legend(reverse = TRUE))
fig4 <- cowplot::plot_grid(rc1fig, rc2fig, ncol = 2, labels = "AUTO")
fig4
ggsave("output/fig4.png", width = 114, height = 50, units = "mm")
rc %>%
group_by(nursery) %>%
filter(mean >= 0.5) %>%
filter(n == min(n)) %>%
select(nursery, n_genets = n, mean_range_captured = mean) %>%
knitr::kable(caption = "Number of genets needed to capture >50% of nursery ED50 range")
rc2 %>%
group_by(nursery) %>%
filter(mean >= 0.9) %>%
filter(n == min(n)) %>%
select(nursery, n_genets = n, mean_prob_top25pct = mean) %>%
knitr::kable(caption = "Number of genets needed for 90% chance of capturing ≥1 in top 25% of population")
```
# Compare October to June CBASS results
27 of the genotypes we tested on the October cruise were also tested in June 2020 by Kelsey Johnson-Sapp at RSMAS in an independent set of CBASS assays (this occurred at the time of the genotype swap). We can compare the ED50 values obtained for these 27 genotypes in June and October to see how reproducible the results are.
```{r}
# Filter data for corals run in October and June
df2 <- df.all %>%
drop_na(date) %>%
group_by(nursery, geno) %>%
filter(n_distinct(date) == 2) %>%
ungroup() %>%
select(nursery, name, geno, date, max_temp, fvfm)
# Fit dose-response curves for each genotype on each date
df2$genoN <- factor(paste0("g", group_indices(df2, nursery, geno, date)))
mod2 <- drm(fvfm ~ max_temp, curveid = genoN, data = df2,
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(120, 0.72, 40),
lowerl = c(10, 0.55, 30))
# Extract ED50 estimates and standard error
ed50s <- tidy(mod2) %>%
filter(term == "ED50") %>%
select(genoN = curve, estimate, std.error) %>%
left_join(distinct(df2, nursery, geno, date, genoN))
# Rearrange data to compare ED50 values between dates
ed50s2 <- ed50s %>%
mutate(date = recode(date, "2020-10" = "oct", "2020-06" = "jun")) %>%
select(nursery, geno, date, ed50 = estimate, std.error) %>%
pivot_wider(names_from = date, values_from = c(ed50, std.error)) %>%
filter(geno != "FM19")
# Plot figure
fig <- ggplot(ed50s2, aes(y = ed50_oct, x = ed50_jun)) +
geom_errorbar(aes(xmin = ed50_jun - std.error_jun,
xmax = ed50_jun + std.error_jun), lwd = 0.15) +
geom_errorbar(aes(ymin = ed50_oct - std.error_oct,
ymax = ed50_oct + std.error_oct), lwd = 0.15) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
#geom_label(aes(label = geno), alpha = 0.7, size = 2) +
annotate(geom = "text", x = 34.8, y = 36.5,
label = "Pearson's correlation:", adj = 0, size = 2) +
stat_cor(method = "pearson", label.y = 36.45, label.x = 34.85, size = 2) +
annotate(geom = "text", x = 34.8, y = 36.375,
label = "Spearman's rank correlation:", adj = 0, size = 2) +
stat_cor(method = "spearman", label.y = 36.325, label.x = 34.85, size = 2) +
geom_abline(intercept = 0, slope = 1, lty = 2, color = "black") +
coord_fixed() +
scale_x_continuous(breaks = c(35, 35.5, 36)) +
labs(x = "ED50 (°C) measured in June", y = "ED50 (°C) measured in Oct.") +
theme_few()
fig
ggsave("output/fig5.png", width = 87, height = 97, units = "mm")
# Linear regression
mod <- lm(ed50_oct ~ ed50_jun, data = ed50s2)
## Slope of fit
tibble(slope = coef(mod)[2]) %>% knitr::kable()
## Test if slope is different than 1
car::linearHypothesis(mod, "ed50_jun = 1") %>% knitr::kable()
## Find mean difference between Oct and Jun values
augment(mod) %>%
mutate(diff = ed50_oct - ed50_jun) %>%
summarize("Mean difference Oct. - Jun" = mean(diff)) %>%
knitr::kable()
```
There is a high correlation between ED50 values obtained from the same coral genets (from the same nurseries) run in both June and October (n = 27 genets). This suggests that ED50 values from independently run CBASS assays are relatively reproducible for a given genotype. The shift in y-intercept could be due to seasonal differences, or experimental differences, in October vs. June. Overall, this high reproducibility is very encouraging for the continued use of the CBASS approach for thermal tolerance phenotyping.