/
ancient.aux
125 lines (125 loc) · 12.7 KB
/
ancient.aux
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
\relax
\citation{dabney2013ancient}
\citation{meyer2012high}
\citation{pinhasi2015optimal}
\citation{haak2015massive}
\citation{haak2015massive,mathieson2015genome,allentoft2015population,fu2016genetic}
\citation{schraiber2016bayesian,jewett2016effects}
\citation{sjodin2014assessing,lazaridis2014ancient}
\citation{lazaridis2016genomic}
\citation{rasmussen2014genome}
\citation{sawyer2012temporal}
\citation{skoglund2012origins,haak2015massive,mathieson2015genome,allentoft2015population,fu2016genetic,lazaridis2016genomic}
\citation{green2010draft,patterson2012ancient}
\citation{peter2016admixture}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}}
\citation{korneliussen2014angsd}
\citation{jonsson2013mapdamage2}
\citation{racimo2016joint}
\@writefile{toc}{\contentsline {section}{\numberline {2}Methods}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Sampling alleles in ancient populations}{4}}
\newlabel{expectation_matrices}{{2}{5}}
\citation{nielsen2012snp}
\citation{racimo2016joint}
\citation{kelleher2016efficient}
\newlabel{likelihood}{{3}{7}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Results}{7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Impact of coverage and number of samples on inferences}{7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Impact of admixture}{9}}
\citation{mathieson2015genome}
\citation{10002015global}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Impact of contamination}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Application to ancient humans}{10}}
\citation{skoglund2014genomic}
\citation{sawyer2012temporal}
\citation{green2010draft}
\citation{racimo2016joint}
\@writefile{toc}{\contentsline {section}{\numberline {4}Discussion}{12}}
\citation{rohde2004modelling,chang1999recent,baird2003distribution,donnelly1983probability}
\citation{sjodin2014assessing}
\citation{rasmussen2014genome}
\citation{sjodin2014assessing}
\citation{lazaridis2014ancient,haak2015massive,lazaridis2016genomic}
\citation{lazaridis2014ancient,haak2015massive}
\citation{skoglund2014genomic}
\citation{pritchard2000inference}
\citation{alexander2009fast}
\citation{falush2016tutorial}
\citation{racimo2016joint}
\citation{kousathanas2017inferring}
\citation{patterson2012ancient,lipson2017working}
\citation{lipson2014reconstructing,pickrell2012inference}
\citation{kamm2016efficient}
\citation{ewens2012mathematical,karlin1981second}
\citation{griffiths2003frequency}
\@writefile{toc}{\contentsline {section}{\numberline {5}Appendix}{17}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Computing allele frequency moments in the ancient population}{17}}
\newlabel{cond_exp}{{4}{17}}
\newlabel{expectation_ode}{{5}{18}}
\citation{nielsen2003correcting}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Robustness to ascertainment in the modern population}{21}}
\citation{racimo2016joint}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Sites covered exactly once have no information about drift in the ancient population}{22}}
\bibstyle{plainnat}
\bibdata{ancient_bibliography}
\@writefile{toc}{\contentsline {section}{\numberline {6}Software Availability}{23}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Acknowledgments}{23}}
\bibcite{alexander2009fast}{{1}{2009}{{Alexander et~al.}}{{Alexander, Novembre, and Lange}}}
\bibcite{allentoft2015population}{{2}{2015}{{Allentoft et~al.}}{{Allentoft, Sikora, Sj{\"o}gren, Rasmussen, Rasmussen, Stenderup, Damgaard, Schroeder, Ahlstr{\"o}m, Vinner, et~al.}}}
\bibcite{baird2003distribution}{{3}{2003}{{Baird et~al.}}{{Baird, Barton, and Etheridge}}}
\bibcite{chang1999recent}{{4}{1999}{{Chang}}{{}}}
\bibcite{10002015global}{{5}{2015}{{Consortium}}{{}}}
\bibcite{dabney2013ancient}{{6}{2013}{{Dabney et~al.}}{{Dabney, Meyer, and P{\"a}{\"a}bo}}}
\bibcite{donnelly1983probability}{{7}{1983}{{Donnelly}}{{}}}
\bibcite{ewens2012mathematical}{{8}{2012}{{Ewens}}{{}}}
\bibcite{falush2016tutorial}{{9}{2016}{{Falush et~al.}}{{Falush, van Dorp, and Lawson}}}
\bibcite{fu2016genetic}{{10}{2016}{{Fu et~al.}}{{Fu, Posth, Hajdinjak, Petr, Mallick, Fernandes, Furtw{\"a}ngler, Haak, Meyer, Mittnik, et~al.}}}
\bibcite{green2010draft}{{11}{2010}{{Green et~al.}}{{Green, Krause, Briggs, Maricic, Stenzel, Kircher, Patterson, Li, Zhai, Fritz, et~al.}}}
\bibcite{griffiths2003frequency}{{12}{2003}{{Griffiths}}{{}}}
\bibcite{haak2015massive}{{13}{2015}{{Haak et~al.}}{{Haak, Lazaridis, Patterson, Rohland, Mallick, Llamas, Brandt, Nordenfelt, Harney, Stewardson, et~al.}}}
\bibcite{jewett2016effects}{{14}{2016}{{Jewett et~al.}}{{Jewett, Steinr{\"u}cken, and Song}}}
\bibcite{jonsson2013mapdamage2}{{15}{2013}{{J{\'o}nsson et~al.}}{{J{\'o}nsson, Ginolhac, Schubert, Johnson, and Orlando}}}
\bibcite{kamm2016efficient}{{16}{2016}{{Kamm et~al.}}{{Kamm, Terhorst, and Song}}}
\bibcite{karlin1981second}{{17}{1981}{{Karlin and Taylor}}{{}}}
\bibcite{kelleher2016efficient}{{18}{2016}{{Kelleher et~al.}}{{Kelleher, Etheridge, and McVean}}}
\bibcite{korneliussen2014angsd}{{19}{2014}{{Korneliussen et~al.}}{{Korneliussen, Albrechtsen, and Nielsen}}}
\bibcite{kousathanas2017inferring}{{20}{2017}{{Kousathanas et~al.}}{{Kousathanas, Leuenberger, Link, Sell, Burger, and Wegmann}}}
\bibcite{lazaridis2014ancient}{{21}{2014}{{Lazaridis et~al.}}{{Lazaridis, Patterson, Mittnik, Renaud, Mallick, Kirsanow, Sudmant, Schraiber, Castellano, Lipson, et~al.}}}
\bibcite{lazaridis2016genomic}{{22}{2016}{{Lazaridis et~al.}}{{Lazaridis, Nadel, Rollefson, Merrett, Rohland, Mallick, Fernandes, Novak, Gamarra, Sirak, et~al.}}}
\bibcite{lipson2017working}{{23}{2017}{{Lipson and Reich}}{{}}}
\bibcite{lipson2014reconstructing}{{24}{2014}{{Lipson et~al.}}{{Lipson, Loh, Patterson, Moorjani, Ko, Stoneking, Berger, and Reich}}}
\bibcite{mathieson2015genome}{{25}{2015}{{Mathieson et~al.}}{{Mathieson, Lazaridis, Rohland, Mallick, Patterson, Roodenberg, Harney, Stewardson, Fernandes, Novak, et~al.}}}
\bibcite{meyer2012high}{{26}{2012}{{Meyer et~al.}}{{Meyer, Kircher, Gansauge, Li, Racimo, Mallick, Schraiber, Jay, Pr{\"u}fer, De~Filippo, et~al.}}}
\bibcite{nielsen2003correcting}{{27}{2003}{{Nielsen and Signorovitch}}{{}}}
\bibcite{nielsen2012snp}{{28}{2012}{{Nielsen et~al.}}{{Nielsen, Korneliussen, Albrechtsen, Li, and Wang}}}
\bibcite{patterson2012ancient}{{29}{2012}{{Patterson et~al.}}{{Patterson, Moorjani, Luo, Mallick, Rohland, Zhan, Genschoreck, Webster, and Reich}}}
\bibcite{peter2016admixture}{{30}{2016}{{Peter}}{{}}}
\bibcite{pickrell2012inference}{{31}{2012}{{Pickrell and Pritchard}}{{}}}
\bibcite{pinhasi2015optimal}{{32}{2015}{{Pinhasi et~al.}}{{Pinhasi, Fernandes, Sirak, Novak, Connell, Alpaslan-Roodenberg, Gerritsen, Moiseyev, Gromov, Raczky, et~al.}}}
\bibcite{pritchard2000inference}{{33}{2000}{{Pritchard et~al.}}{{Pritchard, Stephens, and Donnelly}}}
\bibcite{racimo2016joint}{{34}{2016}{{Racimo et~al.}}{{Racimo, Renaud, and Slatkin}}}
\bibcite{rasmussen2014genome}{{35}{2014}{{Rasmussen et~al.}}{{Rasmussen, Anzick, Waters, Skoglund, DeGiorgio, Stafford~Jr, Rasmussen, Moltke, Albrechtsen, Doyle, et~al.}}}
\bibcite{rohde2004modelling}{{36}{2004}{{Rohde et~al.}}{{Rohde, Olson, and Chang}}}
\bibcite{sawyer2012temporal}{{37}{2012}{{Sawyer et~al.}}{{Sawyer, Krause, Guschanski, Savolainen, and P{\"a}{\"a}bo}}}
\bibcite{schraiber2016bayesian}{{38}{2016}{{Schraiber et~al.}}{{Schraiber, Evans, and Slatkin}}}
\bibcite{sjodin2014assessing}{{39}{2014}{{Sj{\"o}din et~al.}}{{Sj{\"o}din, Skoglund, and Jakobsson}}}
\bibcite{skoglund2012origins}{{40}{2012}{{Skoglund et~al.}}{{Skoglund, Malmstr{\"o}m, Raghavan, Stor{\r a}, Hall, Willerslev, Gilbert, G{\"o}therstr{\"o}m, and Jakobsson}}}
\bibcite{skoglund2014genomic}{{41}{2014}{{Skoglund et~al.}}{{Skoglund, Malmstr{\"o}m, Omrak, Raghavan, Valdiosera, G{\"u}nther, Hall, Tambets, Parik, Sj{\"o}gren, et~al.}}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The generative model. Alleles are found at frequency $x$ in the modern population and are at frequency $y$ in the ancient population. The modern population has effective size $N_e^{(1)}$ and has evolved for $\tau _1$ generations since the common ancestor of the modern and ancient populations, while the ancient population is of size $N_e^{(2)}$ and has evolved for $\tau _2$ generations. Ancient diploid samples are taken and sequenced to possibly low coverage, with errors. Arrows indicate that the sampling probability can be calculated by evolving alleles \emph {backward} in time from the modern population and then forward in time to the ancient population.}}{29}}
\newlabel{generative_model}{{1}{29}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Impact of sampling scheme on parameter estimation error. In each panel, the $x$ axis represents the number of simulated ancient samples, while the $y$ axis shows the relative root mean square error for each parameter. Each different line corresponds to individuals sequenced to different depth of coverage. Panel A shows results for $t_1$ while panel B shows results for $t_2$. Simulated parameters are $t_1 = 0.02$ and $t_2 = 0.05$.}}{30}}
\newlabel{RMSE}{{2}{30}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Impact of sampling scheme on rejecting population continuity. The $x$ axis represents the age of the ancient sample in generations, with 0 indicating a modern sample and 400 indicating a sample from exactly at the split time 400 generations ago. The $y$ axis shows the proportion of simulations in which we rejected the null hypothesis of population continuity. Each line shows different sampling schemes, as explained in the legend.}}{31}}
\newlabel{continuity}{{3}{31}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Impact of admixture from the ancient population on inferred parameters. The $x$ axis shows the admixture proportion and the $y$ axis shows the average parameter estimate across simulations. Each line corresponds to a different sampling strategy, as indicated in the legend. Panel A shows results for $t_1$ and Panel B shows results for $t_2$. The true values of $t_1 = 0.02$ and $t_2 = 0.05$ are indicated by dashed lines.}}{32}}
\newlabel{admixture}{{4}{32}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Impact of ghost admixture on rejecting continuity. The $x$ axis shows the admixture proportion from the ghost population, and the $y$ axis shows the fraction of simulations in which continuity was rejected. Each line corresponds to a different sampling strategy, as indicated in the legend.}}{33}}
\newlabel{ghost}{{5}{33}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Impact of contamination on parameter inference. The $x$ axis shows the contamination fraction, and the $y$ axis shows the average parameter estimate from simulations. Each line corresponds to a different sampling strategy, as indicated in the legend. Panel A shows $t_1$, and Panel B shows $t_2$. Dashed lines indicate the true values of $t_1 = 0.02$ and $t_2 = 0.05$}}{34}}
\newlabel{contamination}{{6}{34}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Parameters of the model inferred from ancient West Eurasian samples. Panel A shows $t_1$ on the x-axis and $t_2$ on the y-axis, with each point corresponding to a population as indicated in the legend. Numbers in the legend correspond to the mean date of all samples in the population. Panels B and C show scatterplots of the mean age of the samples in the population (x-axis) against $t_1$ and $t_2$, respectively. Points are described by the same legend as Panel A.}}{35}}
\newlabel{pops_together}{{7}{35}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Impact of pooling individuals into populations when estimating model parameters from real data. In both panels, the x-axis indicates the parameter estimate when individuals are analyzed separately, while the y-axis indicates the parameter estimate when individuals are grouped into populations. Size of points is proportional to the coverage of each individual. Panel A reports the impact on estimation of $t_1$, while Panel B reports the impact on $t_2$. Note that Panel B has a broken x-axis. Solid lines in each figure indicate $y = x$.}}{36}}
\newlabel{sep_vs_pops}{{8}{36}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Details of populations included in analysis. ``pop'' is population name, ``cov'' is mean coverage of individuals in the population, ``date'' is mean date of individuals in the population, ``$t_1$'' is the maximum likelihood estimate of $t_1$ in the full model, ``$t_2$'' is the maximum likelihood estimate of $t_2$ in the full model, ``LnL'' is the maximum likelihood value in the full model, ``$t_1$ (cont)'' is the maximum likelihood estimate of $t_1$ in the model where $t_2 = 0$, ``LnL'' is the maximum likelihood value in the model where $t_2 = 0$.}}{37}}
\newlabel{params_table}{{1}{37}}