-
Notifications
You must be signed in to change notification settings - Fork 2
/
newick2timestamp.cpp
129 lines (119 loc) · 3.82 KB
/
newick2timestamp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/**
* Helper script to parse Newick trees
*
* Usage:
* cat newicktree.txt | ./newick2dot dot-prefix
*/
#include "NewickTree.h"
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <set>
#include <map>
#include <string>
#include <cmath>
#include <cassert>
#include <cstdlib>
using namespace std;
int atoi_min(char const *value, int min)
{
std::istringstream iss(value);
int i;
char c;
if (!(iss >> i) || iss.get(c))
{
cerr << "error: value " << value << " must be of type <int>, and greater than or equal to " << min << endl;
std::exit(1);
}
if (i < min)
{
cerr << "error: value " << value << " must be greater than or equal to " << min << endl;
std::exit(1);
}
return i;
}
map<unsigned,unsigned> init_pop_map(const char *fn)
{
// Init population map
ifstream ifs(fn);
map<unsigned,unsigned> popm;
map<unsigned,unsigned> popcount;
if (!ifs.good())
{
cerr << "error: unable to read file " << fn << endl;
return popm;
}
while (ifs.good())
{
unsigned lid = 0, p = 0;
ifs >> lid;
if (!ifs.good())
break;
ifs >> p;
assert (p > 0);
assert (lid > 0);
popm[lid] = p;
popcount[p]+=1;
}
cerr << "Read " << popcount.size() << " populations:" << endl;
for (map<unsigned,unsigned>::iterator it = popcount.begin(); it != popcount.end(); ++it)
cerr << " population " << it->first << " with " << it->second << " leaf ids" << endl;
return popm;
}
int main(int argc, char ** argv)
{
if (argc != 5)
{
cerr << "usage: cat newicktree.txt | " << argv[0] << " <pop_map.txt> <pop1> <pop2> <max rows>" << endl;
cerr << " pops_map.txt - text file that lists pairs of <node id, pop id>" << endl;
return 1;
}
map<unsigned,unsigned> popmap = init_pop_map(argv[1]);
if (popmap.empty())
{
cerr << "error: unable to read pop map input" << endl;
return 1;
}
unsigned popl = atoi_min(argv[2], 1);
unsigned popr = atoi_min(argv[3], 1);
cerr << "comparing pairs from pop " << popl << " vs " << popr << endl;
unsigned maxr = atoi_min(argv[4], 1);
// Init tree input from <stdin>
NewickTree predt("-");
unsigned n = 0;
unsigned base = 0;
while (predt.good() && n < maxr)
{
base += predt.validForBases(); // bp position of the tree
for(map<unsigned,unsigned>::iterator it = popmap.begin(); it != popmap.end(); ++it)
{
if (it->second != popl)
continue;
map<unsigned,unsigned>::iterator itt = popmap.begin();
while (itt->second != popr && itt != popmap.end())
++itt;
while (itt != popmap.end())
{
assert(it->second == popl); // Compare exactly these populations
assert(itt->second == popr);
if (it->first != itt->first && (popl != popr || it->first < itt->first)){
// Output format: leaf X leaf Y column bp position timestamp of pair X,Y at position n
cout << "TIME" << '\t' << it->first << '\t' << itt->first << '\t' << n << '\t' << base << '\t' << predt.getLCATime(it->first, itt->first) << '\n';
}
do ++itt;
while (itt->second != popr && itt != popmap.end());
}
}
do
{
// Find next inferred tree
predt.next();
} while (predt.good() && predt.validForBases() == 0); // Skip if valid only for 0 bases
n++;
if (n%1000 == 0)
cerr << n << " lines processed" << endl;
}
cerr << n << " sites, " << base << " predt_bases" << endl;
return 0;
}