-
Notifications
You must be signed in to change notification settings - Fork 73
/
systemtest_multivariate_te.py
279 lines (244 loc) · 11.5 KB
/
systemtest_multivariate_te.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
"""System test for multivariate TE using the JIDT Kraskov estimator."""
import os
import random as rn
import numpy as np
from idtxl.multivariate_te import MultivariateTE
from idtxl.data import Data
from idtxl.idtxl_utils import calculate_mi
def test_multivariate_te_corr_gaussian(estimator=None):
"""Test multivariate TE estimation on correlated Gaussians.
Run the multivariate TE algorithm on two sets of random Gaussian data with
a given covariance. The second data set is shifted by one sample creating
a source-target delay of one sample. This example is modeled after the
JIDT demo 4 for transfer entropy. The resulting TE can be compared to the
analytical result (but expect some error in the estimate).
The simulated delay is 1 sample, i.e., the algorithm should find
significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
The final target sample should always be (1, 1), the mandatory sample at
lat 1, because there is no memory in the process.
Note:
This test runs considerably faster than other system tests.
This produces strange small values for non-coupled sources. TODO
"""
if estimator is None:
estimator = 'JidtKraskovCMI'
n = 1000
cov = 0.4
source = [rn.normalvariate(0, 1) for r in range(n)]
target = [sum(pair) for pair in zip(
[cov * y for y in source],
[(1 - cov) * y for y in [rn.normalvariate(0, 1) for r in range(n)]])]
# Cast everything to numpy so the idtxl estimator understands it.
source = np.expand_dims(np.array(source), axis=1)
target = np.expand_dims(np.array(target), axis=1)
data = Data(normalise=True)
data.set_data(np.vstack((source[1:].T, target[:-1].T)), 'ps')
settings = {
'cmi_estimator': estimator,
'max_lag_sources': 5,
'min_lag_sources': 1,
'max_lag_target': 5,
'n_perm_max_stat': 21,
'n_perm_min_stat': 21,
'n_perm_omnibus': 21,
'n_perm_max_seq': 21,
}
random_analysis = MultivariateTE()
results = random_analysis.analyse_single_target(settings, data, 1)
# Assert that there are significant conditionals from the source for target
# 1. For 500 repetitions I got mean errors of 0.02097686 and 0.01454073 for
# examples 1 and 2 respectively. The maximum errors were 0.093841 and
# 0.05833172 repectively. This inspired the following error boundaries.
corr_expected = cov / (1 * np.sqrt(cov**2 + (1-cov)**2))
expected_res = calculate_mi(corr_expected)
estimated_res = results.get_single_target(1, fdr=False).omnibus_te
diff = np.abs(estimated_res - expected_res)
print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error:'
'{2:2.2f} % ').format(
estimated_res, expected_res, diff / expected_res))
assert (diff < 0.1), ('Multivariate TE calculation for correlated '
'Gaussians failed (error larger 0.1: {0}, expected: '
'{1}, actual: {2}).'.format(
diff, expected_res, estimated_res))
def test_multivariate_te_lagged_copies():
"""Test multivariate TE estimation on a lagged copy of random data.
Run the multivariate TE algorithm on two sets of random data, where the
second set is a lagged copy of the first. This test should find no
significant conditionals at all (neither in the target's nor in the
source's past).
Note:
This test takes several hours and may take one to two days on some
machines.
"""
lag = 3
d_0 = np.random.rand(1, 1000, 20)
d_1 = np.hstack((np.random.rand(1, lag, 20), d_0[:, lag:, :]))
data = Data()
data.set_data(np.vstack((d_0, d_1)), 'psr')
settings = {
'cmi_estimator': 'JidtKraskovCMI',
'max_lag_sources': 5,
'n_perm_max_stat': 21,
'n_perm_min_stat': 21,
'n_perm_omnibus': 500,
'n_perm_max_seq': 500,
}
random_analysis = MultivariateTE()
# Assert that there are no significant conditionals in either direction
# other than the mandatory single sample in the target's past (which
# ensures that we calculate a proper TE at any time in the algorithm).
for t in range(2):
results = random_analysis.analyse_single_target(settings, data, t)
assert len(results.get_single_target(t, fdr=False).selected_vars_full) == 1, (
'Conditional contains more/less than 1 variables.')
assert not results.get_single_target(t, fdr=False).selected_vars_sources, (
'Conditional sources is not empty.')
assert len(results.get_single_target(t, fdr=False).selected_vars_target) == 1, (
'Conditional target contains more/less than 1 variable.')
assert results.get_single_target(t, fdr=False).selected_sources_pval is None, (
'Conditional p-value is not None.')
assert results.get_single_target(t, fdr=False).omnibus_pval is None, (
'Omnibus p-value is not None.')
assert results.get_single_target(t, fdr=False).omnibus_sign is None, (
'Omnibus significance is not None.')
assert results.get_single_target(t, fdr=False).selected_sources_te is None, (
'Conditional TE values is not None.')
def test_multivariate_te_random():
"""Test multivariate TE estimation on two random data sets.
Run the multivariate TE algorithm on two sets of random data with no
coupling. This test should find no significant conditionals at all (neither
in the target's nor in the source's past).
Note:
This test takes several hours and may take one to two days on some
machines.
"""
d = np.random.rand(2, 1000, 20)
data = Data()
data.set_data(d, 'psr')
settings = {
'cmi_estimator': 'JidtKraskovCMI',
'max_lag_sources': 5,
'min_lag_sources': 1,
'n_perm_max_stat': 200,
'n_perm_min_stat': 200,
'n_perm_omnibus': 500,
'n_perm_max_seq': 500,
}
random_analysis = MultivariateTE()
# Assert that there are no significant conditionals in either direction
# other than the mandatory single sample in the target's past (which
# ensures that we calculate a proper TE at any time in the algorithm).
for t in range(2):
results = random_analysis.analyse_single_target(settings, data, t)
assert len(results.get_single_target(t, fdr=False).selected_vars_full) == 1, (
'Conditional contains more/less than 1 variables.')
assert not results.get_single_target(t, fdr=False).selected_vars_sources, (
'Conditional sources is not empty.')
assert len(results.get_single_target(t, fdr=False).selected_vars_target) == 1, (
'Conditional target contains more/less than 1 variable.')
assert results.get_single_target(t, fdr=False).selected_sources_pval is None, (
'Conditional p-value is not None.')
assert results.get_single_target(t, fdr=False).omnibus_pval is None, (
'Omnibus p-value is not None.')
assert results.get_single_target(t, fdr=False).omnibus_sign is None, (
'Omnibus significance is not None.')
assert results.get_single_target(t, fdr=False).selected_sources_te is None, (
'Conditional TE values is not None.')
def test_multivariate_te_lorenz_2():
"""Test multivariate TE estimation on bivariately couled Lorenz systems.
Run the multivariate TE algorithm on two Lorenz systems with a coupling
from first to second system with delay u = 45 samples. Both directions are
analyzed, the algorithm should not find a coupling from system two to one.
Note:
This test takes several hours and may take one to two days on some
machines.
"""
# load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
d = np.load(os.path.join(os.path.dirname(__file__),
'data/lorenz_2_exampledata.npy'))
data = Data()
data.set_data(d, 'psr')
settings = {
'cmi_estimator': 'JidtKraskovCMI',
'max_lag_sources': 47,
'min_lag_sources': 42,
'max_lag_target': 20,
'tau_target': 2,
'n_perm_max_stat': 21, # 200
'n_perm_min_stat': 21, # 200
'n_perm_omnibus': 21,
'n_perm_max_seq': 21, # this should be equal to the min stats b/c we
# reuse the surrogate table from the min stats
}
lorenz_analysis = MultivariateTE()
# FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
# estimation, this makes things faster, b/c these don't have to be
# tested again. Note conditionals are specified using lags.
settings['add_conditionals'] = [(1, 19), (1, 17), (1, 15), (1, 13),
(1, 11), (1, 9), (1, 7), (1, 5), (1, 3),
(1, 1)]
settings['max_lag_sources'] = 60
settings['min_lag_sources'] = 31
settings['tau_sources'] = 2
settings['max_lag_target'] = 1
settings['tau_target'] = 1
# Just analyse the direction of coupling
results = lorenz_analysis.analyse_single_target(settings, data, target=1)
print(results._single_target)
print(results.get_adjacency_matrix('binary', fdr=False))
def test_multivariate_te_mute():
"""Test multivariate TE estimation on the MUTE example network.
Test data comes from a network that is used as an example in the paper on
the MuTE toolbox (Montalto, PLOS ONE, 2014, eq. 14). The network has the
following (non-linear) couplings:
0 -> 1, u = 2
0 -> 2, u = 3
0 -> 3, u = 2 (non-linear)
3 -> 4, u = 1
4 -> 3, u = 1
The maximum order of any single AR process is never higher than 2.
"""
data = Data()
data.generate_mute_data(n_samples=1000, n_replications=10)
settings = {
'cmi_estimator': 'JidtKraskovCMI',
'max_lag_sources': 3,
'min_lag_sources': 1,
'max_lag_target': 3,
'n_perm_max_stat': 21,
'n_perm_min_stat': 21,
'n_perm_omnibus': 21,
'n_perm_max_seq': 21} # this should be equal to the min stats b/c we
# reuse the surrogate table from the min stats
network_analysis = MultivariateTE()
network_analysis.analyse_network(settings, data, targets=[1, 2])
def test_multivariate_te_multiple_runs():
"""Test TE estimation using multiple runs on the GPU.
Test if data is correctly split over multiple runs, if the problem size
exceeds the GPU global memory and thus requires multiple runs. Using a
number of permutations of 7000 requires two runs on a GPU with global
memory of about 6 GB.
"""
data = Data()
data.generate_mute_data(n_samples=1000, n_replications=10)
settings = {
'cmi_estimator': 'OpenCLKraskovCMI',
'max_lag_sources': 3,
'min_lag_sources': 1,
'max_lag_target': 3,
'n_perm_max_stat': 7000,
'n_perm_min_stat': 7000,
'n_perm_omnibus': 21,
'n_perm_max_seq': 21} # this should be equal to the min stats b/c we
# reuse the surrogate table from the min stats
network_analysis = MultivariateTE()
network_analysis.analyse_network(settings, data, targets=[1, 2])
if __name__ == '__main__':
test_multivariate_te_mute()
test_multivariate_te_lorenz_2()
test_multivariate_te_random()
test_multivariate_te_lagged_copies()
test_multivariate_te_multiple_runs()
test_multivariate_te_corr_gaussian()
test_multivariate_te_corr_gaussian('OpenCLKraskovCMI')