-
Notifications
You must be signed in to change notification settings - Fork 1
/
temporal_analysis.jl
150 lines (128 loc) · 5.02 KB
/
temporal_analysis.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
include("algorithms.jl")
using Base.Threads
using DelimitedFiles
using FileIO
using JLD2
using Random
using Statistics
using ScikitLearn
@sk_import metrics: average_precision_score
"""
    recovery_over_time(dataset::String, interval_in_days::Int64=10)

Measure how well the core of `dataset` is recovered as more of the temporal
data becomes available.

The data is sampled at fractions `p = k * interval_in_days / total_days`
(for `k = 1, 2, ...` while `p <= 1`), where `total_days` is the span of
`TD.times` divided by 86400 (i.e. timestamps are assumed to be in seconds).
For every sample the graph built from `quantiled_data(TD, p)` is scored with:

- Union of minimal vertex covers (UMVC)
- Degree ordering
- Betweenness centrality score ordering
- Borgatti & Everett core scores
- an upper bound, in which every core node with nonzero degree in the sampled
  graph outscores all other nodes

Two metrics are computed per method and sample: the precision at core size
(fraction of the core found among the top `nc` nodes of the ordering) and the
area under the precision-recall curve (scikit-learn's
`average_precision_score` against the 0/1 core indicator).

# Arguments
- `dataset::String`: dataset name, passed to `read_temporal_data` and `read_core`.
- `interval_in_days::Int64`: sampling interval in days; must be positive.

# Output
Results are stored in `output/<dataset>-temporal-perf-stats.jld2` (the
directory is created if needed) under the keys `ps`, `interval`, `frac_core`,
`frac_nodes`, `frac_edges`, `ncore`, `nedges`, `nnodes`, and
`<method>_auprc` / `<method>_pacs` for `deg`, `mvc`, `btw`, `upb`, `bev`.

# Throws
- `ArgumentError` if `interval_in_days` is not positive.
"""
function recovery_over_time(dataset::String, interval_in_days::Int64=10)
    interval_in_days > 0 ||
        throw(ArgumentError("interval_in_days must be positive, got $interval_in_days"))

    Random.seed!(1234)  # for consistency
    TD = read_temporal_data(dataset)
    A = TemporalData2SimpleGraph(TD)
    n = size(A, 1)
    core01 = read_core(dataset, n)
    core = findall(core01 .> 0)
    nc = length(core)

    # Sampling fractions: one step per `interval_in_days` of the observed span.
    total_days = (maximum(TD.times) - minimum(TD.times)) / 86400
    increment = interval_in_days / total_days
    ps = collect(increment:increment:1.0)
    np = length(ps)

    perf_deg_pacs = zeros(Float64, np)
    perf_mvc_pacs = zeros(Float64, np)
    perf_upb_pacs = zeros(Float64, np)
    perf_btw_pacs = zeros(Float64, np)
    perf_bev_pacs = zeros(Float64, np)

    shuffled_inds = shuffle(1:np)

    # One column of per-node scores per sample; AUPRC is computed afterwards.
    nscores = length(core01)
    all_deg_scores = zeros(Float64, nscores, np)
    all_mvc_scores = zeros(Float64, nscores, np)
    all_btw_scores = zeros(Float64, nscores, np)
    all_upb_scores = zeros(Float64, nscores, np)
    all_bev_scores = zeros(Float64, nscores, np)

    # Totals of the full graph, used to normalise the per-sample fractions.
    nedges = nnz(A) / 2
    nnodes = sum(vec(sum(A, dims=2)) .> 0)
    frac_edges = zeros(Float64, np)
    frac_core = zeros(Float64, np)
    frac_nodes = zeros(Float64, np)

    # Precision at core size: share of the core among the first `k` entries.
    pacs(ord::Vector{Int64}, k) = length(intersect(ord[1:k], core)) / nc

    # The loop runs serially. The thread-id guard keeps progress output on a
    # single thread should the loop be parallelised with `Threads.@threads`.
    for (ii, i) in enumerate(shuffled_inds)
        p = ps[i]
        if Threads.threadid() == 1
            print("$(ii) of $(np)... \r")
            flush(stdout)
        end

        # Collect data and scores
        Ap = TemporalData2SimpleGraph(quantiled_data(TD, p), n)
        deg_order, d = degree_order(Ap)
        mvc_order = UMVC_order(Ap)
        # Rank-based score: the first node in the ordering gets the highest value.
        mvc_scores = (length(mvc_order):-1:1)[sortperm(mvc_order)]
        btw_order, btw_scores = betweenness_order(Ap)
        bev_order, bev_scores = BorgattiEverett_order(Ap)
        # Core nodes that have at least one edge in the sampled graph.
        non_iso_core = core[findall(d[core] .> 0)]

        # precision @ core size
        end_ind = min(nc, size(Ap, 1))
        perf_deg_pacs[i] = pacs(deg_order, end_ind)
        perf_mvc_pacs[i] = pacs(mvc_order, end_ind)
        perf_btw_pacs[i] = pacs(btw_order, end_ind)
        perf_bev_pacs[i] = pacs(bev_order, end_ind)
        perf_upb_pacs[i] = length(non_iso_core) / nc

        # upper bound: random scores, with reachable core nodes above them all
        upb_scores = rand(Float64, n)
        mval = maximum(upb_scores)
        upb_scores[non_iso_core] .= mval * 10.0

        # Store data to avoid threading issues
        all_deg_scores[:, i] = convert(Vector{Float64}, d)
        all_mvc_scores[:, i] = mvc_scores
        all_btw_scores[:, i] = btw_scores
        all_bev_scores[:, i] = bev_scores
        all_upb_scores[:, i] = upb_scores

        frac_edges[i] = (nnz(Ap) / 2) / nedges
        frac_core[i] = length(non_iso_core) / nc
        frac_nodes[i] = sum(vec(sum(Ap, dims=2)) .> 0) / nnodes
    end

    # Area under the precision-recall curve for every sample (column) of scores.
    auprc(scores) =
        Float64[average_precision_score(core01, scores[:, i]) for i in 1:np]

    perf_deg_auprc = auprc(all_deg_scores)
    perf_mvc_auprc = auprc(all_mvc_scores)
    perf_btw_auprc = auprc(all_btw_scores)
    perf_bev_auprc = auprc(all_bev_scores)
    perf_upb_auprc = auprc(all_upb_scores)

    outfile = "output/$dataset-temporal-perf-stats.jld2"
    mkpath(dirname(outfile))  # saving fails if the directory is missing
    return save(outfile,
                Dict("ps" => ps,
                     "interval" => interval_in_days,
                     "frac_core" => frac_core,
                     "frac_nodes" => frac_nodes,
                     "frac_edges" => frac_edges,
                     "ncore" => nc,
                     "nedges" => nedges,
                     "nnodes" => nnodes,
                     "deg_auprc" => perf_deg_auprc,
                     "mvc_auprc" => perf_mvc_auprc,
                     "btw_auprc" => perf_btw_auprc,
                     "upb_auprc" => perf_upb_auprc,
                     "bev_auprc" => perf_bev_auprc,
                     "deg_pacs" => perf_deg_pacs,
                     "mvc_pacs" => perf_mvc_pacs,
                     "btw_pacs" => perf_btw_pacs,
                     "upb_pacs" => perf_upb_pacs,
                     "bev_pacs" => perf_bev_pacs))
end