-
Notifications
You must be signed in to change notification settings - Fork 1
/
loggle_test_function.R
139 lines (111 loc) · 5.75 KB
/
loggle_test_function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Evaluation function for loggle #######################################################################################
########################################################################################################################
# Input ###
# pos: indices of time points where graphs are estimated
# Omega: a list of precision matrices or adjacency matrices across time points specified by pos
# Omega.true: a list of true precision matrices across all time points
# X: a p by N data matrix containing observations on a time grid ranging from 0 to 1
# Output ###
# FDR: mean and standard deviation of false discovery rates across time points specified by pos
# power: mean and standard deviation of powers across time points specified by pos
# F1: mean of F1 scores across time points specified by pos
# KLdis: mean and standard deviation of KL distance across time points specified by pos
loggle.eval <- function(pos, Omega, Omega.true, X) {
K <- length(pos)
p <- dim(X)[1]
tri.ind <- lower.tri(Omega.true[[1]])
edge.true <- rep(NA, K)
edge <- rep(NA, K)
overlap.edge <- rep(NA, K)
for(k in 1:K) {
adj.mat.true <- as.matrix(Omega.true[[pos[k]]] != 0)
adj.mat <- as.matrix(Omega[[k]] != 0)
edge.true[k] <- sum(adj.mat.true[tri.ind])
edge[k] <- sum(adj.mat[tri.ind])
overlap.edge[k] <- sum((adj.mat.true&adj.mat)[tri.ind])
}
FDR.list <- 1-overlap.edge/edge
FDR.list[is.nan(FDR.list)] <- 0
power.list <- overlap.edge/edge.true
FDR <- c(mean(FDR.list), sd(FDR.list))
power <- c(mean(power.list), sd(power.list))
F1 <- mean(2*(1-FDR.list)*power.list/(1-FDR.list+power.list))
KLdis.list <- rep(NA, K)
Omega.refit <- loggle.refit(X, pos, Omega, print.detail = FALSE)
for(k in 1:K) {
Omega.prod <- t(solve(as.matrix(Omega.true[[pos[k]]]), as.matrix(Omega.refit[[k]])))
KLdis.list[k] <- sum(diag(Omega.prod)) - log(det(Omega.prod)) - p
}
KLdis <- c(mean(KLdis.list), sd(KLdis.list))
result <- list(FDR = FDR, power = power, F1 = F1, KLdis = KLdis)
return(result)
}
# Selection function for cross validation result for method "loggle", "kernel" and "invariant" #########################
########################################################################################################################
# Input ###
# cv.result: results from loggle.cv
# select.type: "all_flexible": optimal d and lambda can vary across time points specified by pos,
# "d_fixed": optimal d is fixed and optimal lambda can vary across time points specified by pos,
# "all_fixed": optimal d and lambda are fixed across time points specified by pos
# cv.vote.thres: an edge is kept after cv.vote if and only if it exists in no less than cv.vote.thres*cv.fold cv folds
# method: "loggle": loggle method,
# "kernel": Zhou's method,
# "invariant": Wang's method
# Output ###
# h.opt: optimal value of h
# d.opt: a vector of optimal values of d for each estimated graph
# lambda.opt: a vector of optimal values of lambda for each estimated graph
# cv.score.opt: optimal cv score (averaged over time points and cv folds)
# edge.num.opt: a vector of numbers of edges for each estimated graph
# edge.opt: a list of edges for each estimated graph
# adj.mat.opt: a list of adjacency matrices for each estimated graph
loggle.cv.select.full <- function(cv.result, select.type = "all_flexible", cv.vote.thres = 0.8, method = "loggle") {
d.list <- as.numeric(colnames(cv.result$cv.result.h[[1]]$cv.score))
H <- length(cv.result$cv.result.h)
cv.fold <- length(cv.result$cv.result.h[[1]]$cv.result.fold)
if(method %in% c("kernel", "invariant")) {
d.pos <- ifelse(method == "kernel", which(d.list == 0), which(d.list == 1))
for(h in 1:H) {
cv.result$cv.result.h[[h]]$cv.score <- cv.result$cv.result.h[[h]]$cv.score[ , d.pos, , , drop=F]
for(i in 1:cv.fold) {
cv.result$cv.result.h[[h]]$cv.result.fold[[i]]$Omega <-
cv.result$cv.result.h[[h]]$cv.result.fold[[i]]$Omega[ , d.pos, , drop=F]
}
}
} else if(method != "loggle") {
stop("method must be 'loggle', 'kernel' or 'invariant'!")
}
cv.select.result <- loggle.cv.select(cv.result, select.type, cv.vote.thres)
return(cv.select.result)
}
# Summary matrix containing number of within-sector and cross-sector edges #############################################
########################################################################################################################
# Input ###
# select.result: result from loggle.cv.select.full
# sp.num: number of stocks within each sector
# h: bandwidth in kernel smoothed edge number
# Output ###
# edge.sector: row 1 to row length(sp.num): within-sector edge number at each time point for each sector
# row length(sp.num)+1: cross-sector edge number at each time point
# row length(sp.num)+2: total edge number at each time point
# edge.sector.sm: smoothed edge.sector across time points with kernel bandwidth h
loggle.sector <- function(select.result, sp.num, h) {
K <- length(select.result$edge.num.opt)
sector.num <- length(sp.num)
sp.ind <- c(0, cumsum(sp.num))
edge.sector <- matrix(NA, sector.num+2, K)
edge.sector.sm <- matrix(NA, sector.num+2, K)
for(i in 1:sector.num) {
edge.sector[i, ] <- sapply(1:K, function(k)
(sum(select.result$adj.mat.opt[[k]][(sp.ind[i]+1):sp.ind[i+1],(sp.ind[i]+1):sp.ind[i+1]])-sp.num[i])/2)
}
edge.sector[sector.num+2, ] <- select.result$edge.num.opt
edge.sector[sector.num+1, ] <- edge.sector[sector.num+2, ] - colSums(edge.sector[1:sector.num, ])
for(k in 1:K) {
Kh <- pmax(3/4*(1-((1:K-k)/((K-1)*h))^2), 0)
omega <- Kh/sum(Kh)
edge.sector.sm[, k] <- edge.sector %*% omega
}
result <- list(edge.sector = edge.sector, edge.sector.sm = edge.sector.sm)
return(result)
}