-
Notifications
You must be signed in to change notification settings - Fork 2
/
Train.m
178 lines (151 loc) · 6.11 KB
/
Train.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
% Online Cyber-Attack Detection using Model-free RL for POMDPs
% Learning Phase (Algorithm 1)
% System (Smart Grid) Parameters
K = 23; N = 13;
sigvsq = 1e-4; sigwsq = 2e-4;
load H;
load A;
load new_angles;
I=4; % number of quantization intervals
betas = [0 0.95e-2 1.05e-2 1.15e-2]; % quantization thresholds
M = 4; % size of the finite history (memory) window
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
c = 0.2; % relative cost, tradeoff parameter for ADD vs PFA (or FAP)
Q = zeros(I^M,2); % Q-table
no_episodes = 4e5; % number of training episodes (need to be chosen higher for better learning)
max_time = 200; % max. duration of an episode (if "stop" is chosen, then episode is terminated)
settle_time = 20;
attack_time = settle_time; % choose the attack launch time
% attack_time = max_time - 100; % choose the attack launch time
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
alpha = 0.1;
eps = 0.1;
level_eps = norminv(1-eps);
load ind_mat_v2;
tic;
for i = 1:no_episodes
i
t = 1;
a = 2; % initial action (stop: a=1, continue: a=2)
P = eye(N);
G = zeros(N,K);
x = new_angles; % initial state parameters obtained in Matpower
hx0_m = new_angles; % initialization of the state estimates
% For Kalman filter convergence (during normal operating conditions)
while (t <= settle_time)
% State update at time t
v = mvnrnd(zeros(N,1),sigvsq*eye(N))';
x = A*x + v;
% Collect the measurements at time t
w = mvnrnd(zeros(K,1),sigwsq*eye(K))';
y = H*x + w;
% Prediction Step of Kalman Filter
hx0_p = A*hx0_m;
P = A*P*A' + sigvsq*eye(N);
% Measurement Update Step of Kalman Filter
G = P*H'/(H*P*H'+(sigwsq)*eye(K));
hx0_m = hx0_p + G*(y-H*hx0_p);
P = P - G*H*P;
% Compute quantized measurements
eta_t = (y - H*hx0_m)'*(y - H*hx0_m); % negative log-scaled likelihood
if (eta_t >= betas(4))
recent = 4;
elseif ((eta_t >= betas(3)) && (eta_t < betas(4)))
recent = 3;
elseif ((eta_t >= betas(2)) && (eta_t < betas(3)))
recent = 2;
elseif ((eta_t >= betas(1)) && (eta_t < betas(2)))
recent = 1;
end
if (t == settle_time)
quant_now = recent; % (initial) most recent sliding window of size 4
elseif (t == settle_time-1)
quant_1 = recent;
elseif (t == settle_time-2)
quant_2 = recent;
elseif (t == settle_time-3)
quant_3 = recent;
end
% Time update
t = t+1;
end
tmp3 = quant_3; tmp2 = quant_2; tmp1 = quant_1; tmp_now = quant_now;
rslt = strcat(num2str(tmp3),num2str(tmp2),num2str(tmp1),num2str(tmp_now));
index = str2double(rslt);
o = ind_mat(index); % initial o
terminal = 0;
while (settle_time < t && t < max_time && terminal == 0)
% State update at time t
v = mvnrnd(zeros(N,1),sigvsq*eye(N))';
x = A*x + v;
% Collect the measurements at time t
w = mvnrnd(zeros(K,1),sigwsq*eye(K))';
% Attack parameters: train the agent so that it can detect low-magnitude hybrid attacks (choose desired minimum levels for detection purposes)
ddd = randn(1); sgn = 1*(ddd>0) - 1*(ddd<=0);
fdata = sgn*(0.02 + 0.04*rand(K,1)); % false data
ss = (randn(1) > 0)*2e-4; % with prob. 0.5, jamming occurs
sigma = (ss+ss*rand(K,1)); % jamming noise variance
% tmp_mat = randn(K,K);
% rand_mat = abs(tmp_mat*tmp_mat'); % correlation terms
% rand_mat = diag(sigma) + rand_mat - diag(diag(rand_mat)); % variance terms
noise = mvnrnd(zeros(K,1),diag(sigma))'; % correlated (over space) jamming noise
y = H*x + w + (fdata+noise)*(t >= attack_time);
% Prediction Step of Kalman Filter
hx0_p = A*hx0_m;
P = A*P*A' + sigvsq*eye(N);
% Measurement Update Step of Kalman Filter
G = P*H'/(H*P*H'+(sigwsq)*eye(K));
hx0_m = hx0_p + G*(y-H*hx0_p);
P = P - G*H*P;
% Compute quantized measurements
eta_t = (y - H*hx0_m)'*(y - H*hx0_m);
if (eta_t >= betas(4))
recent = 4;
elseif ((eta_t >= betas(3)) && (eta_t < betas(4)))
recent = 3;
elseif ((eta_t >= betas(2)) && (eta_t < betas(3)))
recent = 2;
elseif ((eta_t >= betas(1)) && (eta_t < betas(2)))
recent = 1;
end
% Update on the sliding window
quant_3 = quant_2;
quant_2 = quant_1;
quant_1 = quant_now;
quant_now = recent;
% Determine the new observation
tmp3 = quant_3; tmp2 = quant_2; tmp1 = quant_1; tmp_now = quant_now;
rslt = strcat(num2str(tmp3),num2str(tmp2),num2str(tmp1),num2str(tmp_now));
index = str2double(rslt);
o_new = ind_mat(index);
if ((t < attack_time) && (a == 1))
r = 1;
elseif ((t >= attack_time) && (a == 1))
r = 0;
elseif ((t >= attack_time) && (a == 2))
r = c;
elseif ((t < attack_time) && (a == 2))
r = 0;
end
% Determine the next action based on the new observation and the epsilon-greedy strategy
if (randn(1) < level_eps)
[tmp,a_new] = min(Q(o_new,:));
else
a_new = randi(2);
end
if (a == 1)
terminal = 1;
delta = r - Q(o,a);
elseif (a == 2)
delta = r + Q(o_new,a_new) - Q(o,a);
end
Q(o,a) = Q(o,a) + alpha*delta;
% Updates for the next time
o = o_new;
a = a_new;
t = t + 1;
end
end
toc;
elapsed_training_time = toc;
save('Q_c0p2_new_sarsa','Q');