/
calculate_overall_score_forecast.py
104 lines (96 loc) · 3.53 KB
/
calculate_overall_score_forecast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import pandas as pd
import torch
from generate_parameters_bayesian import load_obj
from KFT.job_utils import *
from matplotlib import pyplot as plt
# Global matplotlib defaults applied to every figure this script saves.
font_size = 30
plt.rcParams.update({
    'font.size': font_size,
    'legend.fontsize': font_size,
    'axes.labelsize': font_size,
    'figure.figsize': (15, 7.5),
    'axes.titlesize': 35,
    'xtick.labelsize': 25,
    'ytick.labelsize': 25,
})

# Widen pandas' console output so printed tables are not truncated early.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 25

# Experiment folder holding the pickled job definitions; the per-job results
# are read from the sibling "<folder_2>_results" directory.
# Alternative experiment: folder_2 = "jobs_CCDS_side_info_WLR_2"
folder_2 = "jobs_traffic_3"
folder = folder_2 + "_results"
def get_best(df, fold_idx, sort_on):
    """Return the identifiers of the best-scoring job of one fold.

    The rows of ``df`` whose ``'fold'`` column equals ``fold_idx`` are ordered
    by ``sort_on`` in ascending order and the first row is selected, so a
    lower score is treated as better.

    Args:
        df: Table with at least the columns ``'fold'``, ``'job_ind'``,
            ``'file_name'`` and ``sort_on``.
        fold_idx: Value of the ``'fold'`` column to select.
        sort_on: Column label (or labels) to rank the jobs by.

    Returns:
        A ``(job_ind, file_name)`` tuple taken from the best row.

    Raises:
        IndexError: If ``df`` contains no row for ``fold_idx``.
    """
    fold_rows = df[df['fold'] == fold_idx]
    if fold_rows.empty:
        # Same exception type that ``iloc[0]`` would raise on an empty frame,
        # but with a message that names the offending fold.
        raise IndexError(
            f"no rows with fold == {fold_idx!r}; cannot select a best job"
        )
    # A stable sort keeps the original row order among tied scores, so the
    # selected job is reproducible from run to run.
    ranked = fold_rows.sort_values(sort_on, ascending=True, kind='mergesort')
    best = ranked.iloc[0]
    return best['job_ind'], best['file_name']
def get_errors_np(preds_cat, Y_cat):
    """Print and return RMSE, normalised RMSE and normalised deviation.

    Both normalised metrics are divided by the mean absolute value of
    ``Y_cat``.

    Args:
        preds_cat: Predicted values.
        Y_cat: Reference values, same shape as ``preds_cat``.

    Returns:
        A ``(rmse, nrmse, nd)`` tuple where ``nrmse = rmse / mean(|Y|)`` and
        ``nd = mean(|preds - Y|) / mean(|Y|)``.
    """
    residual = preds_cat - Y_cat
    target_scale = np.abs(Y_cat).mean()

    rmse = np.mean(residual ** 2) ** 0.5
    nrmse = rmse / target_scale
    nd = np.abs(residual).mean() / target_scale

    for label, value in (('RMSE', rmse), ('NRMSE', nrmse), ('ND', nd)):
        print(label, value)
    return rmse, nrmse, nd
def get_exact_preds(j_tmp, job_ind, sort_on, i):
    """Reload a job's best dumped model and score it on the test data of fold ``i``.

    The job's ``test_df.csv`` is read from the module-level ``folder``, ranked
    by ``sort_on`` ascending, and the index of its first row selects which
    dumped model to load.

    Args:
        j_tmp: Job object used to load the model and produce predictions.
        job_ind: Identifier of the job; names the results sub-directory.
        sort_on: Column of ``test_df.csv`` to rank by.
        i: Fold index handed to the dataset's ``set_data``.

    Returns:
        A tuple ``(y_preds, Y, rmse, nrsme, ND, Xs)``: the inverse-transformed
        predictions and targets, their error metrics from ``get_errors_np``,
        and the inputs returned by ``j_tmp.get_preds()``.
    """
    test_scores = pd.read_csv(f'{folder}/{job_ind}/test_df.csv', index_col=0)
    print(test_scores)
    ranked = test_scores.sort_values(sort_on)
    best_ind = ranked.index.values.astype(int)[0]

    # NOTE(review): the +1 presumably maps the 0-based row index onto dump
    # numbering that starts at 1 -- confirm against load_dumped_model.
    j_tmp.load_dumped_model(best_ind + 1)
    net = j_tmp.model
    net.turn_on_all()
    net.to(j_tmp.device)

    j_tmp.init_dataloader(0.01)
    dataset = j_tmp.dataloader.dataset
    dataset.set_data(i)
    dataset.set_mode('test')

    # Pure inference: no gradients are needed for the forward pass.
    with torch.no_grad():
        _, Y, y_preds, Xs = j_tmp.get_preds()

    y_preds, Y = j_tmp.inverse_transform(y_preds, Y)
    rmse, nrsme, ND = get_errors_np(y_preds, Y)
    return y_preds, Y, rmse, nrsme, ND, Xs
def forecast_plot_traffic(X, Y, preds, fold_idx):
    """Save one true-vs-forecast line plot for each of a fixed set of series.

    Column 0 of ``X`` is used as the x-axis value and column 1 as the series
    identifier. For every selected series a PNG named
    ``traffic_forecast_<fold_idx>_<series>.png`` is written to the working
    directory.

    Args:
        X: Tensor of inputs; moved to CPU and converted to NumPy.
        Y: True values, indexable by a boolean mask over the rows of ``X``.
        preds: Forecast values, indexed the same way as ``Y``.
        fold_idx: Fold number, used only in the output file name.
    """
    series_to_plot = [0, 1, 100, 236, 45]
    features = X.cpu().numpy()
    keep = np.isin(features[:, 1], series_to_plot)

    # NOTE(review): reading the true values from column 2 and the forecasts
    # from column 3 below assumes X has exactly two columns -- confirm.
    stacked = np.concatenate(
        [features[keep, :], Y[keep][:, None], preds[keep][:, None]],
        axis=1,
    )
    table = pd.DataFrame(stacked).sort_values(by=[1, 0])

    for series_idx in series_to_plot:
        rows = table[table[1] == series_idx]
        time_axis = rows[0]
        plt.plot(time_axis, rows[2], '-.', label='True values', color='b')
        plt.plot(time_axis, rows[3], '-', label='Forecasts', color='b')
        plt.title(f'Series index {series_idx}')
        plt.xlabel('Time index')
        plt.ylabel('Value')
        plt.legend()
        plt.savefig(
            f'traffic_forecast_{fold_idx}_{series_idx}.png',
            bbox_inches='tight',
            pad_inches=0.1,
        )
        # Clear the shared figure so the next series starts from empty axes.
        plt.clf()
if __name__ == '__main__':
    # For every fold: pick the best job from the analysis table, reload its
    # model, plot and score its test predictions, then report the pooled and
    # the per-fold error metrics.
    sort_on = 'RMSE'  # ranking column; jobs are sorted ascending on it
    folds_nr = 5
    df = pd.read_csv(f"analysis_{folder_2}.csv", index_col=0)
    Y_cat = []
    preds_cat = []
    key_init = load_obj('job_0.pkl', f"{folder_2}/")
    # NOTE(review): `devices` is never read afterwards; the call is kept so
    # the script's runtime behaviour is unchanged.
    devices = GPUtil.getAvailable(order='memory', limit=1)
    print(key_init)
    # A single job object is reused for all folds; only its save_path changes.
    j_tmp = job_object(key_init)
    df_metrics = []
    for i in range(folds_nr):
        job_ind, file_name = get_best(df, i, sort_on)
        # NOTE(review): `key_load` is not used below; the load is kept because
        # it also fails early when the job file is missing.
        key_load = load_obj(file_name, f"{folder_2}/")
        j_tmp.save_path = f'{folder}/{job_ind}'
        y_preds, Y, rmse, nrsme, ND, Xs = get_exact_preds(j_tmp, job_ind, sort_on, i)
        forecast_plot_traffic(Xs, Y, y_preds, i)
        Y_cat.append(Y)
        preds_cat.append(y_preds)
        df_metrics.append([rmse, nrsme, ND])
    # Metrics over the predictions of all folds pooled together.
    preds_cat = np.concatenate(preds_cat)
    Y_cat = np.concatenate(Y_cat)
    get_errors_np(preds_cat, Y_cat)
    # Per-fold summary; the labels match the metric names printed by
    # get_errors_np (previously misspelled 'RSME' / 'NRSME').
    df_metrics = pd.DataFrame(df_metrics, columns=['RMSE', 'NRMSE', 'ND'])
    print(df_metrics.describe())