MCI_crossval.py
# import configuration file
import config
# set random seed
from numpy.random import seed
from tensorflow import set_random_seed
seed(config.fixed_seed)
set_random_seed(config.fixed_seed)
import tensorflow as tf
import os
from keras import backend as K
# when running on a local PC, avoid claiming all GPU memory for a single experiment
if config.location == "local":
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    sc = tf.ConfigProto()
    sc.gpu_options.allow_growth = True
    s = tf.Session(config=sc)
    K.set_session(s)
import time
import numpy as np
import sys
from shutil import copyfile
from model_selection import load_best_model
from sklearn.metrics import roc_curve, auc, classification_report
from savings import save_results, save_DL_model
from create_sets import create_labels, count_sets
from generator import DataGenerator
from plotting import plot_ROC


def main(argv):
    """
    This script is an alternative version of 'main.py' whose purpose is to run a complete data set
    through a pre-trained model as an evaluation-only pass. This can be used for MCI classification,
    since for this task it is beneficial to use a network pre-trained on the AD task. For every model
    created in each fold of the cross-validation of the AD task, all MCI data is used to evaluate that
    model, so the MCI results of each fold can be averaged for the final performance.

    As with the 'main.py' script, this script uses the configuration settings of the 'config.py' file.
    It also saves all evaluation results in a 'results.npy' dictionary and provides a plot of the
    ROC-AUC of all folds. The configuration file and model information are saved as well.
    """
    # start timer
    start = time.time()
    start_localtime = time.localtime()
    # if a temporary job directory is provided, use it as the data directory (when running on a server)
    if len(argv) > 1:
        config.data_dir = sys.argv[1] + "/"
        config.aug_dir = sys.argv[1] + "/augmented/"
    # if a job nr is provided, use it in the output dir name
    if len(argv) > 2:
        config.output_dir = config.all_results_dir + sys.argv[2] + "_" + config.roi \
                            + "_" + config.task + "_" + config.model + config.comments + "/"
    # save configuration file
    create_data_directory(config.output_dir)
    copyfile(config.config_file, config.output_dir + "configuration_" + config.model + ".py")
    # initialization
    results = {"train": {"loss": [], "acc": [], "fpr": [], "tpr": [], "auc": [], "sensitivity": [], "specificity": []},
               "validation": {"loss": [], "acc": [], "fpr": [], "tpr": [], "auc": [], "sensitivity": [], "specificity": []},
               "test": {"loss": [], "acc": [], "fpr": [], "tpr": [], "auc": [], "sensitivity": [], "specificity": []}}
    # create labels and test set with all data
    partition_labels, labels = create_labels()
    partition_test = {"test": []}
    X = np.concatenate((partition_labels[config.class0], partition_labels[config.class1]), axis=0)
    for i in range(config.k_cross_validation):
        partition_test["test"].append(X)
    count_sets(partition_test, labels)
    print("\n")
    np.save(config.output_dir + "train_test.npy", partition_test)
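    # NOTE: the same full set X is appended once per fold, so
    # partition_test["test"][i] in the loop below always holds the complete
    # MCI set; this presumably mirrors the per-fold indexing of 'main.py'.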
    # START CROSS VALIDATION
    for i in range(config.k_cross_validation):
        # select pre-trained model of the specified fold
        fold_path = f"{config.pretrain_path}k{i}/"
        print(f"Location pre-trained model fold {i}: {fold_path}")
        model = load_best_model(fold_path)
        if i == 0:
            model.summary()
        if config.pre_train and config.test_only:
            print("\nNo training -> testing only!\n")
        print("\n----------- CROSS VALIDATION " + str(i) + " ----------------\n")
        # create results directory
        results_dir = config.output_dir + "k" + str(i)
        create_data_directory(results_dir)
        file = open(results_dir + '/results.txt', 'w')
        # get specified mean + std to standardize all data in generator
        mean = np.load(f"{fold_path}mean.npy")
        std = np.load(f"{fold_path}std.npy")
        # create data generator
        test_generator = DataGenerator(partition_test["test"][i], labels, mean, std, batch_size=1,
                                       dim=config.input_shape, n_channels=1, n_classes=2, shuffle=False)
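        # The fold-specific mean/std are passed in so that each scan is
        # standardized with the statistics of that fold's AD training data;
        # presumably the generator applies x = (x - mean) / std internally.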
        # TEST EVALUATION
        # roc auc
        Y_pred = model.predict_generator(test_generator, verbose=0)
        y_pred = np.argmax(Y_pred, axis=1)
        y_true = []
        for subject_id in test_generator.list_IDs:
            y_true.append(labels[subject_id])
        fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1])
        roc_auc = auc(fpr, tpr)
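        # roc_curve scores each subject by the predicted probability of the
        # positive class (column 1 of the softmax output); auc then computes
        # the area under the resulting ROC curve.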
        # save subject classifications (for statistical analysis)
        np.save(results_dir + "/test_IDs.npy", test_generator.list_IDs)
        np.save(results_dir + "/test_y_true.npy", y_true)
        np.save(results_dir + "/test_y_pred.npy", y_pred)
        # sen / spe
        report = classification_report(y_true, y_pred, target_names=[config.class0, config.class1], output_dict=True)
        # loss, acc
        score = model.evaluate_generator(generator=test_generator, verbose=1)
        results["test"]["loss"].append(score[0])
        results["test"]["acc"].append(score[1])
        results["test"]["fpr"].append(fpr)
        results["test"]["tpr"].append(tpr)
        results["test"]["auc"].append(roc_auc)
        results["test"]["sensitivity"].append(report[config.class1]["recall"])
        results["test"]["specificity"].append(report[config.class0]["recall"])
        # report test results
        test_results = f"\nTest\n loss: {score[0]:.4f}\n acc: {score[1]:.4f}\n AUC: {roc_auc:.4f}\n " \
                       f"sens: {report[config.class1]['recall']:.4f}\n spec: {report[config.class0]['recall']:.4f}\n\n"
        file.write(test_results)
        print(test_results)
        file.close()

    print("\n---------------------- RESULTS ----------------------\n\n")
    # plot test ROC of all folds + average
    plot_ROC(results["test"]["tpr"], results["test"]["fpr"], results["test"]["auc"])
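    # The final MCI performance described in the docstring is the per-fold
    # average, e.g. np.mean(results["test"]["auc"]); save_results below is
    # presumably where such summary statistics are written out.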
    # end timer
    end = time.time()
    end_localtime = time.localtime()
    # save results + model (note: 'model' here is the one loaded for the last fold)
    np.save(config.output_dir + "results.npy", results)
    save_DL_model(model)
    save_results(results, start, start_localtime, end, end_localtime)
    print('\nend')


def create_data_directory(path):
    """
    Creates the directory at the given path if it does not already exist.
    """
    if not os.path.exists(path):
        os.makedirs(path)


if __name__ == '__main__':
    main(sys.argv)