/
cross_validation.py
98 lines (84 loc) · 3.5 KB
/
cross_validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def cross_validate_clf(design_matrix, labels, classifier, cv_folds):
    """ Perform a cross-validation and return positive-class probabilities.

    For every (train, test) split the classifier is refit on the training
    rows, and the probability it assigns to the class labelled 1 is stored
    at the positions of the test rows.

    Parameters:
    -----------
    design_matrix: (n_samples, n_features) np.array
        Design matrix for the experiment.
    labels: (n_samples, ) np.array
        Vector of labels. The positive class must be labelled 1.
    classifier: sklearn classifier object
        Classifier instance; must have the following methods:
        - fit(X, y) to train the classifier on the data X, y
        - predict_proba(X) to apply the trained classifier to the data X
          and return probability estimates
        and must expose a ``classes_`` attribute once fitted.
    cv_folds: iterable or sklearn cross-validation object
        Either an iterable of (train_indices, test_indices) pairs, or a
        splitter object exposing ``split(X, y)`` that yields such pairs.

    Return:
    -------
    pred: (n_samples, ) np.array
        Vector of predicted probabilities of class 1 (same order as labels).
        Samples that never appear in a test fold keep the value 0.

    Raises:
    -------
    ValueError
        If class 1 is not among ``classifier.classes_`` after a fit.
    """
    import numpy as np

    # Positional indexing with the fold indices requires an array.
    labels = np.asarray(labels)

    # Splitter objects are not iterable themselves; they produce the folds
    # through split(). Plain iterables of index pairs are used as given.
    if hasattr(cv_folds, "split"):
        folds = cv_folds.split(design_matrix, labels)
    else:
        folds = cv_folds

    pred = np.zeros(labels.shape)
    for tr, te in folds:
        classifier.fit(design_matrix[tr, :], labels[tr])
        # Column of predict_proba that corresponds to the positive class.
        pos_idx = list(classifier.classes_).index(1)
        pred[te] = classifier.predict_proba(design_matrix[te, :])[:, pos_idx]
    return pred
def cross_validate_clf_optimize(design_matrix, labels, classifier, cv_folds):
    """ Perform a cross-validation with a parameter-searching classifier.

    For every (train, test) split the classifier is refit on the training
    rows, its selected parameters are printed, and the probability it
    assigns to the class labelled 1 is stored at the positions of the
    test rows.

    Parameters:
    -----------
    design_matrix: (n_samples, n_features) np.array
        Design matrix for the experiment.
    labels: (n_samples, ) np.array
        Vector of labels. The positive class must be labelled 1.
    classifier: sklearn parameter-search object
        Must have the following methods:
        - fit(X, y) to train the classifier on the data X, y
        - predict_proba(X) to apply the trained classifier to the data X
          and return probability estimates
        and, once fitted, the attributes ``best_params_`` and
        ``best_estimator_`` (the latter exposing ``classes_``).
        Presumably a GridSearchCV-like object -- confirm against callers.
    cv_folds: iterable or sklearn cross-validation object
        Either an iterable of (train_indices, test_indices) pairs, or a
        splitter object exposing ``split(X, y)`` that yields such pairs.

    Return:
    -------
    pred: (n_samples, ) np.array
        Vector of predicted probabilities of class 1 (same order as labels).
        Samples that never appear in a test fold keep the value 0.

    Raises:
    -------
    ValueError
        If class 1 is not among ``best_estimator_.classes_`` after a fit.
    """
    import numpy as np

    # Positional indexing with the fold indices requires an array.
    labels = np.asarray(labels)

    # Splitter objects are not iterable themselves; they produce the folds
    # through split(). Plain iterables of index pairs are used as given.
    if hasattr(cv_folds, "split"):
        folds = cv_folds.split(design_matrix, labels)
    else:
        folds = cv_folds

    pred = np.zeros(labels.shape)
    for tr, te in folds:
        classifier.fit(design_matrix[tr, :], labels[tr])
        # Report the parameters selected on this training fold.
        print(classifier.best_params_)
        # Column of predict_proba that corresponds to the positive class.
        pos_idx = list(classifier.best_estimator_.classes_).index(1)
        pred[te] = classifier.predict_proba(design_matrix[te, :])[:, pos_idx]
    return pred
def cross_validate(design_matrix, labels, regressor, cv_folds):
    """ Perform a cross-validation and return per-class predictions.

    Use a scaler to scale the features to mean 0, standard deviation 1.
    The scaler is fit on the training rows of each fold only and then
    applied to that fold's test rows.

    Parameters:
    -----------
    design_matrix: (n_samples, n_features) np.array
        Design matrix for the experiment.
    labels: (n_samples, ) np.array
        Vector of labels.
    regressor: Estimator instance; must have the following methods:
        - fit(X, y) to train the estimator on the data X, y
        - predict_proba(X) to apply the trained estimator to the data X
          and return one column of predicted values per class
        If it exposes ``classes_`` after fitting, that attribute is used
        to place the columns when a training fold lacks some classes.
    cv_folds: iterable or sklearn cross-validation object
        Either an iterable of (train_indices, test_indices) pairs, or a
        splitter object exposing ``split(X, y)`` that yields such pairs.

    Return:
    -------
    pred: (n_samples, n_classes) np.array
        Predictions (rows in the same order as labels), one column per
        distinct label value in sorted order. Samples that never appear
        in a test fold, and classes unseen in a training fold, keep 0.

    Raises:
    -------
    ValueError
        If predict_proba returns a number of columns different from the
        number of distinct labels and the columns cannot be matched to
        classes through ``regressor.classes_``.
    """
    import numpy as np
    from sklearn.preprocessing import StandardScaler

    # Positional indexing with the fold indices requires an array.
    labels = np.asarray(labels)
    classes = np.unique(labels)
    n_classes = classes.size

    # Splitter objects are not iterable themselves; they produce the folds
    # through split(). Plain iterables of index pairs are used as given.
    if hasattr(cv_folds, "split"):
        folds = cv_folds.split(design_matrix, labels)
    else:
        folds = cv_folds

    pred = np.zeros((labels.shape[0], n_classes))
    for tr, te in folds:
        scaler = StandardScaler()
        Xtr = scaler.fit_transform(design_matrix[tr, :])
        Xte = scaler.transform(design_matrix[te, :])
        regressor.fit(Xtr, labels[tr])
        proba = np.asarray(regressor.predict_proba(Xte))

        if proba.ndim != 2 or proba.shape[1] == n_classes:
            pred[te, :] = proba
            continue

        # The training fold did not contain every class, so the output has
        # fewer columns; assigning it directly would fail or, for a single
        # column, silently broadcast it into every class column.
        fold_classes = getattr(regressor, "classes_", None)
        if fold_classes is None or len(fold_classes) != proba.shape[1]:
            raise ValueError(
                "predict_proba returned %d columns but labels contain %d "
                "classes, and the columns cannot be matched to classes"
                % (proba.shape[1], n_classes)
            )
        cols = np.searchsorted(classes, fold_classes)
        aligned = np.zeros((proba.shape[0], n_classes))
        aligned[:, cols] = proba
        pred[te, :] = aligned
    return pred