-
Notifications
You must be signed in to change notification settings - Fork 0
/
randFor.py
executable file
·52 lines (46 loc) · 1.65 KB
/
randFor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_validate as cv
from sklearn.model_selection import cross_val_predict as cvp
from sklearn.model_selection import GridSearchCV as GScv
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import balanced_accuracy_score as bac
from sklearn.metrics.scorer import make_scorer
from joblib import dump, load
import pandas as pd
import numpy as np
def custom_scorer(Y, Y_pred):
    """Score predictions by balanced accuracy and echo diagnostics.

    Prints the balanced accuracy, then the confusion matrix, for the given
    true/predicted labels so both show up in the log for every evaluation.

    Args:
        Y: True labels.
        Y_pred: Predicted labels.

    Returns:
        The balanced accuracy score of ``Y_pred`` against ``Y``.
    """
    balanced_accuracy = bac(Y, Y_pred)
    print(balanced_accuracy)
    confusion = cm(Y, Y_pred)
    print(confusion)
    return balanced_accuracy


# Wrap the function as a scikit-learn scorer object (higher score is better).
# NOTE(review): this file imports make_scorer from `sklearn.metrics.scorer`,
# a private module that was removed in scikit-learn 0.24; the public location
# is `sklearn.metrics.make_scorer` -- confirm the targeted sklearn version.
cust_scorer = make_scorer(custom_scorer, greater_is_better=True)
print("Loading data...")
# Both CSV files are read as header-less integer tables.  DataFrame.to_numpy()
# always returns a 2-D array, even when the file has a single column.
X = pd.read_csv("MIArr.csv", header=None, dtype=int).to_numpy(dtype=int)
Y = pd.read_csv("MILabels.csv", header=None, dtype=int).to_numpy(dtype=int)
if Y.shape[1] == 1:
    # A single label column arrives as shape (n_samples, 1); scikit-learn
    # expects (n_samples,) and otherwise emits a DataConversionWarning on
    # every fit.  Any other layout is left untouched.
    # NOTE(review): presumably MILabels.csv holds one label per row -- confirm.
    Y = Y.ravel()
print("Data loaded!")
# Tune a class-balanced random forest with a 5-fold cross-validated grid
# search over leaf size and forest size, scored by the custom scorer.
parameters = {
    'min_samples_leaf': [0.0001, 0.001, 0.01],
    'n_estimators': [50, 60, 70, 80, 90, 100, 125, 150, 200, 300, 500],
}
clf = RFC(class_weight="balanced", n_jobs=-1)

print("\nPreforming Grid Search...")
gs = GScv(
    estimator=clf,
    param_grid=parameters,
    scoring=cust_scorer,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
gs.fit(X, Y)

print("\nBest params: ", gs.best_params_)
print("\nBest Score:", gs.best_score_)

# Continue with the best estimator found by the search.
clf = gs.best_estimator_
# Report the average and maximum depth over the trees of the fitted forest.
# Collecting the depths in a list lets the builtins do the work and avoids
# rebinding the builtin name `max` at module level.
tree_depths = [tree.tree_.max_depth for tree in clf.estimators_]
print("Average depth:", sum(tree_depths) / len(tree_depths))
print("Max depth:", max(tree_depths))
# Persist the selected random forest to disk with joblib.
fname = "RF_CLF.joblib"
print("Saving classifier to " + fname)
dump(value=clf, filename=fname)
print("DONE")