-
Notifications
You must be signed in to change notification settings - Fork 0
/
MI.py
executable file
·29 lines (21 loc) · 1.25 KB
/
MI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#This file uses batch feature array and label vector files to determine batch-wise Mutual Information scores of each feature
import sys
import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
# Command-line argument 1: the batch identifier x (an integer is expected).
# It selects the input files batches/batchxArr.csv and batches/batchxLabels.csv
# and the output file batches/batchxMI.csv.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError on sys.argv[1].
    sys.exit("usage: " + sys.argv[0] + " <batch number>")
batch = sys.argv[1]

# Load the feature array for this batch. The first row of the CSV is consumed
# as the header (pandas default) and every column is parsed as uint8.
print(batch+" Loading data...")
df = pd.read_csv("batches/batch"+batch+"Arr.csv", dtype=np.uint8)
# Drop the first column and keep the rest as the feature matrix.
# NOTE(review): the first column is presumably an index/ID column - confirm
# against the code that writes the batch files.
# NOTE(review): the original comment described this matrix as
# (features x samples), but mutual_info_classif treats rows as samples and
# columns as features - confirm the layout of batchxArr.csv.
X = df.to_numpy()[:, 1:]
# Labels file has no header row; flatten it into a 1-D label vector.
Y = np.ravel(pd.read_csv("batches/batch"+batch+"Labels.csv", header=None).to_numpy())

# Calculate one MI score per feature (per column of X).
# NOTE(review): no random_state is passed, so scores may differ slightly
# between runs if the features are treated as continuous - confirm whether
# reproducibility matters here.
print(batch+" Done loading, starting MI...")
MI = mutual_info_classif(X, Y)
print(batch+" Done MI, saving scores...")

# Save the MI scores for this batch to batches/batchxMI.csv: a header line
# "MI" followed by one score per line, in feature order. The context manager
# guarantees the file is flushed and closed even if a write fails.
with open("batches/batch"+batch+"MI.csv", "w") as fp:
    fp.write("MI\n")
    fp.writelines(str(score)+"\n" for score in MI)
print(batch+" COMPLETE!")