-
Notifications
You must be signed in to change notification settings - Fork 0
/
Chi.py
29 lines (21 loc) · 1.22 KB
/
Chi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#This file uses the batch feature array and label vector files to determine the batch-wise Chi^2 scores of each feature
import sys
import numpy as np
import pandas as pd
from sklearn.feature_selection import chi2
#cmd line arg 1: expects integer denoting which batch file to open
#obtain the m by n feature matrix (where m is number of features and n is number of samples) from the file "batchxArr.csv" in directory "batches", with x specified by the first command line argument
#Batch id (first command line argument); it selects which files in directory "batches" are read and written
batch_id = sys.argv[1]
#NOTE(review): this first message has no space between the batch id and the text, unlike the later messages; output kept unchanged
print(batch_id+"Loading data...")
#The feature array is read as uint8 and its first column is dropped (presumably an index/id column - TODO confirm)
df = pd.read_csv("batches/batch"+batch_id+"Arr.csv", dtype=np.uint8)
X = df.to_numpy()[:,1:]
#reads batch labels from batchxLabels.csv in directory "batches" (no header row) and flattens them into a 1-D label vector
Y = np.ravel(pd.read_csv("batches/batch"+batch_id+"Labels.csv", header=None).to_numpy())
#calculate Chi^2 scores for each feature (chi2 scores every column of X against Y, so one score is produced per column)
print(batch_id+" Done loading, starting Chi^2...")
#pval is returned alongside the scores but is not saved
chi, pval = chi2(X, Y)
print(batch_id+" Done Chi^2, saving scores...")
#Save Chi^2 scores for the batch in folder "batches" in file batchxchi2.csv with x specified by the first command line argument
#Output is a "chi2" header line followed by one score per line
#The with-block closes the file, so every score is flushed to disk before COMPLETE is reported
with open("batches/batch"+batch_id+"chi2.csv","w") as fp:
    fp.write("chi2\n")
    fp.writelines(str(score)+"\n" for score in chi)
print(batch_id+" COMPLETE!")