# KMeans.py
from sklearn.cluster import KMeans
import numpy as np
from numpy import array
NUM_CLASS = 16  # number of k-means clusters to fit
TOP_K = 11  # how many nearest samples to record for each centroid


def parse_features(lines):
    """Parse text rows of whitespace-separated numbers into a 2-D float array.

    Args:
        lines: Iterable of strings, one feature vector per string.

    Returns:
        A NumPy array with one row per non-blank input line.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    # ``split()`` with no argument tolerates trailing spaces, tabs and the
    # newline; ``split(" ")`` would yield empty or "\n" tokens that make
    # ``float`` fail. Blank lines carry no sample and are skipped.
    rows = [
        [float(token) for token in line.split()]
        for line in lines
        if line.strip()
    ]
    return array(rows)


def closest_indices(distances, topk):
    """Return the row indices of the ``topk`` smallest entries of each column.

    Args:
        distances: 2-D array-like; entry ``[i, j]`` is the distance from
            sample ``i`` to centroid ``j``. Needs at least ``topk`` rows.
        topk: Number of nearest samples to keep per centroid.

    Returns:
        Integer array of shape ``(n_columns, topk)``; row ``j`` lists sample
        indices ordered from nearest to farthest for centroid ``j``.
    """
    distances = np.asarray(distances)
    n_centroids = distances.shape[1]
    nearest = np.zeros((n_centroids, topk), dtype=int)
    for j in range(n_centroids):
        nearest[j] = np.argsort(distances[:, j])[:topk]
    return nearest


def main():
    """Cluster the features in train_feature.txt and write the results.

    Writes one cluster label per sample to cluster_label.txt and, for each
    centroid, the indices of its TOP_K closest samples to closed_index.txt.
    """
    # Context managers guarantee every handle is flushed and closed.
    with open("train_feature.txt", "r") as feature_file:
        lines = feature_file.readlines()
    print(len(lines))
    train_X = parse_features(lines)

    kmeans = KMeans(n_clusters=NUM_CLASS, random_state=0).fit(train_X)
    with open("cluster_label.txt", "w") as label_file:
        np.savetxt(label_file, kmeans.labels_, fmt="%d")

    # The sample-to-centroid distance matrix does not depend on the centroid
    # being inspected, so compute it once instead of once per cluster.
    distances = kmeans.transform(train_X)
    indexs = closest_indices(distances, TOP_K)
    with open("closed_index.txt", "w") as closed_file:
        np.savetxt(closed_file, indexs, fmt="%d")


if __name__ == "__main__":
    main()