forked from Azure/ImageClassificationUsingCntk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
132 lines (116 loc) · 5.11 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -*- coding: utf-8 -*-
import sys
# Add the local library directory (relative to the current working directory)
# to the import search path so the star-imports below can be resolved.
sys.path.append("./resources/libraries")
#sys.path.append("C:/Users/pabuehle/Desktop/PROJECTS/pythonLibrary")
# NOTE(review): names used in this file without an explicit import (random, np,
# collections, pathJoin, readPickle, getColumn, cmGetAccuracies) presumably
# come from these two star-imports -- confirm against the library sources.
from pabuehle_utilities_general_v2 import *
from pabuehle_utilities_CVbasic_v2 import *
from sklearn import svm, metrics
# Seed the random module with a fixed value at import time.
random.seed(0)
################################
# Shared helper functions
################################
def getModelNode(classifier):
    """Return the model node associated with the given classifier name.

    Args:
        classifier: classifier name (string).

    Returns:
        "poolingLayer" when the name starts with "svm", otherwise a new
        empty list.
    """
    return "poolingLayer" if classifier.startswith("svm") else []
def getImgLabelList(imgDict, imgDir, lut = None):
    """Flatten a label -> filenames mapping into a list of (path, label) pairs.

    Args:
        imgDict: mapping from label to an iterable of image filenames.
        imgDir: root directory; each path is built with
            pathJoin(imgDir, label, imgFilename).
        lut: optional lookup table. When given, the second element of each
            pair is lut[label] instead of the label itself.

    Returns:
        List of (imgPath, label-or-lut-value) tuples, in imgDict iteration
        order.
    """
    # The lut lookup is done per image (not per label), so a label without
    # images never needs an entry in lut.
    return [
        (pathJoin(imgDir, label, imgFilename), label if lut is None else lut[label])
        for label in imgDict.keys()
        for imgFilename in imgDict[label]
    ]
def getSvmInput(imgDict, features, boL2Normalize, lutLabel2Id = None):
    """Collect per-image feature vectors, labels and filenames for an SVM.

    Args:
        imgDict: mapping from label to an iterable of image filenames.
        features: nested mapping, features[label][imgFilename] -> feature
            vector. It is never modified by this function.
        boL2Normalize: if truthy, each feature vector is divided by its
            2-norm (vectors with zero norm are left unchanged).
        lutLabel2Id: optional lookup from label to an id convertible with
            int(). None or an empty list means "use the raw label".

    Returns:
        Tuple (feats, labels, imgFilenames) of three parallel lists; every
        entry of feats is converted with np.float32.
    """
    # An empty list is accepted as "no lookup table" because callers in this
    # file pass [] for that purpose.
    noLut = lutLabel2Id is None or (isinstance(lutLabel2Id, list) and len(lutLabel2Id) == 0)

    feats = []
    labels = []
    imgFilenames = []
    for label in imgDict:
        for imgFilename in imgDict[label]:
            feat = features[label][imgFilename]
            if boL2Normalize:
                feat = np.asarray(feat)
                norm = np.linalg.norm(feat, 2)
                # Out-of-place division: an in-place "/=" would overwrite the
                # array stored in `features` and fail for integer arrays.
                # Zero-norm vectors are kept as-is to avoid producing NaNs.
                if norm > 0:
                    feat = feat / norm
            feats.append(np.float32(feat))
            # Lookup is done per image so that labels without images do not
            # need an entry in the lookup table.
            labels.append(label if noLut else int(lutLabel2Id[label]))
            imgFilenames.append(imgFilename)
    return feats, labels, imgFilenames
def runClassifier(classifier, dnnOutput, imgDict = [], lutLabel2Id = [], svmPath = [], svm_boL2Normalize = []):
    """Compute a score matrix for the selected images.

    Args:
        classifier: classifier name; names starting with 'svm' use the SVM
            path, everything else uses the values in dnnOutput directly.
        dnnOutput: nested mapping, dnnOutput[label][imgFilename] -> vector
            (SVM input features, or the scores themselves otherwise).
        imgDict: mapping label -> filenames to evaluate. The default []
            means "all images present in dnnOutput".
        lutLabel2Id: optional label -> id lookup; [] means "keep raw labels".
        svmPath: location handed to readPickle to load the SVM learner.
        svm_boL2Normalize: forwarded to getSvmInput.

    Returns:
        Tuple (scoresMatrix, imgFilenames, gtLabels), one matrix row per
        image.
    """
    # Run classifier on all known images, if not otherwise specified
    if imgDict == []:
        imgDict = {label: list(dnnOutput[label].keys()) for label in list(dnnOutput.keys())}

    # DNN case: the stored outputs already are the classification scores
    if not classifier.startswith('svm'):
        gtLabels, imgFilenames, scoreRows = [], [], []
        for label in list(imgDict.keys()):
            for imgFilename in imgDict[label]:
                scoreRows.append(dnnOutput[label][imgFilename])
                gtLabels.append(label if lutLabel2Id == [] else int(lutLabel2Id[label]))
                imgFilenames.append(imgFilename)
        return np.vstack(scoreRows), imgFilenames, gtLabels

    # SVM case
    # NOTE(review): readPickle presumably unpickles the learner -- only point
    # svmPath at trusted files.
    learner = readPickle(svmPath)
    feats, gtLabels, imgFilenames = getSvmInput(imgDict, dnnOutput, svm_boL2Normalize, lutLabel2Id)
    print("Evaluate SVM...")
    scoresMatrix = learner.decision_function(feats)

    # A 1-D result means a binary problem: build the two-column
    # (nrImages x nrClasses) matrix by hand as [-score, score].
    if len(scoresMatrix.shape) == 1:
        scoresMatrix = np.array([[-score, score] for score in scoresMatrix])
    return scoresMatrix, imgFilenames, gtLabels
def runClassifierOnImagePaths(classifier, dnnOutput, svmPath = [], svm_boL2Normalize = []):
    """Score a flat sequence of per-image outputs with runClassifier.

    The items of dnnOutput are wrapped into the nested label -> filename
    layout that runClassifier expects, using the single placeholder label
    "dummy" and the stringified position of each item as its filename.

    Args:
        classifier: classifier name, forwarded to runClassifier.
        dnnOutput: iterable with one entry per image.
        svmPath: forwarded to runClassifier.
        svm_boL2Normalize: forwarded to runClassifier.

    Returns:
        Only the score matrix returned by runClassifier.
    """
    wrappedOutput = {"dummy": {str(index): feat for index, feat in enumerate(dnnOutput)}}
    scoresMatrix, _imgFilenames, _gtLabels = runClassifier(
        classifier, wrappedOutput, [], [], svmPath, svm_boL2Normalize)
    return scoresMatrix
################################
# Script-specific helper functions
################################
## 3_refineDNN
def cntkBalanceDataset(imgLabelList):
    """Balance a dataset by duplicating examples of under-represented classes.

    Args:
        imgLabelList: list of (imgPath, label) pairs.

    Returns:
        A new list holding the original pairs followed by the duplicated
        pairs, so that every label occurs as often as the most frequent one.
        The input list itself is not modified. An empty input yields an
        empty list.
    """
    counts = collections.Counter(row[1] for row in imgLabelList)
    print("Before balancing of training set:")
    for item in counts.items():
        print(" Class {:3}: {:5} examples".format(*item))

    # Nothing to balance; max()/min() below would fail on an empty sequence.
    if not counts:
        return list(imgLabelList)

    # Get duplicates to balance dataset. Each sweep over the dataset adds at
    # most one extra copy per image, so duplicates are spread evenly over the
    # images of a class instead of repeating only the first one.
    duplicates = []
    targetCount = max(counts.values())
    while min(counts.values()) < targetCount:
        for imgPath, label in imgLabelList:
            if counts[label] < targetCount:
                duplicates.append((imgPath, label))
                counts[label] += 1

    # Add duplicates to original dataset
    print("After balancing: all classes now have {} images; added {} duplicates to the {} original images.".format(targetCount, len(duplicates), len(imgLabelList)))
    imgLabelListDup = imgLabelList + duplicates

    # Internal sanity check on the final class distribution
    counts = collections.Counter(row[1] for row in imgLabelListDup)
    assert(min(counts.values()) == max(counts.values()) == targetCount)
    return imgLabelListDup
## 5_trainSVM
def sklearnAccuracy(learner, feats, gtLabels):
    """Evaluate a trained learner on labelled features.

    Args:
        learner: object exposing predict(feats).
        feats: features handed to learner.predict.
        gtLabels: ground-truth labels, one per feature.

    Returns:
        Whatever cmGetAccuracies returns for the confusion matrix of
        gtLabels versus the predicted labels.
    """
    predictions = learner.predict(feats)
    return cmGetAccuracies(metrics.confusion_matrix(gtLabels, predictions))
def printFeatLabelInfo(title, feats, labels, preString = " "):
    """Print a short summary of a feature/label set to stdout.

    Args:
        title: heading printed on the first line.
        feats: sequence of feature vectors.
        labels: sequence of labels; entries equal to 1 are counted as
            positive, entries equal to 0 as negative.
        preString: prefix put in front of every line after the title.
    """
    numExamples = len(feats)
    print(title)
    print(preString + "Number of examples: {}".format(numExamples))

    # Count matches with numpy instead of summing a boolean array element by
    # element with the builtin sum().
    labelArray = np.array(labels)
    print(preString + "Number of positive examples: {}".format(np.count_nonzero(labelArray == 1)))
    print(preString + "Number of negative examples: {}".format(np.count_nonzero(labelArray == 0)))

    # feats[0] does not exist for an empty set; report that instead of
    # raising IndexError.
    if numExamples > 0:
        print(preString + "Dimension of each example: {}".format(len(feats[0])))
    else:
        print(preString + "Dimension of each example: n/a")