-
Notifications
You must be signed in to change notification settings - Fork 9
/
annotatedimage.py
251 lines (235 loc) · 10.1 KB
/
annotatedimage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
import copy
import numpy as np
import sys
import util
from featextractor import *
from bbox import *
class AnnotatedHeatmap:
    """
    A heatmap bundled with the annotations that describe it.

    Public fields:
    - heatmap: nd-array.
    - description: string. Free text explaining what the heatmap refers to.
    - type: string. Name of the class that generated the heatmap.
    - specs: implementation-specific data, with further information about
             how the heatmap has been extracted.
    """

    def __init__(self):
        # Textual fields start out empty, payload fields start out unset.
        self.description = self.type = ''
        self.heatmap = self.specs = None
class AnnotatedObject:
    """
    Annotation data attached to one object label.

    Public fields:
    - label: string (i.e. 'n0001000').
    - confidence: float, or None when no confidence is available.
                  Normally it refers to the full-image confidence.
    - bboxes: list of BBox objects (possibly empty).
    - heatmaps: list of AnnotatedHeatmap objects.
    """

    def __init__(self, label='', confidence=None):
        # Each instance gets its own fresh lists.
        self.heatmaps = []
        self.bboxes = []
        self.confidence = confidence
        self.label = label

    def __str__(self):
        """Return 'label: <label>; ' followed by every bbox and a space."""
        pieces = ['label: ' + str(self.label) + '; ']
        pieces.extend(str(box) + ' ' for box in self.bboxes)
        return ''.join(pieces)
class AnnotatedImage:
    """
    All the information regarding an image.

    Public fields:
    - image_jpeg: array of bytes containing the image encoded in JPEG.
                  Use set_image()/get_image() to write/read this field.
    - image_width, image_height: int. They MUST match the size of
                  image_jpeg (get_image() asserts it).
    - image_name: string. The unique file identifier of the image
                  (i.e. 'val/ILSVRC2012_val_00000001.JPEG').
    - gt_objects: dictionary {'label'} -> AnnotatedObject.
                  The confidence values must be set (with any value)
                  if you want to use the method get_gt_label().
    - pred_objects: dictionary {'name'} -> ({'label'} -> AnnotatedObject).
    - crop_description: string, describing how the image has been
                  generated from its original version.
    - segmentation_name: string, the unique name of the segmentation
                  mask used for this image.
    - stats: dictionary {'name'} -> ({'label'} -> Stats).
    - features: {'feat_extractor_module'} -> data, where data is
                  a private feature-dependent object.

    register_feature_extractor() additionally creates the private
    attributes feature_extractor_ and save_features_cache_.
    """

    def __init__(self):
        self.image_jpeg = ''
        self.image_width = 0
        self.image_height = 0
        self.image_name = ''
        self.gt_objects = {}
        self.pred_objects = {}
        self.crop_description = ''
        self.segmentation_name = ''
        self.stats = {}
        self.features = {}

    def __str__(self):
        """
        Return '<name>:[<height> x <width>]' followed by the textual
        representation of every ground-truth object.
        """
        parts = ['{0}:[{1} x {2}]\n'.format(self.image_name,
                                            self.image_height,
                                            self.image_width),
                 'gt_objects:\n']
        parts.extend(' ' + str(obj) for obj in self.gt_objects.values())
        return ''.join(parts)

    def __getstate__(self):
        """
        Return the state to pickle: a shallow copy of the instance
        dictionary without the registered feature extractor.
        """
        state = dict(self.__dict__)
        state.pop('feature_extractor_', None)
        return state

    def set_image(self, img):
        """
        Set the image, given a ndarray-image.
        The width and the height are read from img.shape[1] and
        img.shape[0] respectively.
        """
        self.image_jpeg = util.convert_image_to_jpeg_string(img)
        self.image_width = img.shape[1]
        self.image_height = img.shape[0]

    def get_image(self):
        """
        Return a ndarray-image decoded from image_jpeg.
        Asserts that the decoded size matches image_width/image_height.
        """
        img = util.convert_jpeg_string_to_image(self.image_jpeg)
        assert self.image_width == img.shape[1]
        assert self.image_height == img.shape[0]
        return img

    def get_gt_label(self):
        """
        Return the top-scoring (full image) gt label.
        Objects whose confidence is None are ignored; the empty string
        is returned if no object has a usable confidence.
        """
        label = ''
        max_conf = -sys.float_info.max
        # .items() (instead of .iteritems()) works on Python 2 and 3.
        for key, obj in self.gt_objects.items():
            assert key == obj.label
            if obj.confidence is not None and obj.confidence > max_conf:
                label = obj.label
                max_conf = obj.confidence
        return label

    def set_stats(self):
        """
        Reset the stats field to an empty dictionary.
        """
        self.stats = {}

    def extend_pred_objects(self, anno, classifier):
        """
        Extend the predicted objects of self, for the given classifier,
        with the ones stored in anno (another AnnotatedImage).
        In case of collision on a label, the label and confidence of self
        are kept, and the bboxes and heatmaps of anno are appended.
        Labels that are new for self are added by reference (no copy).
        If self has no entry for the classifier yet, it is created.
        """
        theirs = anno.pred_objects[classifier]
        # Create the per-classifier dictionary on demand, so that extending
        # an image without predictions does not raise a KeyError.
        mine = self.pred_objects.setdefault(classifier, {})
        for label, their_obj in theirs.items():
            if label in mine:  # collision
                mine[label].bboxes.extend(their_obj.bboxes)
                mine[label].heatmaps.extend(their_obj.heatmaps)
            else:  # add the key
                mine[label] = their_obj

    @staticmethod
    def _confidence_as_float(value):
        """
        Convert a confidence to float. Values that cannot be converted
        (i.e. None) become the lowest finite float, so they sort last.
        """
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return -sys.float_info.max

    def export_pred_bboxes_to_text(self, name_pred_objects,
                                   max_num_bboxes=sys.maxsize,
                                   output_filtered_pred_obj=False,
                                   bbox_integer_coordinates=False):
        """
        Export the predicted bboxes to a text representation with multi lines,
        each line, with the following tab-separated fields:
          <image_name image_width image_height label full_image_confidence ...
           xmin ymin xmax ymax bbox_confidence>
        If max_num_bboxes is set, for each image and class label
        we sort the bboxes by confidence and we export only the
        top-max_num_bboxes bboxes per label.
        If bbox_integer_coordinates is True, the coordinates are multiplied
        by the image width/height and truncated to int.
        If output_filtered_pred_obj is True, the return value is the tuple
        (text, {'label'} -> AnnotatedObject) where each object is a deep
        copy holding only the exported bboxes; otherwise only the text
        is returned. The objects stored in self are never modified.
        """
        assert name_pred_objects in self.pred_objects
        lines = []
        output_object = {}
        # for each AnnotatedObject ....
        for label in self.pred_objects[name_pred_objects]:
            anno_object = self.pred_objects[name_pred_objects][label]
            # if the conf is not a number, set it to the lowest float
            full_image_confidence = self._confidence_as_float(
                anno_object.confidence)
            # work on a copy: the confidences below are overwritten
            bboxes = copy.deepcopy(anno_object.bboxes)
            for bb in bboxes:
                bb.confidence = self._confidence_as_float(bb.confidence)
            # highest confidence first, then keep the top ones only
            bboxes = sorted(bboxes, key=lambda bb: -bb.confidence)
            bboxes = bboxes[0:max_num_bboxes]
            if output_filtered_pred_obj:
                # the (expensive) copy is made only when it is returned
                filtered = copy.deepcopy(anno_object)
                filtered.bboxes = bboxes
                output_object[label] = filtered
            # for each bbox ...
            for bbox in bboxes:
                xmin, ymin = bbox.xmin, bbox.ymin
                xmax, ymax = bbox.xmax, bbox.ymax
                if bbox_integer_coordinates:
                    xmin = int(xmin * self.image_width)
                    ymin = int(ymin * self.image_height)
                    xmax = int(xmax * self.image_width)
                    ymax = int(ymax * self.image_height)
                lines.append(
                    '{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\n'
                    .format(
                        self.image_name, self.image_width, self.image_height,
                        anno_object.label, full_image_confidence,
                        xmin, ymin, xmax, ymax, bbox.confidence))
        out = ''.join(lines)
        if output_filtered_pred_obj:
            return out, output_object
        return out

    def extract_features(self, bboxes):
        """
        It extracts the feature vectors from the given list of bboxes
        or a single bbox.
        It returns a np.ndarray matrix of size [num_bboxes, num_dims].
        Note that you must register a FeatureExtractor module first,
        using the register_feature_extractor() method.
        If the module has been registered with save_features_cache=True,
        its cache is stored in the features field under the module name.
        """
        # check input
        assert hasattr(self, 'feature_extractor_') and self.feature_extractor_, \
            'You must register a FeatureExtractor module'
        if not isinstance(bboxes, list):
            bboxes = [bboxes]
        for bb in bboxes:
            assert isinstance(bb, BBox)
        # extract the features using the registered module
        feats = self.feature_extractor_.extract(bboxes)
        if getattr(self, 'save_features_cache_', False):
            self.features[self.feature_extractor_.name] = \
                self.feature_extractor_.get_cache()
        # check the output and return
        assert isinstance(feats, np.ndarray)
        assert feats.shape[0] == len(bboxes)
        return feats

    def register_feature_extractor(self, feature_extractor_params,
                                   save_features_cache=False):
        """
        Build and register a FeatureExtractor module, that will be
        used to extract the features from the image.
        The input must be a subclass of FeatureExtractorParams.
        If save_features_cache is True, the cache of the feature extractor
        module will be saved in the features field.
        Only one module can be registered (asserted).
        """
        # TODO: super-hack due to a circular import: the module is imported
        # here, at call time. The name is imported explicitly because a
        # wildcard import inside a function is a SyntaxError on Python 3.
        from featextractor import FeatureExtractor
        #assert isinstance(feature_extractor_params, FeatureExtractorParams)
        # these attributes may be missing (i.e. feature_extractor_ is
        # dropped by __getstate__)
        if not hasattr(self, 'features'):
            self.features = {}
        if not hasattr(self, 'feature_extractor_'):
            self.feature_extractor_ = None
        assert not self.feature_extractor_, 'Already present a FeatExtractor'
        # register
        self.feature_extractor_ = FeatureExtractor.create_feature_extractor(
            self, feature_extractor_params)
        self.save_features_cache_ = save_features_cache