/
eval_dets.lua
188 lines (162 loc) · 6.77 KB
/
eval_dets.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
require 'torch'
require 'nn'
require 'nngraph'
require 'misc.DetsLoader'
require 'misc.LanguageModel'
require 'misc.modules.SplitEmbedding'
require 'misc.modules.SplitGeneration'
local utils = require 'misc.utils'
local net_utils = require 'misc.net_utils'
local eval_utils = require 'misc.eval_utils'
-------------------------------------------------------------------------------
-- Input arguments and options
-------------------------------------------------------------------------------
-- `local` keeps the parser out of the global table (accidental globals are the
-- classic Lua bug); the chunk-level local is still visible to the rest of the file.
local cmd = torch.CmdLine()
cmd:text()
cmd:text('Evaluate Referring Expression Comprehension')
cmd:text()
-- Input paths
cmd:option('-dataset', 'refcoco_unc', 'name of our dataset+splitBy')
cmd:option('-id', '', 'model id to be evaluated')
cmd:option('-mode', 0, '0: use lm, 1: use embedding, 2: ensemble')
cmd:option('-lambda', 0.2, 'weight on lm for ensemble')
-- Test on what split
cmd:option('-split', 'testA', 'what split to use: val|test|train')
-- misc
-- Declared so opt.batch_size exists when checked after checkpoint load;
-- 0 means "inherit the batch size stored in the checkpoint".
cmd:option('-batch_size', 0, 'batch size; 0 = use checkpoint value')
cmd:option('-gpuid', 0, 'which gpu to use. -1 = use CPU')
cmd:text()
-------------------------------------------------------------------------------
-- Basic Torch initializations
-------------------------------------------------------------------------------
-- Parse command-line options; default tensor type is CPU float.
local opt = cmd:parse(arg)
torch.setdefaulttensortype('torch.FloatTensor')
-- Pull in the CUDA stack and select the device only when a GPU is requested.
local on_gpu = opt.gpuid >= 0
if on_gpu then
  require 'cutorch'
  require 'cunn'
  require 'cudnn'
  cutorch.setDevice(opt.gpuid + 1) -- cutorch device ids are 1-indexed, gpuid is 0-indexed
end
print(opt)
-------------------------------------------------------------------------------
-- Create the Data Loader instance
-------------------------------------------------------------------------------
-- All preprocessed annotations live under cache/prepro/<dataset>/.
local prepro_dir = 'cache/prepro/' .. opt.dataset
local loader = DetsLoader{
  data_json = prepro_dir .. '/data.json',
  data_h5   = prepro_dir .. '/data.h5',
  dets_json = prepro_dir .. '/dets.json',
}
-- also load extracted features: call scripts/extract_xxx_feats before training!
-- Each feature type maps to '<feats_dir>/<name>_feats.h5'.
local feats_dir = 'cache/feats/' .. opt.dataset
local featsOpt = {}
for _, name in ipairs{'ann', 'img', 'det', 'window2', 'window3', 'window4', 'window5'} do
  featsOpt[name] = feats_dir .. '/' .. name .. '_feats.h5'
end
loader:loadFeats(featsOpt)
-------------------------------------------------------------------------------
-- Load the model checkpoint to evaluate
-------------------------------------------------------------------------------
assert(string.len(opt.dataset) > 0 and string.len(opt.id) > 0, 'must provide dataset name and model id')
local model_path = path.join('models', opt.dataset, 'model_id' .. opt.id .. '.t7')
local checkpoint = torch.load(model_path)
local protos = checkpoint.protos
-- Inherit the batch size from the checkpoint when unset. opt.batch_size can be
-- nil (if no -batch_size option was parsed into opt), and `nil == 0` is false
-- in Lua, so the nil case must be guarded explicitly or the override is skipped.
if opt.batch_size == nil or opt.batch_size == 0 then opt.batch_size = checkpoint.opt.batch_size end
-- Copy the training-time options the evaluation path depends on from the
-- checkpoint into opt, so evaluation matches the trained configuration.
local fetch = {'use_context', 'use_ann', 'use_location', 'margin', 'dif_ann', 'dif_location', 'dif_num',
               'dif_source', 'dif_pool'}
-- fetch is a plain sequence: iterate with ipairs; the index is unused.
for _, v in ipairs(fetch) do opt[v] = checkpoint.opt[v] end
-------------------------------------------------------------------------------
-- Evaluation fun(ction)
-------------------------------------------------------------------------------
-- Ship every sub-network to the GPU (when requested) and switch it to
-- evaluate mode so stochastic layers (dropout etc.) behave deterministically.
for k, v in pairs(protos) do
  print('protos has ' .. k)
  if opt.gpuid >= 0 then v:cuda() end
  v:evaluate()
end
-- Initialize the iterator and the evaluation accumulators.
-- (The original also declared `n` and `loss_sum`; neither was ever read or
-- updated anywhere in this file, so they are removed as dead locals.)
loader:resetImageIterator(opt.split)
local loss_evals = 0   -- number of sentences scored so far
local accuracy = 0     -- number of predictions with IoU >= 0.5
local predictions = {} -- one entry per sentence, dumped to JSON at the end
-- evaluate: iterate whole images until the loader signals it has wrapped
-- around the split. For each image, every sentence is scored against every
-- detected box, and the best-scoring box is compared to the ground truth.
while true do
-- fetch data for one image
local data = loader:getImageBatch(opt.split, opt)
local image_id = data.image_id
local img_det_ids = data.img_det_ids -- ids of detected boxes in this image
local sent_ids = data.sent_ids -- ids of referring expressions for this image
local gd_boxs = data.gd_boxs -- ground-truth box per sentence (parallel to sent_ids)
assert(#gd_boxs == #sent_ids)
local feats = data.feats -- {(num_anns, dim), ...}
local seqz = data.seqz -- (seq_length, num_sents)
local zseq = data.zseq -- (seq_length, num_sents)
assert(feats[1]:size(1) == #img_det_ids)
assert(seqz:size(2) == #sent_ids)
-- ship to GPU (feats and the zero-padded sequences; seqz stays on CPU)
if opt.gpuid >= 0 then
for k = 1, #feats do feats[k] = feats[k]:cuda() end
zseq = zseq:cuda()
end
-- check over each sent
local seq_length = loader:getSeqLength()
for i, sent_id in ipairs(sent_ids) do
-- expand sent_i's seq: replicate this sentence's column across all detected
-- boxes so one forward pass scores every box against the same sentence
local sent_zseq = zseq[{ {}, {i} }]:expand(seq_length, #img_det_ids)
local sent_seqz = seqz[{ {}, {i} }]:expand(seq_length, #img_det_ids)
-- forward through the four sub-networks: visual encoder, language encoder,
-- joint CCA embedding (yields cosine similarities), then combiner + LM
local vis_enc_feats = protos.vis_encoder:forward(feats)
local lang_enc_feats = protos.lang_encoder:forward(sent_zseq)
local cossim, vis_emb_feats = unpack(protos.cca_embedding:forward{vis_enc_feats, lang_enc_feats})
local vis_feats = protos.vis_combiner:forward{vis_enc_feats, vis_emb_feats}
local logprobs = protos.lm:forward{vis_feats, sent_seqz} -- (seq_length+1, #img_det_ids, vocab_size+1)
-- language ranking margin loss: negate per-box generation loss so that a
-- higher score means the box explains the sentence better
-- NOTE(review): computeLosses is a global, presumably installed by one of the
-- misc requires at the top — confirm against misc.LanguageModel
local lm_scores = -computeLosses(logprobs, sent_seqz):float() -- (#img_det_ids, )
-- embedding ranking margin loss: cosine similarity per box from the embedding
local emb_scores = cossim:float()
-- check detected box: pick the scoring mode selected on the command line
local scores, mode_str
assert(opt.mode==0 or opt.mode==1 or opt.mode==2)
if opt.mode == 0 then
mode_str = 'lm'
scores = lm_scores
elseif opt.mode == 1 then
mode_str = 'emb'
scores = emb_scores
else
-- ensemble: embedding score plus lambda-weighted language-model score
mode_str = 'ensemble'
scores = emb_scores + opt.lambda * lm_scores
end
-- argmax over boxes; max_ix arrives as a 1-element tensor, unwrap to a number
local _, max_ix = torch.max(scores, 1)
max_ix = max_ix[1]
local pred_det_id = img_det_ids[max_ix]
local pred_box = loader.Dets[pred_det_id]['box']
local gd_box = gd_boxs[i]
-- check IoU: a prediction counts as correct at the standard 0.5 threshold
local flag = -1
if utils.IoU(pred_box, gd_box) >= 0.5 then accuracy = accuracy+1; flag = 1 end
loss_evals = loss_evals + 1
-- add to predictions (flag: 1 = correct, -1 = incorrect)
local entry = {sent_id = sent_id, image_id = image_id, pred_det_id = pred_det_id,
gd_box = gd_box, pred_box = pred_box, flag = flag}
table.insert(predictions, entry)
-- print running accuracy and image-iterator progress
local ix0 = data.bounds.it_pos_now - 1
local ix1 = data.bounds.it_max
print(string.format('%s-th: evaluating [%s] performance using [%s] ... image[%d/%d] sent[%d], acc=%.2f%%',
loss_evals, opt.split, mode_str, ix0, ix1, i, accuracy*100.0/loss_evals))
end
if data.bounds.wrapped then break end -- we've used up images
end
-- Report overall accuracy; guard the empty-split edge case so we do not
-- print nan from a 0/0 division.
if loss_evals > 0 then
  print(string.format('accuracy = %.2f%%', accuracy/loss_evals*100))
else
  print('accuracy = n/a (no sentences evaluated)')
end
-- save results
-- A single `mkdir -p` creates the full path (including a missing parent
-- 'cache/' dir), is a no-op when it already exists, and avoids the
-- check-then-create race of separate file_exists tests.
local cache_box_dataset_dir = path.join('cache/box', opt.dataset)
os.execute('mkdir -p ' .. cache_box_dataset_dir)
local cache_path = path.join(cache_box_dataset_dir, 'model_id' .. opt.id .. '_' .. opt.split .. '(dets).json')
utils.write_json(cache_path, {predictions=predictions})