Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Coco #336

Open
wants to merge 4 commits into
base: coco
Choose a base branch
from
Open

Coco #336

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Binary file added .infer.py.swp
Binary file not shown.
3 changes: 3 additions & 0 deletions README.md
Expand Up @@ -4,6 +4,9 @@ A [PyTorch](http://pytorch.org/) implementation of [Single Shot MultiBox Detecto

<img align="right" src= "https://github.com/amdegroot/ssd.pytorch/blob/master/doc/ssd.png" height = 400/>

### OK!!!!
This is my own branch.

### Table of Contents
- <a href='#installation'>Installation</a>
- <a href='#datasets'>Datasets</a>
Expand Down
Binary file added data/1.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added data/2.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 1 addition & 2 deletions data/coco.py
Expand Up @@ -29,7 +29,6 @@
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush')


def get_label_map(label_file):
label_map = {}
labels = open(label_file, 'r')
Expand Down Expand Up @@ -83,7 +82,7 @@ class COCODetection(data.Dataset):
in the target (bbox) and transforms it.
"""

def __init__(self, root, image_set='trainval35k', transform=None,
def __init__(self, root, image_set='train2014', transform=None,
target_transform=COCOAnnotationTransform(), dataset_name='MS COCO'):
sys.path.append(osp.join(root, COCO_API))
from pycocotools.coco import COCO
Expand Down
2 changes: 1 addition & 1 deletion data/config.py
Expand Up @@ -27,7 +27,7 @@
}

coco = {
'num_classes': 201,
'num_classes': 81,
'lr_steps': (280000, 360000, 400000),
'max_iter': 400000,
'feature_maps': [38, 19, 10, 5, 3, 1],
Expand Down
1 change: 1 addition & 0 deletions debug.sh
@@ -0,0 +1 @@
vim layers/functions/detection.py
31 changes: 31 additions & 0 deletions infer.py
@@ -0,0 +1,31 @@
# Run inference on a single image.

import torch
import cv2
from ssd import build_ssd

num_classes = 81
image_path = "data/1.jpg"
# cv2.imread returns None (it does not raise) when the file cannot be read;
# main() checks for that before using the image.
image = cv2.imread(image_path)
weights = "weights/ssd300_COCO_10000.pth"


def get_features_hook(self, input, output):
    """Print the shape of a layer's output.

    The (module, input, output) signature matches what
    ``torch.nn.Module.register_forward_hook`` passes to a hook; the hook is
    not registered anywhere in this script yet.
    """
    # Reading ``output.shape`` directly avoids copying the whole activation
    # to host memory and NumPy just to look at its dimensions.
    print("hooks ", tuple(output.shape))


def main():
    """Build the network, load the checkpoint and print the output shape.

    Raises:
        FileNotFoundError: if ``image_path`` could not be read by OpenCV.
    """
    if image is None:
        raise FileNotFoundError("could not read image: " + image_path)

    net = build_ssd('test', 300, num_classes)
    # map_location lets a checkpoint saved from a GPU run load on a CPU-only
    # machine; the network itself is never moved to a GPU in this script.
    net.load_state_dict(torch.load(weights, map_location='cpu'))
    net.eval()

    # HWC image -> float tensor -> CHW -> add a leading batch dimension.
    resized = cv2.resize(image, (300, 300))
    batch = torch.from_numpy(resized).float().permute(2, 0, 1).unsqueeze(0)

    # Pure inference: no need to record the autograd graph.
    with torch.no_grad():
        output = net(batch)
    print(output.shape)
    # TODO: register get_features_hook on a layer to inspect its output.


if __name__ == '__main__':
    main()

130 changes: 65 additions & 65 deletions layers/box_utils.py
Expand Up @@ -172,68 +172,68 @@ def log_sum_exp(x):
# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        A tuple ``(keep, count)``: ``keep`` is a long tensor with one slot per
        score whose first ``count`` entries are the indices (rows of
        ``boxes``) of the kept boxes, highest score first; the remaining
        entries are zero padding.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        # Same (keep, count) shape as the normal path, so callers that
        # unpack two values also work when nothing passed the threshold.
        return keep, 0

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    _, idx = scores.sort(0)  # ascending, so the best candidate is last
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view

        # Corners of the intersection between box i and each remaining box.
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])

        # Negative extents mean the boxes do not overlap on that axis.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter / union

        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
#def nms(boxes, scores, overlap=0.5, top_k=200):
# """Apply non-maximum suppression at test time to avoid detecting too many
# overlapping bounding boxes for a given object.
# Args:
# boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
# scores: (tensor) The class predscores for the img, Shape:[num_priors].
# overlap: (float) The overlap thresh for suppressing unnecessary boxes.
# top_k: (int) The Maximum number of box preds to consider.
# Return:
# The indices of the kept boxes with respect to num_priors.
# """
#
# keep = scores.new(scores.size(0)).zero_().long()
# if boxes.numel() == 0:
# return keep
# x1 = boxes[:, 0]
# y1 = boxes[:, 1]
# x2 = boxes[:, 2]
# y2 = boxes[:, 3]
# area = torch.mul(x2 - x1, y2 - y1)
# v, idx = scores.sort(0) # sort in ascending order
# # I = I[v >= 0.01]
# idx = idx[-top_k:] # indices of the top-k largest vals
# xx1 = boxes.new()
# yy1 = boxes.new()
# xx2 = boxes.new()
# yy2 = boxes.new()
# w = boxes.new()
# h = boxes.new()
#
# # keep = torch.Tensor()
# count = 0
# while idx.numel() > 0:
# i = idx[-1] # index of current largest val
# # keep.append(i)
# keep[count] = i
# count += 1
# if idx.size(0) == 1:
# break
# idx = idx[:-1] # remove kept element from view
# # load bboxes of next highest vals
# torch.index_select(x1, 0, idx, out=xx1)
# torch.index_select(y1, 0, idx, out=yy1)
# torch.index_select(x2, 0, idx, out=xx2)
# torch.index_select(y2, 0, idx, out=yy2)
# # store element-wise max with next highest score
# xx1 = torch.clamp(xx1, min=x1[i])
# yy1 = torch.clamp(yy1, min=y1[i])
# xx2 = torch.clamp(xx2, max=x2[i])
# yy2 = torch.clamp(yy2, max=y2[i])
# w.resize_as_(xx2)
# h.resize_as_(yy2)
# w = xx2 - xx1
# h = yy2 - yy1
# # check sizes of xx1 and xx2.. after each iteration
# w = torch.clamp(w, min=0.0)
# h = torch.clamp(h, min=0.0)
# inter = w*h
# # IoU = i / (area(a) + area(b) - i)
# rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
# union = (rem_areas - inter) + area[i]
# IoU = inter/union # store result in iou
# # keep only elements with an IoU <= overlap
# idx = idx[IoU.le(overlap)]
# return keep, count
4 changes: 4 additions & 0 deletions layers/functions/1
@@ -0,0 +1,4 @@
#to do



79 changes: 56 additions & 23 deletions layers/functions/detection.py
@@ -1,8 +1,25 @@
import torch
from torch.autograd import Function
from ..box_utils import decode, nms
from ..box_utils import decode
from data import voc as cfg

class paper_box(object):
    """Plain record tying a box to an index and an (x, y) position.

    Attributes are stored exactly as passed in; no validation or copying is
    performed.
    """

    def __init__(self, index, x, y, box):
        # Single tuple assignment; equivalent to setting each field in turn.
        self.index, self.x, self.y, self.box = index, x, y, box
def box_iou(a, b):
    """Return the intersection-over-union of ``a.box`` and ``b.box``.

    Each ``box`` is indexed as ``[x_min, y_min, x_max, y_max]`` (width is
    ``box[2] - box[0]``, height is ``box[3] - box[1]``).

    Args:
        a: object exposing a ``box`` attribute.
        b: object exposing a ``box`` attribute.
    Return:
        0 when the boxes do not overlap on either axis or when the union has
        no area; otherwise intersection area divided by union area.
    """
    ax1, ay1, ax2, ay2 = a.box[0], a.box[1], a.box[2], a.box[3]
    bx1, by1, bx2, by2 = b.box[0], b.box[1], b.box[2], b.box[3]

    # Disjoint on the x axis or on the y axis: nothing to intersect.
    if ax2 < bx1 or ax1 > bx2:
        return 0
    if ay1 > by2 or ay2 < by1:
        return 0

    width = min(ax2, bx2) - max(ax1, bx1)
    # The top edge of the intersection is the larger of the two y_min values
    # (one from each box), not a's y_min compared with itself.
    height = min(ay2, by2) - max(ay1, by1)
    inter = width * height

    a_area = (ax2 - ax1) * (ay2 - ay1)
    b_area = (bx2 - bx1) * (by2 - by1)
    union = a_area + b_area - inter
    if union <= 0:
        # Both boxes are degenerate (zero area); avoid dividing by zero.
        return 0
    return inter / union

class Detect(Function):
"""At test time, Detect is the final layer of SSD. Decode location preds,
Expand Down Expand Up @@ -36,27 +53,43 @@ def forward(self, loc_data, conf_data, prior_data):
output = torch.zeros(num, self.num_classes, self.top_k, 5)
conf_preds = conf_data.view(num, num_priors,
self.num_classes).transpose(2, 1)
# Next we will specify the exact layer and its output.
# We get all the predicted boxes and their confidences.
decoded_boxes = decode(loc_data[0], prior_data, self.variance)
conf_data = conf_data[0]
loc_data = loc_data[0]
all_boxes = torch.cat((decoded_boxes, conf_data), 1)
# for i in range(self.num_classes):
# index = []
# for j in range(len(loc_data)):
# index.append(j)
# #in the specific class, we will do something specifical
# for j in range(len(loc_data)):
# for k in range(len(loc_data) - j):
# if conf_data[j][i] < conf_data[k][i]:
# index[j] =
return all_boxes

# Decode predictions into bboxes.
for i in range(num):
decoded_boxes = decode(loc_data[i], prior_data, self.variance)
# For each class, perform nms
conf_scores = conf_preds[i].clone()

for cl in range(1, self.num_classes):
c_mask = conf_scores[cl].gt(self.conf_thresh)
scores = conf_scores[cl][c_mask]
if scores.dim() == 0:
continue
l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
boxes = decoded_boxes[l_mask].view(-1, 4)
# idx of highest scoring and non-overlapping boxes per class
ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
output[i, cl, :count] = \
torch.cat((scores[ids[:count]].unsqueeze(1),
boxes[ids[:count]]), 1)
flt = output.contiguous().view(num, -1, 5)
_, idx = flt[:, :, 0].sort(1, descending=True)
_, rank = idx.sort(1)
flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
return output
# for i in range(num):
# decoded_boxes = decode(loc_data[i], prior_data, self.variance)
# # For each class, perform nms
# conf_scores = conf_preds[i].clone()
#
# for cl in range(1, self.num_classes):
# c_mask = conf_scores[cl].gt(self.conf_thresh)
# scores = conf_scores[cl][c_mask]
# if scores.size(0) == 0:
# continue
# l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
# boxes = decoded_boxes[l_mask].view(-1, 4)
# # idx of highest scoring and non-overlapping boxes per class
# ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
# output[i, cl, :count] = \
# torch.cat((scores[ids[:count]].unsqueeze(1),
# boxes[ids[:count]]), 1)
# flt = output.contiguous().view(num, -1, 5)
# _, idx = flt[:, :, 0].sort(1, descending=True)
# _, rank = idx.sort(1)
# flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
# return output
7 changes: 7 additions & 0 deletions layers/functions/nms.py
@@ -0,0 +1,7 @@
#to do

def nms(all_boxes):
    """Placeholder for non-maximum suppression.

    No suppression is performed yet: the argument is handed back unchanged.
    """
    # TODO: implement the actual suppression step.
    return all_boxes



7 changes: 7 additions & 0 deletions layers/functions/!
@@ -0,0 +1,7 @@
#to do

def nms():
    """Placeholder stub; always returns 0.

    The original definition had no parameter list, which is a SyntaxError.
    """
    return 0



6 changes: 4 additions & 2 deletions layers/modules/multibox_loss.py
Expand Up @@ -94,8 +94,8 @@ def forward(self, predictions, targets):
loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

# Hard Negative Mining
loss_c[pos] = 0 # filter out pos boxes for now
loss_c = loss_c.view(num, -1)
loss_c[pos] = 0 # filter out pos boxes for now
_, loss_idx = loss_c.sort(1, descending=True)
_, idx_rank = loss_idx.sort(1)
num_pos = pos.long().sum(1, keepdim=True)
Expand All @@ -111,7 +111,9 @@ def forward(self, predictions, targets):

# Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

N = num_pos.data.sum()
N = num_pos.data.sum().double()
loss_l = loss_l.double()
loss_c = loss_c.double()
loss_l /= N
loss_c /= N
return loss_l, loss_c
16 changes: 10 additions & 6 deletions train.py
Expand Up @@ -150,19 +150,23 @@ def train():
batch_iterator = iter(data_loader)
for iteration in range(args.start_iter, cfg['max_iter']):
if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
epoch += 1
update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
'append', epoch_size)
# reset epoch loss counters
loc_loss = 0
conf_loss = 0
epoch += 1

if iteration in cfg['lr_steps']:
step_index += 1
adjust_learning_rate(optimizer, args.gamma, step_index)

# load train data
images, targets = next(batch_iterator)
try:
images, targets = next(batch_iterator)
except StopIteration:
batch_iterator = iter(data_loader)
images, targets = next(batch_iterator)

if args.cuda:
images = Variable(images.cuda())
Expand All @@ -180,15 +184,15 @@ def train():
loss.backward()
optimizer.step()
t1 = time.time()
loc_loss += loss_l.data[0]
conf_loss += loss_c.data[0]
loc_loss += loss_l.data
conf_loss += loss_c.data

if iteration % 10 == 0:
print('timer: %.4f sec.' % (t1 - t0))
print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ')
print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data), end=' ')

if args.visdom:
update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
update_vis_plot(iteration, loss_l.data, loss_c.data,
iter_plot, epoch_plot, 'append')

if iteration != 0 and iteration % 5000 == 0:
Expand Down