Subt code add #527

Open · wants to merge 5 commits into master
2 changes: 1 addition & 1 deletion data/__init__.py
@@ -1,5 +1,5 @@
from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT

from .subt_artifact import SUBTDetection, SUBTAnnotationTransform, SUBT_CLASSES, SUBT_ROOT
from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
from .config import *
import torch
4 changes: 3 additions & 1 deletion data/coco.py
@@ -8,7 +8,9 @@
import cv2
import numpy as np

# COCO_ROOT = osp.join(HOME, 'data/coco/')
# COCO_ROOT = osp.join(HOME, '/content/ssd.pytorch/data/')
COCO_ROOT = osp.join(HOME, 'data/')
IMAGES = 'images'
ANNOTATIONS = 'annotations'
COCO_API = 'PythonAPI'
18 changes: 17 additions & 1 deletion data/config.py
@@ -2,7 +2,8 @@
import os.path

# gets home dir cross platform
# HOME = os.path.expanduser("~")
HOME = '/content/ssd.pytorch/'  # Colab-style absolute path; restore os.path.expanduser("~") for local runs

# for making bounding boxes pretty
COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
@@ -40,3 +41,18 @@
'clip': True,
'name': 'COCO',
}

SubT = {
'num_classes': 6,
'lr_steps': (4000, 8000, 12000),
'max_iter': 7500,
'feature_maps': [38, 19, 10, 5, 3, 1],
'min_dim': 300,
'steps': [8, 16, 32, 64, 100, 300],
'min_sizes': [21, 45, 99, 153, 207, 261],
'max_sizes': [45, 99, 153, 207, 261, 315],
'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
'variance': [0.1, 0.2],
'clip': True,
'name': 'SubT',
}
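For orientation, 'num_classes': 6 here is the five SUBT_CLASSES plus the background class, and with 'max_iter': 7500 only the 4000 entry in 'lr_steps' would ever take effect. A minimal sketch of how the new config might be consumed, assuming the usual train.py pattern in this repo (the wiring below is an assumption, not part of this diff):

from data import SubT, SUBTDetection, SUBT_ROOT

cfg = SubT
dataset = SUBTDetection(root=SUBT_ROOT)  # default image_sets=['train', 'val']
print(cfg['num_classes'], len(dataset))  # 6, number of listed image ids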
6 changes: 3 additions & 3 deletions data/scripts/VOC2007.sh
@@ -7,9 +7,9 @@ start=`date +%s`
if [ -z "$1" ]
then
# navigate to the data directory
# echo "navigating to ~/data/ ..."
# mkdir -p ~/data
# cd ~/data/
echo "navigating to /content/ssd.pytorch/data ..."
# mkdir -p /content/data
cd /content/ssd.pytorch/data
else
# check if is valid directory
if [ ! -d $1 ]; then
202 changes: 202 additions & 0 deletions data/subt_artifact.py
@@ -0,0 +1,202 @@
"""VOC Dataset Classes

Original author: Francisco Massa
https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py

Updated by: Ellis Brown, Max deGroot
"""
from .config import HOME
import os.path as osp
import sys
import torch
import torch.utils.data as data
if '/opt/ros/kinetic/lib/python2.7/dist-packages' in sys.path:
sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages')
import cv2
import numpy as np
if sys.version_info[0] == 2:
import xml.etree.cElementTree as ET
else:
import xml.etree.ElementTree as ET

SUBT_CLASSES = [  # always index 0
    'missle', 'backpack', 'blueline', 'drill', 'can']
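# 5 artifact classes + 1 background class gives 'num_classes': 6 in config.SubT.
# The 'missle' spelling must match the <name> tags in the annotation XML,
# since SUBTAnnotationTransform skips names missing from class_to_ind.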

#SUBT_CLASSES = ( # always index 0
# 'valve', '')

# note: if you used our download scripts, this should be right
SUBT_ROOT = osp.join(HOME, "data/subt_artifact/")


class SUBTAnnotationTransform(object):
"""Transforms a VOC annotation into a Tensor of bbox coords and label index
Initilized with a dictionary lookup of classnames to indexes

Arguments:
class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
(default: alphabetic indexing of VOC's 20 classes)
keep_difficult (bool, optional): keep difficult instances or not
(default: False)
height (int): height
width (int): width
"""

def __init__(self, class_to_ind=None, keep_difficult=False):
self.class_to_ind = class_to_ind or dict(
zip(SUBT_CLASSES, range(len(SUBT_CLASSES))))
        self.keep_difficult = keep_difficult

    def __call__(self, target, width, height):
"""
Arguments:
target (annotation) : the target annotation to be made usable
will be an ET.Element
Returns:
a list containing lists of bounding boxes [bbox coords, class name]
"""
res = []
for obj in target.iter('object'):
#difficult = int(obj.find('difficult').text) == 1
#if not self.keep_difficult and difficult:
# continue
name = obj.find('name').text.lower().strip()
if name not in self.class_to_ind:
continue
bbox = obj.find('bndbox')
if bbox is not None:
pts = ['xmin', 'ymin', 'xmax', 'ymax']
bndbox = []
for i, pt in enumerate(pts):
cur_pt = int(bbox.find(pt).text) - 1
# scale height or width
cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
bndbox.append(cur_pt)
label_idx = self.class_to_ind[name]
bndbox.append(label_idx)
res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]
# img_id = target.find('filename').text[:-4]
else: # For LabelMe tool
polygons = obj.find('polygon')
x = []
y = []
bndbox = []
for polygon in polygons.iter('pt'):
# scale height or width
x.append(int(polygon.find('x').text) / width)
y.append(int(polygon.find('y').text) / height)
bndbox.append(min(x))
bndbox.append(min(y))
bndbox.append(max(x))
bndbox.append(max(y))
label_idx = self.class_to_ind[name]
bndbox.append(label_idx)
res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]

return res # [[xmin, ymin, xmax, ymax, label_ind], ... ]


class SUBTDetection(data.Dataset):
"""VOC Detection Dataset Object

input is image, target is annotation

Arguments:
root (string): filepath to VOCdevkit folder.
image_set (string): imageset to use (eg. 'train', 'val', 'test')
transform (callable, optional): transformation to perform on the
input image
target_transform (callable, optional): transformation to perform on the
target `annotation`
(eg: take in caption string, return tensor of word indices)
dataset_name (string, optional): which dataset to load
(default: 'VOC2007')
"""

def __init__(self, root,
image_sets=['train', 'val'],
transform=None, target_transform=SUBTAnnotationTransform(),
dataset_name='SUBT'):
self.root = root
self.image_set = image_sets
self.transform = transform
self.target_transform = target_transform
self.name = dataset_name
self._annopath = osp.join('%s', 'Annotations', '%s.xml')
self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
self.ids = list()
for name in image_sets:
rootpath = osp.join(self.root)
for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
self.ids.append((rootpath, line.strip()))

def __getitem__(self, index):
im, gt, h, w = self.pull_item(index)

return im, gt

def __len__(self):
return len(self.ids)

def pull_item(self, index):
img_id = self.ids[index]

target = ET.parse(self._annopath % img_id).getroot()
img = cv2.imread(self._imgpath % img_id)
height, width, channels = img.shape

if self.target_transform is not None:
target = self.target_transform(target, width, height)

if self.transform is not None:
target = np.array(target)
            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
# to rgb
img = img[:, :, (2, 1, 0)]
# img = img.transpose(2, 0, 1)
target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
return torch.from_numpy(img).permute(2, 0, 1), target, height, width

def pull_image(self, index):
        '''Returns the original image object at index in OpenCV (numpy) form

        Note: not using self.__getitem__(), as any transformations passed in
        could mess up this functionality.

        Argument:
            index (int): index of img to show
        Return:
            numpy img (BGR, as loaded by cv2.imread)
        '''
img_id = self.ids[index]
return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)

def pull_anno(self, index):
'''Returns the original annotation of image at index

Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.

Argument:
index (int): index of img to get annotation of
Return:
list: [img_id, [(label, bbox coords),...]]
eg: ('001718', [('dog', (96, 13, 438, 332))])
'''
img_id = self.ids[index]
anno = ET.parse(self._annopath % img_id).getroot()
gt = self.target_transform(anno, 1, 1)
return img_id[1], gt

def pull_tensor(self, index):
'''Returns the original image at an index in tensor form

Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.

Argument:
index (int): index of img to show
Return:
tensorized version of img, squeezed
'''
return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
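A quick smoke test for the new dataset class, assuming SUBT_ROOT points at a VOC-style tree with Annotations/, JPEGImages/ and ImageSets/Main/train.txt (the paths and the omitted transform are illustrative assumptions):

from data.subt_artifact import SUBTDetection, SUBT_ROOT

dataset = SUBTDetection(SUBT_ROOT, image_sets=['train'])
img, gt = dataset[0]                 # BGR CHW uint8 tensor; gt = [[xmin, ymin, xmax, ymax, label_idx], ...]
img_id, anno = dataset.pull_anno(0)  # width=height=1 here, so boxes stay in pixel units
print(img.shape, gt[:1], img_id, anno[:1])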
18 changes: 18 additions & 0 deletions layers/box_utils.py
@@ -212,6 +212,18 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
if idx.size(0) == 1:
break
idx = idx[:-1] # remove kept element from view
########################################################
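        # Round-tripping each tensor through Variable(...).data detaches it from
        # the autograd graph before the in-place index_select calls below.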
idx = torch.autograd.Variable(idx, requires_grad=False)
idx = idx.data
x1 = torch.autograd.Variable(x1, requires_grad=False)
x1 = x1.data
y1 = torch.autograd.Variable(y1, requires_grad=False)
y1 = y1.data
x2 = torch.autograd.Variable(x2, requires_grad=False)
x2 = x2.data
y2 = torch.autograd.Variable(y2, requires_grad=False)
y2 = y2.data
########################################################
# load bboxes of next highest vals
torch.index_select(x1, 0, idx, out=xx1)
torch.index_select(y1, 0, idx, out=yy1)
@@ -231,6 +243,12 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
h = torch.clamp(h, min=0.0)
inter = w*h
# IoU = i / (area(a) + area(b) - i)
################################################
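        # Same Variable(...).data round-trip to detach area and idx here.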
area = torch.autograd.Variable(area, requires_grad=False)
area = area.data
        idx = torch.autograd.Variable(idx, requires_grad=False)
idx = idx.data
################################################
rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
union = (rem_areas - inter) + area[i]
IoU = inter/union # store result in iou
27 changes: 26 additions & 1 deletion layers/modules/multibox_loss.py
@@ -30,10 +30,25 @@ class MultiBoxLoss(nn.Module):
See: https://arxiv.org/pdf/1512.02325.pdf for more details.
"""

# def __init__(self, num_classes, overlap_thresh, prior_for_matching,
# bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
# use_gpu=True):
# super(MultiBoxLoss, self).__init__()
# self.use_gpu = use_gpu
# self.num_classes = num_classes
# self.threshold = overlap_thresh
# self.background_label = bkg_label
# self.encode_target = encode_target
# self.use_prior_for_matching = prior_for_matching
# self.do_neg_mining = neg_mining
# self.negpos_ratio = neg_pos
# self.neg_overlap = neg_overlap
# self.variance = cfg['variance']
def __init__(self, batch_size, num_classes, overlap_thresh, prior_for_matching,
bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
use_gpu=True):
super(MultiBoxLoss, self).__init__()
self.batch_size = batch_size
self.use_gpu = use_gpu
self.num_classes = num_classes
self.threshold = overlap_thresh
@@ -94,6 +109,16 @@ def forward(self, predictions, targets):
loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

# Hard Negative Mining
# loss_c[pos] = 0 # filter out pos boxes for now
# loss_c = loss_c.view(num, -1)
# _, loss_idx = loss_c.sort(1, descending=True)
# _, idx_rank = loss_idx.sort(1)
# num_pos = pos.long().sum(1, keepdim=True)
# num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
# neg = idx_rank < num_neg.expand_as(idx_rank)
        # 8732 = total default boxes for SSD300:
        # 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8732
        a = int(len(loss_c) / 8732)  # recover the batch size from the flattened loss
        loss_c = loss_c.reshape((a, 8732))
        # print(loss_c.shape, pos.shape)
loss_c[pos] = 0 # filter out pos boxes for now
loss_c = loss_c.view(num, -1)
_, loss_idx = loss_c.sort(1, descending=True)
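Because __init__ now takes batch_size first, existing call sites (e.g. in train.py) need updating; a minimal sketch, where the batch size of 32 and the remaining argument values are assumptions mirroring the repo's usual defaults. Note that, at least in the hunks shown, self.batch_size is stored but the reshape above still derives the batch size from the 8732 priors:

criterion = MultiBoxLoss(32, cfg['num_classes'], 0.5, True, 0, True, 3,
                         0.5, False, use_gpu=True)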
13 changes: 7 additions & 6 deletions ssd.py
@@ -96,12 +96,13 @@ def forward(self, x):
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
if self.phase == "test":
# output = self.detect(
# loc.view(loc.size(0), -1, 4), # loc preds
# self.softmax(conf.view(conf.size(0), -1,
# self.num_classes)), # conf preds
# self.priors.type(type(x.data)) # default boxes
# )
            # Calling .forward() directly on the legacy autograd Function,
            # presumably to avoid the non-static-forward error in newer PyTorch.
            output = self.detect.forward(
                loc.view(loc.size(0), -1, 4),              # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)), # conf preds
                self.priors.type(type(x.data)))            # default boxes
else:
output = (
loc.view(loc.size(0), -1, 4),