Subt code add #527

Open · wants to merge 5 commits into master
2 changes: 1 addition & 1 deletion data/__init__.py
@@ -1,5 +1,5 @@
from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT

from .subt_artifact import SUBTDetection, SUBTAnnotationTransform, SUBT_CLASSES, SUBT_ROOT
from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
from .config import *
import torch
4 changes: 3 additions & 1 deletion data/coco.py
@@ -8,7 +8,9 @@
import cv2
import numpy as np

# COCO_ROOT = osp.join(HOME, 'data/coco/')
# COCO_ROOT = osp.join(HOME, '/content/ssd.pytorch/data/')
COCO_ROOT = osp.join(HOME, 'data/')
IMAGES = 'images'
ANNOTATIONS = 'annotations'
COCO_API = 'PythonAPI'
18 changes: 17 additions & 1 deletion data/config.py
@@ -2,7 +2,8 @@
import os.path

# gets home dir cross platform
# HOME = os.path.expanduser("~")
HOME = '/content/ssd.pytorch/'  # Colab-style absolute path; restore os.path.expanduser("~") for local runs

# for making bounding boxes pretty
COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
@@ -40,3 +41,18 @@
'clip': True,
'name': 'COCO',
}

SubT = {
'num_classes': 6,
'lr_steps': (4000, 8000, 12000),
'max_iter': 7500,
'feature_maps': [38, 19, 10, 5, 3, 1],
'min_dim': 300,
'steps': [8, 16, 32, 64, 100, 300],
'min_sizes': [21, 45, 99, 153, 207, 261],
'max_sizes': [45, 99, 153, 207, 261, 315],
'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
'variance': [0.1, 0.2],
'clip': True,
'name': 'SubT',
}
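For orientation, 'num_classes': 6 here is the five SUBT_CLASSES plus the background class, and with 'max_iter': 7500 only the 4000 entry in 'lr_steps' would ever take effect. A minimal sketch of how the new config might be consumed, assuming the usual train.py pattern in this repo (the wiring below is an assumption, not part of this diff):

from data import SubT, SUBTDetection, SUBT_ROOT

cfg = SubT
dataset = SUBTDetection(root=SUBT_ROOT)  # default image_sets=['train', 'val']
print(cfg['num_classes'], len(dataset))  # 6, number of listed image ids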
6 changes: 3 additions & 3 deletions data/scripts/VOC2007.sh
@@ -7,9 +7,9 @@ start=`date +%s`
if [ -z "$1" ]
then
# navigate to the data directory
# echo "navigating to ~/data/ ..."
# mkdir -p ~/data
# cd ~/data/
echo "navigating to /content/ssd.pytorch/data ..."
# mkdir -p /content/data
cd /content/ssd.pytorch/data
else
# check if is valid directory
if [ ! -d $1 ]; then
202 changes: 202 additions & 0 deletions data/subt_artifact.py
@@ -0,0 +1,202 @@
"""VOC Dataset Classes

Original author: Francisco Massa
https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py

Updated by: Ellis Brown, Max deGroot
"""
from .config import HOME
import os.path as osp
import sys
import torch
import torch.utils.data as data
if '/opt/ros/kinetic/lib/python2.7/dist-packages' in sys.path:
sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages')
import cv2
import numpy as np
if sys.version_info[0] == 2:
import xml.etree.cElementTree as ET
else:
import xml.etree.ElementTree as ET

SUBT_CLASSES = [  # always index 0
    'missle', 'backpack', 'blueline', 'drill', 'can']
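# 5 artifact classes + 1 background class gives 'num_classes': 6 in config.SubT.
# The 'missle' spelling must match the <name> tags in the annotation XML,
# since SUBTAnnotationTransform skips names missing from class_to_ind.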

#SUBT_CLASSES = ( # always index 0
# 'valve', '')

# note: if you used our download scripts, this should be right
SUBT_ROOT = osp.join(HOME, "data/subt_artifact/")


class SUBTAnnotationTransform(object):
"""Transforms a VOC annotation into a Tensor of bbox coords and label index
Initilized with a dictionary lookup of classnames to indexes

Arguments:
class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
(default: alphabetic indexing of VOC's 20 classes)
keep_difficult (bool, optional): keep difficult instances or not
(default: False)
height (int): height
width (int): width
"""

def __init__(self, class_to_ind=None, keep_difficult=False):
self.class_to_ind = class_to_ind or dict(
zip(SUBT_CLASSES, range(len(SUBT_CLASSES))))
        self.keep_difficult = keep_difficult

    def __call__(self, target, width, height):
"""
Arguments:
target (annotation) : the target annotation to be made usable
will be an ET.Element
Returns:
a list containing lists of bounding boxes [bbox coords, class name]
"""
res = []
for obj in target.iter('object'):
#difficult = int(obj.find('difficult').text) == 1
#if not self.keep_difficult and difficult:
# continue
name = obj.find('name').text.lower().strip()
if name not in self.class_to_ind:
continue
bbox = obj.find('bndbox')
if bbox is not None:
pts = ['xmin', 'ymin', 'xmax', 'ymax']
bndbox = []
for i, pt in enumerate(pts):
cur_pt = int(bbox.find(pt).text) - 1
# scale height or width
cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
bndbox.append(cur_pt)
label_idx = self.class_to_ind[name]
bndbox.append(label_idx)
res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]
# img_id = target.find('filename').text[:-4]
else: # For LabelMe tool
polygons = obj.find('polygon')
x = []
y = []
bndbox = []
for polygon in polygons.iter('pt'):
# scale height or width
x.append(int(polygon.find('x').text) / width)
y.append(int(polygon.find('y').text) / height)
bndbox.append(min(x))
bndbox.append(min(y))
bndbox.append(max(x))
bndbox.append(max(y))
label_idx = self.class_to_ind[name]
bndbox.append(label_idx)
res += [bndbox] # [xmin, ymin, xmax, ymax, label_ind]

return res # [[xmin, ymin, xmax, ymax, label_ind], ... ]


class SUBTDetection(data.Dataset):
"""VOC Detection Dataset Object

input is image, target is annotation

Arguments:
root (string): filepath to VOCdevkit folder.
image_set (string): imageset to use (eg. 'train', 'val', 'test')
transform (callable, optional): transformation to perform on the
input image
target_transform (callable, optional): transformation to perform on the
target `annotation`
(eg: take in caption string, return tensor of word indices)
dataset_name (string, optional): which dataset to load
(default: 'VOC2007')
"""

def __init__(self, root,
image_sets=['train', 'val'],
transform=None, target_transform=SUBTAnnotationTransform(),
dataset_name='SUBT'):
self.root = root
self.image_set = image_sets
self.transform = transform
self.target_transform = target_transform
self.name = dataset_name
self._annopath = osp.join('%s', 'Annotations', '%s.xml')
self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
self.ids = list()
for name in image_sets:
rootpath = osp.join(self.root)
for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
self.ids.append((rootpath, line.strip()))

def __getitem__(self, index):
im, gt, h, w = self.pull_item(index)

return im, gt

def __len__(self):
return len(self.ids)

def pull_item(self, index):
img_id = self.ids[index]

target = ET.parse(self._annopath % img_id).getroot()
img = cv2.imread(self._imgpath % img_id)
height, width, channels = img.shape

if self.target_transform is not None:
target = self.target_transform(target, width, height)

if self.transform is not None:
target = np.array(target)
            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
# to rgb
img = img[:, :, (2, 1, 0)]
# img = img.transpose(2, 0, 1)
target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
return torch.from_numpy(img).permute(2, 0, 1), target, height, width

def pull_image(self, index):
        '''Returns the original image object at index in OpenCV (numpy) form

        Note: not using self.__getitem__(), as any transformations passed in
        could mess up this functionality.

        Argument:
            index (int): index of img to show
        Return:
            numpy img (BGR, as loaded by cv2.imread)
        '''
img_id = self.ids[index]
return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)

def pull_anno(self, index):
'''Returns the original annotation of image at index

Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.

Argument:
index (int): index of img to get annotation of
Return:
list: [img_id, [(label, bbox coords),...]]
eg: ('001718', [('dog', (96, 13, 438, 332))])
'''
img_id = self.ids[index]
anno = ET.parse(self._annopath % img_id).getroot()
gt = self.target_transform(anno, 1, 1)
return img_id[1], gt

def pull_tensor(self, index):
'''Returns the original image at an index in tensor form

Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.

Argument:
index (int): index of img to show
Return:
tensorized version of img, squeezed
'''
return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
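A quick smoke test for the new dataset class, assuming SUBT_ROOT points at a VOC-style tree with Annotations/, JPEGImages/ and ImageSets/Main/train.txt (the paths and the omitted transform are illustrative assumptions):

from data.subt_artifact import SUBTDetection, SUBT_ROOT

dataset = SUBTDetection(SUBT_ROOT, image_sets=['train'])
img, gt = dataset[0]                 # BGR CHW uint8 tensor; gt = [[xmin, ymin, xmax, ymax, label_idx], ...]
img_id, anno = dataset.pull_anno(0)  # width=height=1 here, so boxes stay in pixel units
print(img.shape, gt[:1], img_id, anno[:1])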
18 changes: 18 additions & 0 deletions layers/box_utils.py
@@ -212,6 +212,18 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
if idx.size(0) == 1:
break
idx = idx[:-1] # remove kept element from view
########################################################
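        # Round-tripping each tensor through Variable(...).data detaches it from
        # the autograd graph before the in-place index_select calls below.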
idx = torch.autograd.Variable(idx, requires_grad=False)
idx = idx.data
x1 = torch.autograd.Variable(x1, requires_grad=False)
x1 = x1.data
y1 = torch.autograd.Variable(y1, requires_grad=False)
y1 = y1.data
x2 = torch.autograd.Variable(x2, requires_grad=False)
x2 = x2.data
y2 = torch.autograd.Variable(y2, requires_grad=False)
y2 = y2.data
########################################################
# load bboxes of next highest vals
torch.index_select(x1, 0, idx, out=xx1)
torch.index_select(y1, 0, idx, out=yy1)
@@ -231,6 +243,12 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
h = torch.clamp(h, min=0.0)
inter = w*h
# IoU = i / (area(a) + area(b) - i)
################################################
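        # Same Variable(...).data round-trip to detach area and idx here.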
area = torch.autograd.Variable(area, requires_grad=False)
area = area.data
        idx = torch.autograd.Variable(idx, requires_grad=False)
idx = idx.data
################################################
rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
union = (rem_areas - inter) + area[i]
IoU = inter/union # store result in iou
27 changes: 26 additions & 1 deletion layers/modules/multibox_loss.py
@@ -30,10 +30,25 @@ class MultiBoxLoss(nn.Module):
See: https://arxiv.org/pdf/1512.02325.pdf for more details.
"""

# def __init__(self, num_classes, overlap_thresh, prior_for_matching,
# bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
# use_gpu=True):
# super(MultiBoxLoss, self).__init__()
# self.use_gpu = use_gpu
# self.num_classes = num_classes
# self.threshold = overlap_thresh
# self.background_label = bkg_label
# self.encode_target = encode_target
# self.use_prior_for_matching = prior_for_matching
# self.do_neg_mining = neg_mining
# self.negpos_ratio = neg_pos
# self.neg_overlap = neg_overlap
# self.variance = cfg['variance']
def __init__(self, batch_size, num_classes, overlap_thresh, prior_for_matching,
bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
use_gpu=True):
super(MultiBoxLoss, self).__init__()
self.batch_size = batch_size
self.use_gpu = use_gpu
self.num_classes = num_classes
self.threshold = overlap_thresh
@@ -94,6 +109,16 @@ def forward(self, predictions, targets):
loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

# Hard Negative Mining
# loss_c[pos] = 0 # filter out pos boxes for now
# loss_c = loss_c.view(num, -1)
# _, loss_idx = loss_c.sort(1, descending=True)
# _, idx_rank = loss_idx.sort(1)
# num_pos = pos.long().sum(1, keepdim=True)
# num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
# neg = idx_rank < num_neg.expand_as(idx_rank)
        # 8732 = total default boxes for SSD300:
        # 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8732
        a = int(len(loss_c) / 8732)  # recover the batch size from the flattened loss
        loss_c = loss_c.reshape((a, 8732))
        # print(loss_c.shape, pos.shape)
loss_c[pos] = 0 # filter out pos boxes for now
loss_c = loss_c.view(num, -1)
_, loss_idx = loss_c.sort(1, descending=True)
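Because __init__ now takes batch_size first, existing call sites (e.g. in train.py) need updating; a minimal sketch, where the batch size of 32 and the remaining argument values are assumptions mirroring the repo's usual defaults. Note that, at least in the hunks shown, self.batch_size is stored but the reshape above still derives the batch size from the 8732 priors:

criterion = MultiBoxLoss(32, cfg['num_classes'], 0.5, True, 0, True, 3,
                         0.5, False, use_gpu=True)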
13 changes: 7 additions & 6 deletions ssd.py
@@ -96,12 +96,13 @@ def forward(self, x):
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
if self.phase == "test":
# output = self.detect(
# loc.view(loc.size(0), -1, 4), # loc preds
# self.softmax(conf.view(conf.size(0), -1,
# self.num_classes)), # conf preds
# self.priors.type(type(x.data)) # default boxes
# )
            # Calling .forward() directly on the legacy autograd Function,
            # presumably to avoid the non-static-forward error in newer PyTorch.
            output = self.detect.forward(
                loc.view(loc.size(0), -1, 4),              # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)), # conf preds
                self.priors.type(type(x.data)))            # default boxes
else:
output = (
loc.view(loc.size(0), -1, 4),