
Commit 41d2a10

Author: kmaninis
Message: clean repo for public release
Parent: 75cb754

25 files changed: +156 −1852 lines

.gitignore

Lines changed: 0 additions & 2 deletions
@@ -1,12 +1,10 @@
-mypath.py
 .idea/
 *.gv
 *.pdf
 models/*
 runs/*
 *.pth
 *.pyc
-path.py
 params.py
 *.mat
 *.txt

README.md

Lines changed: 45 additions & 1 deletion
@@ -1 +1,45 @@
-# OSVOS-PyTorch
+# OSVOS: One-Shot Video Object Segmentation
+Check our [project page](http://www.vision.ee.ethz.ch/~cvlsegmentation/osvos) for additional information.
+![OSVOS](doc/ims/osvos.png)
+
+OSVOS is a method that tackles the task of semi-supervised video object segmentation. It is based on a fully-convolutional neural network architecture that is able to successively transfer generic semantic information, learned on ImageNet, to the task of foreground segmentation, and finally to learning the appearance of a single annotated object of the test sequence (hence one-shot). Experiments on DAVIS 2016 show that OSVOS is faster than currently available techniques and improves the state of the art by a significant margin (79.8% vs. 68.0%).
+
+
+This PyTorch code is an a posteriori implementation of OSVOS, and it does not contain the boundary snapping branch. The results published in the paper were obtained using the Caffe version, which can be found at [OSVOS-caffe](https://github.com/kmaninis/OSVOS-caffe). A TensorFlow implementation is also available at [OSVOS-TensorFlow](https://github.com/scaelles/OSVOS-TensorFlow).
+
+
+### Installation:
+1. Clone the OSVOS-PyTorch repository
+   ```Shell
+   git clone https://github.com/kmaninis/OSVOS-PyTorch.git
+   ```
+2. Install - if necessary - the required dependencies:
+
+   - Python (tested with Anaconda 2.7 and 3.6)
+   - PyTorch (`conda install pytorch torchvision -c pytorch` - tested with PyTorch 0.3, CUDA 8.0)
+   - Other Python dependencies: numpy, scipy, matplotlib, opencv-python
+   - Optionally, install tensorboard (`pip install tensorboard tensorboardx`)
+3. Edit the paths in `mypath.py`
+
+### Online training and testing
+1. Download the [parent model](https://data.vision.ee.ethz.ch/kmaninis/share/OSVOS/Downloads/models/pth_parent_model.zip) (55 MB), and unzip it under `models/`.
+2. Edit the 'User defined parameters' (e.g. `gpu_id`) in the file `osvos_demo.py`.
+3. Run `python train_online.py`.
+
+### Training the parent network (optional)
+1. All the training sequences of DAVIS 2016 are required to train the parent model, so download them from [here](https://graphics.ethz.ch/Downloads/Data/Davis/DAVIS-data.zip).
+2. Download the [VGG model](https://data.vision.ee.ethz.ch/kmaninis/share/OSVOS/Downloads/models/vgg_mat.zip) (55 MB) pretrained on ImageNet, and unzip it under `models/`.
+3. Edit the 'User defined parameters' (e.g. `gpu_id`) in the file `train_parent.py`.
+4. Run `python train_parent.py`. This step takes about 20 hours to train (Titan-X Pascal).
+
+Enjoy!
+
+### Citation:
+	@Inproceedings{Cae+17,
+	  Title     = {One-Shot Video Object Segmentation},
+	  Author    = {S. Caelles and K.K. Maninis and J. Pont-Tuset and L. Leal-Taix\'e and D. Cremers and L. {Van Gool}},
+	  Booktitle = {Computer Vision and Pattern Recognition (CVPR)},
+	  Year      = {2017}
+	}
+If you encounter any problems with the code, want to report bugs, etc., please contact us at {kmaninis, scaelles}[at]vision[dot]ee[dot]ethz[dot]ch.
+
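Step 3 of the installation asks you to edit the paths in `mypath.py` (the file is listed in `.gitignore`, so each user keeps a local copy). A minimal sketch of what such a file might look like; the class name and method names here are illustrative assumptions, not the repository's actual API:

```python
# Hypothetical sketch of a mypath.py-style configuration file.
# The names `Path`, `db_root_dir`, and `models_dir` are assumptions;
# adapt them to whatever attributes the training scripts actually import.
import os


class Path(object):
    @staticmethod
    def db_root_dir():
        # Root folder of the DAVIS 2016 dataset (edit to your local path)
        return os.path.join('/path/to', 'DAVIS-2016')

    @staticmethod
    def models_dir():
        # Folder where the parent and VGG models are unzipped
        return 'models'


print(Path.db_root_dir())
print(Path.models_dir())
```

Keeping all machine-specific paths in one ignored module means the training and demo scripts stay untouched across machines.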

cluster/script.sh

Lines changed: 0 additions & 19 deletions
This file was deleted.

cluster/wrapper.sh

Lines changed: 0 additions & 7 deletions
This file was deleted.

dataloaders/coco.py

Lines changed: 0 additions & 183 deletions
This file was deleted.

dataloaders/custom_transforms.py

Lines changed: 15 additions & 12 deletions
@@ -29,29 +29,26 @@ def __call__(self, sample):
             (self.scales[1] - self.scales[0]) / 2 + 1
         elif type(self.rots) == list:
             # Fixed range of scales and rotations
-            rot = self.rots[random.randint(0, len(self.rots)-1)]
-            sc = self.scales[random.randint(0, len(self.scales) - 1)]
+            rot = self.rots[random.randint(0, len(self.rots))]
+            sc = self.scales[random.randint(0, len(self.scales))]

         for elem in sample.keys():
+            if 'fname' in elem:
+                continue
+
             tmp = sample[elem]

             h, w = tmp.shape[:2]
             center = (w / 2, h / 2)
+            assert(center != 0)  # Strange behaviour warpAffine
             M = cv2.getRotationMatrix2D(center, rot, sc)

-            if tmp.ndim==2:
+            if ((tmp == 0) | (tmp == 1)).all():
                 flagval = cv2.INTER_NEAREST
             else:
                 flagval = cv2.INTER_CUBIC
-
             tmp = cv2.warpAffine(tmp, M, (w, h), flags=flagval)

-            if tmp.min() < 0.0:
-                tmp = tmp - tmp.min()
-
-            if tmp.max() > 1.0:
-                tmp = tmp / tmp.max()
-
             sample[elem] = tmp

         return sample
@@ -71,9 +68,11 @@ def __call__(self, sample):
         sc = self.scales[random.randint(0, len(self.scales) - 1)]

         for elem in sample.keys():
+            if 'fname' in elem:
+                continue
             tmp = sample[elem]

-            if tmp.ndim==2:
+            if tmp.ndim == 2:
                 flagval = cv2.INTER_NEAREST
             else:
                 flagval = cv2.INTER_CUBIC
@@ -92,6 +91,8 @@ def __call__(self, sample):

         if random.random() < 0.5:
             for elem in sample.keys():
+                if 'fname' in elem:
+                    continue
                 tmp = sample[elem]
                 tmp = cv2.flip(tmp, flipCode=1)
                 sample[elem] = tmp
@@ -105,6 +106,8 @@ class ToTensor(object):
     def __call__(self, sample):

         for elem in sample.keys():
+            if 'fname' in elem:
+                continue
             tmp = sample[elem]

             if tmp.ndim == 2:
@@ -117,4 +120,4 @@ def __call__(self, sample):
             tmp = tmp.transpose((2, 0, 1))
             sample[elem] = torch.from_numpy(tmp)

-        return sample
+        return sample
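The recurring change in the hunks above is to skip non-image entries (keys containing `'fname'`) before applying a geometric transform, and the first hunk now chooses the interpolation mode by testing whether an array is a binary mask rather than by its dimensionality. A minimal numpy-only sketch of those two ideas; the function names are illustrative, and the real code applies `cv2.warpAffine`/`cv2.flip` rather than a plain array flip:

```python
import numpy as np


def pick_interpolation(arr):
    # Mirrors the ((tmp == 0) | (tmp == 1)).all() test in the diff:
    # binary ground-truth masks must use nearest-neighbour interpolation,
    # since cubic interpolation would blend label values.
    return 'nearest' if ((arr == 0) | (arr == 1)).all() else 'cubic'


def horizontal_flip(sample):
    # Like the RandomHorizontalFlip hunk: transform every array in the
    # sample dict, but leave filename entries untouched.
    out = {}
    for elem in sample.keys():
        if 'fname' in elem:
            out[elem] = sample[elem]  # filenames pass through unchanged
            continue
        out[elem] = sample[elem][:, ::-1]  # flip along the width axis
    return out


sample = {'image': np.random.rand(4, 4, 3),
          'gt': np.array([[0., 1.], [1., 0.]]),
          'fname': 'blackswan/00000'}
flipped = horizontal_flip(sample)
print(pick_interpolation(sample['gt']))     # binary mask -> nearest
print(pick_interpolation(sample['image']))  # real-valued image -> cubic
```

Keying the skip on the element name keeps one transform pipeline for a heterogeneous sample dict, instead of maintaining separate image and metadata pipelines.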
