
Commit 57889ca

Add MIT LICENSE and a pretrained model zoo.

1 parent c19e7fc

File tree (10 files changed: +201 −36 lines)

  .gitignore
  LICENSE
  configs/_base_bicaptioning_R_50_L1_H1024.yaml
  configs/task_ablations/multilabel_classification_R_50.yaml
  docs/virtex/config.rst
  setup.py
  virtex/config.py
  virtex/factories.py
  virtex/model_zoo/__init__.py
  virtex/model_zoo/model_zoo.py

.gitignore

Lines changed: 1 addition & 1 deletion

@@ -47,8 +47,8 @@ ENV/
 scripts/test_*
 
 # Data (symlinks) directory, model checkpoints, tensorboard logs etc.
-data/
 datasets/
 checkpoints/
 virtex/utils/assets/
 !virtex/data/
+virtex/model_zoo/configs

LICENSE

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+Copyright (c) 2020, Karan Desai.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+associated documentation files (the "Software"), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute,
+sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

configs/_base_bicaptioning_R_50_L1_H1024.yaml

Lines changed: 5 additions & 0 deletions

@@ -9,6 +9,11 @@ DATA:
   ROOT: "datasets/coco"
   TOKENIZER_VOCAB: "datasets/vocab/coco_10k.vocab"
   TOKENIZER_MODEL: "datasets/vocab/coco_10k.model"
+  VOCAB_SIZE: 10000
+  UNK_INDEX: 0
+  SOS_INDEX: 1
+  EOS_INDEX: 2
+  MASK_INDEX: 3
 
   IMAGE_CROP_SIZE: 224
   MAX_CAPTION_LENGTH: 30

configs/task_ablations/multilabel_classification_R_50.yaml

Lines changed: 3 additions & 0 deletions

@@ -1,5 +1,8 @@
 _BASE_: "../_base_bicaptioning_R_50_L1_H1024.yaml"
 
+DATA:
+  VOCAB_SIZE: 81
+
 MODEL:
   NAME: "multilabel_classification"
   TEXTUAL:

docs/virtex/config.rst

Lines changed: 1 addition & 1 deletion

@@ -14,5 +14,5 @@ Config References
 .. literalinclude:: ../../virtex/config.py
     :language: python
     :linenos:
-    :lines: 53-171
+    :lines: 53-189
     :dedent: 8

setup.py

Lines changed: 40 additions & 1 deletion

@@ -1,12 +1,51 @@
 #!/usr/bin/env python
+import glob
+import os
 from setuptools import setup
+import shutil
+from typing import List
+
+
+def get_model_zoo_configs() -> List[str]:
+    """
+    Return a list of config paths to include in the package for the model
+    zoo. These configs are symlinked (or copied) inside virtex/model_zoo.
+    """
+
+    # Use absolute paths while symlinking.
+    source_configs_dir = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "configs"
+    )
+    destination = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "virtex", "model_zoo", "configs"
+    )
+    # Symlink the config directory inside the package for a cleaner pip install.
+
+    # Remove stale symlink/directory from a previous build.
+    if os.path.exists(source_configs_dir):
+        if os.path.islink(destination):
+            os.unlink(destination)
+        elif os.path.isdir(destination):
+            shutil.rmtree(destination)
+
+    if not os.path.exists(destination):
+        try:
+            os.symlink(source_configs_dir, destination)
+        except OSError:
+            # Fall back to copying if symlinking fails, e.g. on Windows.
+            shutil.copytree(source_configs_dir, destination)
+
+    config_paths = glob.glob("configs/**/*.yaml", recursive=True)
+    return config_paths
 
 
 setup(
     name="virtex",
     version="0.9",
-    author="Karan Desai, Justin Johnson",
+    author="Karan Desai and Justin Johnson",
     description="VirTex: Learning Visual Representations with Textual Annotations",
+    package_data={"virtex.model_zoo": get_model_zoo_configs()},
+    python_requires=">=3.6",
    license="Apache 2.0",
     zip_safe=True,
 )
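
Once installed, the YAML files under configs/ travel with the virtex.model_zoo package. A minimal sketch (not part of this commit) of how a packaged config can be located at runtime; virtex/model_zoo/model_zoo.py below resolves paths the same way:

    import os
    import pkg_resources

    # Hypothetical check: resolve a packaged config after `pip install .`
    config_file = pkg_resources.resource_filename(
        "virtex.model_zoo",
        os.path.join("configs", "width_ablations/bicaptioning_R_50_L1_H2048.yaml"),
    )
    print(os.path.exists(config_file))  # True if the configs were packaged correctly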

virtex/config.py

Lines changed: 18 additions & 0 deletions

@@ -62,6 +62,24 @@ def __init__(
         # Path to .model file generated by ``sentencepiece``.
         _C.DATA.TOKENIZER_MODEL = "datasets/vocab/coco_10k.model"
 
+        # Handy config params for vocab size and indices of special tokens.
+        # While these can be picked up from the tokenizer, having them in the
+        # config makes it easy to create a model without instantiating a
+        # tokenizer (especially when it is not needed, e.g. in the model zoo).
+        # These must match what's present in ``TOKENIZER_VOCAB`` and
+        # ``TOKENIZER_MODEL`` above.
+        _C.DATA.VOCAB_SIZE = 10000
+        # Index of the out-of-vocabulary (and padding) token.
+        _C.DATA.UNK_INDEX = 0
+        # Index of the start-of-sentence [SOS] token.
+        _C.DATA.SOS_INDEX = 1
+        # Index of the end-of-sentence [EOS] token.
+        _C.DATA.EOS_INDEX = 2
+        # Index of the word-masking token. While not used for captioning, this
+        # extra token makes it possible to train an MLM model without
+        # re-creating the vocab mapping.
+        _C.DATA.MASK_INDEX = 3
+
         # Size of the image (square) to crop from original input image.
         _C.DATA.IMAGE_CROP_SIZE = 224
         # Maximum length of input caption (number of tokens).
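
A minimal sketch (assuming a local clone with the configs/ directory present): with these additions, everything a model needs to know about the vocabulary can be read straight from the config, without loading the SentencePiece files.

    from virtex.config import Config

    _C = Config("configs/_base_bicaptioning_R_50_L1_H1024.yaml")
    print(_C.DATA.VOCAB_SIZE)  # 10000
    print(_C.DATA.UNK_INDEX, _C.DATA.SOS_INDEX, _C.DATA.EOS_INDEX, _C.DATA.MASK_INDEX)  # 0 1 2 3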

virtex/factories.py

Lines changed: 11 additions & 33 deletions

@@ -346,23 +346,17 @@ class TextualHeadFactory(Factory):
     # fmt: on
 
     @classmethod
-    def from_config(
-        cls, config: Config, tokenizer: Optional[SentencePieceBPETokenizer] = None
-    ) -> nn.Module:
+    def from_config(cls, config: Config) -> nn.Module:
         r"""
         Create a textual head directly from config.
 
         Parameters
         ----------
         config: virtex.config.Config
             Config object with all the parameters.
-        tokenizer: virtex.data.tokenizers.SentencePieceBPETokenizer, optional (default = None)
-            A tokenizer which has the mapping between word tokens and their
-            integer IDs.
         """
 
         _C = config
-        tokenizer = tokenizer or TokenizerFactory.from_config(_C)
 
         # Get architectural hyper-params as per name by matching regex.
         name, architecture = _C.MODEL.TEXTUAL.NAME.split("::")
@@ -374,7 +368,7 @@ def from_config(
         feedforward_size = int(architecture.group(4))
 
         kwargs = {
-            "vocab_size": tokenizer.get_vocab_size(),
+            "vocab_size": _C.DATA.VOCAB_SIZE,
             "hidden_size": hidden_size,
         }
 
@@ -384,7 +378,7 @@ def from_config(
             attention_heads=attention_heads,
             feedforward_size=feedforward_size,
             dropout=_C.MODEL.TEXTUAL.DROPOUT,
-            padding_idx=tokenizer.token_to_id("[UNK]"),
+            padding_idx=_C.DATA.UNK_INDEX,
             max_caption_length=_C.DATA.MAX_CAPTION_LENGTH,
         )
         return cls.create(name, **kwargs)
@@ -406,55 +400,39 @@ class PretrainingModelFactory(Factory):
     }
 
     @classmethod
-    def from_config(
-        cls, config: Config, tokenizer: Optional[SentencePieceBPETokenizer] = None
-    ) -> nn.Module:
+    def from_config(cls, config: Config) -> nn.Module:
         r"""
         Create a model directly from config.
 
         Parameters
         ----------
         config: virtex.config.Config
             Config object with all the parameters.
-        tokenizer: virtex.data.tokenizers.SentencePieceBPETokenizer, optional (default = None)
-            A tokenizer which has the mapping between word tokens and their
-            integer IDs.
         """
 
         _C = config
-        tokenizer = tokenizer or TokenizerFactory.from_config(_C)
-
-        if _C.MODEL.NAME == "multilabel_classification":
-            # Pass a dummy tokenizer object to TextualHeadFactory for
-            # `multilabel_classification`, which can return vocab size as `81`
-            # (80 COCO categories + background).
-            class DummyTokenizer(object):
-                def get_vocab_size(self) -> int:
-                    return 81
-
-            tokenizer = DummyTokenizer()  # type: ignore
 
         # Build visual and textual streams based on config.
         visual = VisualBackboneFactory.from_config(_C)
-        textual = TextualHeadFactory.from_config(_C, tokenizer)
+        textual = TextualHeadFactory.from_config(_C)
 
         # Add model specific kwargs. Refer call signatures of specific models
         # for matching kwargs here.
         kwargs = {}
         if "captioning" in _C.MODEL.NAME:
             kwargs.update(
                 max_decoding_steps=_C.DATA.MAX_CAPTION_LENGTH,
-                sos_index=tokenizer.token_to_id("[SOS]"),
-                eos_index=tokenizer.token_to_id("[EOS]"),
+                sos_index=_C.DATA.SOS_INDEX,
+                eos_index=_C.DATA.EOS_INDEX,
             )
 
         elif _C.MODEL.NAME == "token_classification":
             kwargs.update(
                 ignore_indices=[
-                    tokenizer.token_to_id("[UNK]"),
-                    tokenizer.token_to_id("[SOS]"),
-                    tokenizer.token_to_id("[EOS]"),
-                    tokenizer.token_to_id("[MASK]"),
+                    _C.DATA.UNK_INDEX,
+                    _C.DATA.SOS_INDEX,
+                    _C.DATA.EOS_INDEX,
+                    _C.DATA.MASK_INDEX
                 ],
             )
         elif _C.MODEL.NAME == "multilabel_classification":
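
With the tokenizer argument gone, both factories are driven entirely by the config. A small sketch of the new call pattern (assuming a local clone), using the multilabel config whose vocab size of 81 (80 COCO categories + background) now comes from the YAML override above rather than the removed DummyTokenizer:

    from virtex.config import Config
    from virtex.factories import PretrainingModelFactory

    _C = Config("configs/task_ablations/multilabel_classification_R_50.yaml")
    assert _C.DATA.VOCAB_SIZE == 81
    model = PretrainingModelFactory.from_config(_C)  # no tokenizer needed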

virtex/model_zoo/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+from .model_zoo import get
+
+__all__ = ["get"]

virtex/model_zoo/model_zoo.py

Lines changed: 103 additions & 0 deletions

@@ -0,0 +1,103 @@
+r"""
+A utility module which provides functionality to easily load common VirTex
+models (optionally with pretrained weights) using a single line of code.
+
+Get our best performing VirTex model (with pretrained weights) as:
+
+>>> import virtex.model_zoo as mz
+>>> model = mz.get("width_ablations/bicaptioning_R_50_L1_H2048.yaml", pretrained=True)
+
+Any config available in the ``configs/`` directory under project root can be
+specified here, although this command need not be executed from project root.
+
+Part of this code is adapted from Detectron2's model zoo, which was originally
+implemented by the developers of this codebase, with reviews and further
+changes by the Detectron2 developers.
+"""
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import os
+import pkg_resources
+
+from fvcore.common.download import download
+import torch
+
+from virtex.config import Config
+from virtex.factories import PretrainingModelFactory
+from virtex.utils.checkpointing import CheckpointManager
+
+
+class _ModelZooUrls(object):
+    r"""Mapping from config names to URL suffixes of pretrained weights."""
+
+    URL_PREFIX = "https://umich.box.com/shared/static"
+
+    CONFIG_PATH_TO_URL_SUFFIX = {
+
+        # Pretraining Task Ablations
+        "task_ablations/bicaptioning_R_50_L1_H2048.yaml": "fm1nq819q74vr0kqcd3gkivlzf06xvko.pth",
+        "task_ablations/captioning_R_50_L1_H2048.yaml": "7fopt8k2eutz9qvth2hh6j00o7z4o7ps.pth",
+        "task_ablations/token_classification_R_50.yaml": "qwvfnji51g4gvba7i5mrw2ph5z8yfty9.pth",
+        "task_ablations/multilabel_classification_R_50.yaml": "tk1hlcue9c3268bds3h036ckk7a9btlr.pth",
+
+        # Width Ablations
+        "width_ablations/bicaptioning_R_50_L1_H512.yaml": "qostt3be0pgnd0xf55vdte3wa49x6k99.pth",
+        "width_ablations/bicaptioning_R_50_L1_H768.yaml": "v0p80tya0wjgsj0liqyvt386903xbwxc.pth",
+        "width_ablations/bicaptioning_R_50_L1_H1024.yaml": "s2o3tvujcx2djoz1ouvuea27hrys1fbm.pth",
+        "width_ablations/bicaptioning_R_50_L1_H2048.yaml": "fm1nq819q74vr0kqcd3gkivlzf06xvko.pth",
+
+        # Depth Ablations
+        "depth_ablations/bicaptioning_R_50_L1_H1024.yaml": "s2o3tvujcx2djoz1ouvuea27hrys1fbm.pth",
+        "depth_ablations/bicaptioning_R_50_L2_H1024.yaml": "5enura2ao2b0iyigcuikfsdd0osun0it.pth",
+        "depth_ablations/bicaptioning_R_50_L3_H1024.yaml": "xit11ev6h3q7h8wth5qokewxcn6yot2n.pth",
+        "depth_ablations/bicaptioning_R_50_L4_H1024.yaml": "secpwhjx9oq59mkzsztjaews6n3680bj.pth",
+
+        # Backbone Ablations
+        "backbone_ablations/bicaptioning_R_50_L1_H1024.yaml": "s2o3tvujcx2djoz1ouvuea27hrys1fbm.pth",
+        "backbone_ablations/bicaptioning_R_50W2X_L1_H1024.yaml": "0rlu15xq796tz3ebvz7lf5dbpti421le.pth",
+        "backbone_ablations/bicaptioning_R_101_L1_H1024.yaml": "i3p45pr78jdz74r29qkj23v8kzb6gcsq.pth",
+    }
+    # Backbone from best model: fotpti1uk6bpoobeazysfc6fdbndvy90.pth
+
+
+def get(config_path: str, pretrained: bool = False):
+    r"""
+    Get a model specified by a relative path under the ``configs/`` directory of the project root.
+
+    Parameters
+    ----------
+    config_path: str
+        Name of config file relative to ``configs/`` directory under project
+        root. (For example, ``width_ablations/bicaptioning_R_50_L1_H2048.yaml``)
+    pretrained: bool, optional (default = False)
+        If ``True``, will initialize the model with the pretrained weights. If
+        ``False``, the weights will be initialized randomly.
+    """
+
+    # Get the original path to the config file (shipped inside the package).
+    _pkg_config_path = pkg_resources.resource_filename(
+        "virtex.model_zoo", os.path.join("configs", config_path)
+    )
+    if not os.path.exists(_pkg_config_path):
+        raise RuntimeError("{} not available in Model Zoo!".format(config_path))
+
+    _C = Config(_pkg_config_path)
+    model = PretrainingModelFactory.from_config(_C)
+
+    if pretrained:
+        # Get the checkpoint URL for this config path.
+        if config_path in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX:
+            url_suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[config_path]
+            checkpoint_url = f"{_ModelZooUrls.URL_PREFIX}/{url_suffix}"
+        else:
+            raise RuntimeError("{} not available in Model Zoo!".format(config_path))
+
+        # Download the pretrained model weights and save them with a sensible
+        # name. They are downloaded only if they do not already exist.
+        checkpoint_path = download(
+            checkpoint_url,
+            dir=os.path.expanduser("~/.torch/virtex_cache"),
+            filename=os.path.basename(config_path).replace(".yaml", ".pth")
+        )
+        CheckpointManager(model=model).load(checkpoint_path)
+
+    return model
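
A small usage sketch: building a zoo model with and without pretrained weights. Per the code above, checkpoints are cached under ~/.torch/virtex_cache, so repeated calls reuse the downloaded .pth file.

    import virtex.model_zoo as mz

    # Architecture only, randomly initialized weights:
    model_scratch = mz.get("width_ablations/bicaptioning_R_50_L1_H2048.yaml")

    # Pretrained weights (downloaded once, then loaded from the local cache):
    model = mz.get("width_ablations/bicaptioning_R_50_L1_H2048.yaml", pretrained=True)
    model.eval()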
