nerfstudio-project · ethanweber · Sep 8, 2023 · Sep 12, 2023 · Sep 12, 2023 · Sep 12, 2023
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -34,7 +34,7 @@
   "python.envFile": "${workspaceFolder}/.env",
   "python.formatting.provider": "none",
   "black-formatter.args": ["--line-length=120"],
-  "python.linting.pylintEnabled": false,
+  "python.linting.pylintEnabled": true,
   "python.linting.flake8Enabled": false,
   "python.linting.enabled": true,
   "python.testing.unittestEnabled": false,

diff --git a/nerfstudio/cameras/camera_optimizers.py b/nerfstudio/cameras/camera_optimizers.py
@@ -33,6 +33,7 @@
 from nerfstudio.utils import poses as pose_utils
 from nerfstudio.engine.optimizers import OptimizerConfig
 from nerfstudio.engine.schedulers import SchedulerConfig
+from nerfstudio.cameras.cameras import Cameras
 
 
 @dataclass
@@ -44,10 +45,10 @@ class CameraOptimizerConfig(InstantiateConfig):
     mode: Literal["off", "SO3xR3", "SE3"] = "off"
     """Pose optimization strategy to use. If enabled, we recommend SO3xR3."""
 
-    trans_l2_penalty: float = 1e-2
+    trans_l2_penalty: float = 1e-4
     """L2 penalty on translation parameters."""
 
-    rot_l2_penalty: float = 1e-3
+    rot_l2_penalty: float = 1e-4
     """L2 penalty on rotation parameters."""
 
     optimizer: Optional[OptimizerConfig] = field(default=None)
@@ -146,6 +147,16 @@ def apply_to_raybundle(self, raybundle: RayBundle) -> None:
             raybundle.origins = raybundle.origins + correction_matrices[:, :3, 3]
             raybundle.directions = torch.bmm(correction_matrices[:, :3, :3], raybundle.directions[..., None]).squeeze()
 
+    def apply_to_camera(self, camera: Cameras) -> None:
+        """Apply the pose correction to the camera"""
+        if self.config.mode != "off":
+            assert camera.metadata is not None, "Must provide id of camera in its metadata"
+            assert "cam_idx" in camera.metadata, "Must provide id of camera in its metadata"
+            camera_idx = camera.metadata["cam_idx"]
+            adj = self([camera_idx])  # type: ignore
+            adj = torch.cat([adj, torch.Tensor([0, 0, 0, 1])[None, None].to(adj)], dim=1)
+            camera.camera_to_worlds = torch.bmm(camera.camera_to_worlds, adj)
+
     def get_loss_dict(self, loss_dict: dict) -> None:
         """Add regularization"""
         if self.config.mode != "off":

diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py
@@ -88,6 +88,7 @@ class LocalWriterConfig(InstantiateConfig):
         writer.EventName.VIS_RAYS_PER_SEC,
         writer.EventName.TEST_RAYS_PER_SEC,
         writer.EventName.ETA,
+        writer.EventName.GAUSSIAN_NUM,
     )
     """specifies which stats will be logged/printed to terminal"""
     max_log_size: int = 10
@@ -144,7 +145,7 @@ class ViewerConfig(PrintableConfig):
     """Whether to kill the training job when it has completed. Note this will stop rendering in the viewer."""
     image_format: Literal["jpeg", "png"] = "jpeg"
     """Image format viewer should use; jpeg is lossy compression, while png is lossless."""
-    jpeg_quality: int = 90
+    jpeg_quality: int = 70
     """Quality tradeoff to use for jpeg compression."""
     make_share_url: bool = False
     """Viewer beta feature: print a shareable URL. `vis` must be set to viewer_beta; this flag is otherwise ignored."""
diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
@@ -22,7 +22,6 @@
 from typing import Dict
 
 import tyro
-from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig
 
 from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
 from nerfstudio.configs.base_config import ViewerConfig
@@ -49,6 +48,7 @@
 from nerfstudio.data.datasets.depth_dataset import DepthDataset
 from nerfstudio.data.datasets.sdf_dataset import SDFDataset
 from nerfstudio.data.datasets.semantic_dataset import SemanticDataset
+from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig
 from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig
 from nerfstudio.engine.schedulers import (
     CosineDecaySchedulerConfig,
@@ -59,8 +59,10 @@
 from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind
 from nerfstudio.fields.sdf_field import SDFFieldConfig
 from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig
+from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig
 from nerfstudio.models.generfacto import GenerfactoModelConfig
 from nerfstudio.models.instant_ngp import InstantNGPModelConfig
+from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig
 from nerfstudio.models.mipnerf import MipNerfModel
 from nerfstudio.models.nerfacto import NerfactoModelConfig
 from nerfstudio.models.neus import NeuSModelConfig
@@ -69,6 +71,7 @@
 from nerfstudio.models.tensorf import TensoRFModelConfig
 from nerfstudio.models.vanilla_nerf import NeRFModel, VanillaModelConfig
 from nerfstudio.pipelines.base_pipeline import VanillaPipelineConfig
+from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig
 from nerfstudio.pipelines.dynamic_batch import DynamicBatchPipelineConfig
 from nerfstudio.plugins.registry import discover_methods
 
@@ -87,6 +90,7 @@
     "generfacto": "Generative Text to NeRF model",
     "neus": "Implementation of NeuS. (slow)",
     "neus-facto": "Implementation of NeuS-Facto. (slow)",
+    "gaussian-splatting": "Gaussian Splatting model",
 }
 
 method_configs["nerfacto"] = TrainerConfig(
@@ -594,6 +598,68 @@
     vis="viewer",
 )
 
+method_configs["gaussian-splatting"] = TrainerConfig(
+    method_name="gaussian-splatting",
+    steps_per_eval_image=100,
+    steps_per_eval_batch=100,
+    steps_per_save=2000,
+    steps_per_eval_all_images=100000,
+    max_num_iterations=30000,
+    mixed_precision=False,
+    gradient_accumulation_steps={"camera_opt": 100, "color": 10, "shs": 10},
+    pipeline=VanillaPipelineConfig(
+        datamanager=FullImageDatamanagerConfig(
+            dataparser=ColmapDataParserConfig(load_3D_points=True),
+        ),
+        model=GaussianSplattingModelConfig(),
+    ),
+    optimizers={
+        "xyz": {
+            "optimizer": AdamOptimizerConfig(lr=1.6e-4, eps=1e-15),
+            "scheduler": ExponentialDecaySchedulerConfig(
+                lr_final=1.6e-6,
+                max_steps=30000,
+            ),
+        },
+        "color": {
+            "optimizer": AdamOptimizerConfig(lr=2.5e-3, eps=1e-15),
+            "scheduler": ExponentialDecaySchedulerConfig(
+                lr_final=1e-3,
+                max_steps=30000,
+            ),
+        },
+        "shs": {
+            "optimizer": AdamOptimizerConfig(lr=2.5e-3 / 20, eps=1e-15),
+            "scheduler": ExponentialDecaySchedulerConfig(
+                lr_final=1e-3 / 20,
+                max_steps=30000,
+            ),
+        },
+        "opacity": {
+            "optimizer": AdamOptimizerConfig(lr=0.05, eps=1e-15),
+            "scheduler": None,
+        },
+        "nd_values": {
+            "optimizer": AdamOptimizerConfig(lr=2.5e-3, eps=1e-15),
+            "scheduler": ExponentialDecaySchedulerConfig(
+                lr_final=1e-3,
+                max_steps=30000,
+            ),
+        },
+        "scaling": {
+            "optimizer": AdamOptimizerConfig(lr=0.005, eps=1e-15),
+            "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-3, max_steps=30000),
+        },
+        "rotation": {"optimizer": AdamOptimizerConfig(lr=0.001, eps=1e-15), "scheduler": None},
+        "camera_opt": {
+            "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+            "scheduler": ExponentialDecaySchedulerConfig(lr_final=5e-5, max_steps=30000),
+        },
+    },
+    viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
+    vis="viewer_beta",
+)
+
 
 def merge_methods(methods, method_descriptions, new_methods, new_descriptions, overwrite=True):
     """Merge new methods and descriptions into existing methods and descriptions.

diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -21,12 +21,13 @@
 from abc import abstractmethod
 from collections import defaultdict
 from dataclasses import dataclass, field
-from pathlib import Path
 from functools import cached_property
+from pathlib import Path
 from typing import (
     Any,
     Callable,
     Dict,
+    ForwardRef,
     Generic,
     List,
     Literal,
@@ -35,9 +36,8 @@
     Type,
     Union,
     cast,
-    ForwardRef,
-    get_origin,
     get_args,
+    get_origin,
 )
 
 import torch
@@ -47,17 +47,17 @@
 from typing_extensions import TypeVar
 
 from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
-from nerfstudio.cameras.cameras import CameraType
+from nerfstudio.cameras.cameras import Cameras, CameraType
 from nerfstudio.cameras.rays import RayBundle
 from nerfstudio.configs.base_config import InstantiateConfig
 from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion
 from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
 from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
 from nerfstudio.data.datasets.base_dataset import InputDataset
 from nerfstudio.data.pixel_samplers import (
+    PatchPixelSamplerConfig,
     PixelSampler,
     PixelSamplerConfig,
-    PatchPixelSamplerConfig,
 )
 from nerfstudio.data.utils.dataloaders import (
     CacheDataloader,
@@ -67,9 +67,8 @@
 from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate
 from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
 from nerfstudio.model_components.ray_generators import RayGenerator
-from nerfstudio.utils.misc import IterableWrapper
+from nerfstudio.utils.misc import IterableWrapper, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE
-from nerfstudio.utils.misc import get_orig_class
 
 
 def variable_res_collate(batch: List[Dict]) -> Dict:
@@ -131,7 +130,7 @@ class DataManager(nn.Module):
     To get data, use the next_train and next_eval functions.
     This data manager's next_train and next_eval methods will return 2 things:
 
-    1. A Raybundle: This will contain the rays we are sampling, with latents and
+    1. A RayBundle or Cameras object: This will contain the rays/camera we are sampling, with latents and
         conditionals attached (everything needed at inference)
     2. A "batch" of auxiliary information: This will contain the mask, the ground truth
         pixels, etc needed to actually train, score, etc the model
@@ -246,7 +245,7 @@ def setup_eval(self):
         """Sets up the data manager for evaluation"""
 
     @abstractmethod
-    def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
+    def next_train(self, step: int) -> Tuple[Union[RayBundle, Cameras], Dict]:
         """Returns the next batch of data from the train data manager.
 
         Args:
@@ -258,25 +257,25 @@ def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
         raise NotImplementedError
 
     @abstractmethod
-    def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
+    def next_eval(self, step: int) -> Tuple[Union[RayBundle, Cameras], Dict]:
         """Returns the next batch of data from the eval data manager.
 
         Args:
             step: the step number of the eval image to retrieve
         Returns:
-            A tuple of the ray bundle for the image, and a dictionary of additional batch information
+            A tuple of the ray bundle/camera for the image, and a dictionary of additional batch information
             such as the groundtruth image.
         """
         raise NotImplementedError
 
     @abstractmethod
-    def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]:
+    def next_eval_image(self, step: int) -> Tuple[int, Union[RayBundle, Cameras], Dict]:
         """Retrieve the next eval image.
 
         Args:
             step: the step number of the eval image to retrieve
         Returns:
-            A tuple of the step number, the ray bundle for the image, and a dictionary of
+            A tuple of the step number, the ray bundle/camera for the image, and a dictionary of
             additional batch information such as the groundtruth image.
         """
         raise NotImplementedError
@@ -313,7 +312,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
 
 @dataclass
 class VanillaDataManagerConfig(DataManagerConfig):
-    """A basic data manager"""
+    """A basic data manager for a ray-based model"""
 
     _target: Type = field(default_factory=lambda: VanillaDataManager)
     """Target class to instantiate."""