Source code for fvdb.gaussian_splatting

# Copyright Contributors to the OpenVDB Project
# SPDX-License-Identifier: Apache-2.0
#
import pathlib
from typing import Any, Mapping, Sequence, TypeVar, overload

import torch
from fvdb.enums import ProjectionType

from ._fvdb_cpp import GaussianSplat3d as GaussianSplat3dCpp
from ._fvdb_cpp import JaggedTensor as JaggedTensorCpp
from ._fvdb_cpp import ProjectedGaussianSplats as ProjectedGaussianSplatsCpp
from .grid import Grid
from .grid_batch import GridBatch
from .jagged_tensor import JaggedTensor
from .types import DeviceIdentifier, cast_check, resolve_device

JaggedTensorOrTensorT = TypeVar("JaggedTensorOrTensorT", JaggedTensor, torch.Tensor)


[docs] class ProjectedGaussianSplats: """ A class representing a set of Gaussian splats projected onto a batch of 2D image planes. A :class:`ProjectedGaussianSplats` instance contains the 2D projections of 3D Gaussian splats, which can be used to render images onto the image planes. Instances of this class are created by calling the :meth:`GaussianSplat3d.project_gaussians_for_images`, :meth:`GaussianSplat3d.project_gaussians_for_images_and_depths`, etc. methods. .. note:: The reason to have a separate class for projected Gaussian splats is to be able to run projection once, and then render the splats multiple times (e.g. rendering crops) without re-projecting them each time. This can save significant computation time. """ __PRIVATE__ = object() def __init__(self, impl: ProjectedGaussianSplatsCpp, _private: Any = None) -> None: """ Private constructor. Use :meth:`GaussianSplat3d.project_gaussians_for_images` or similar methods to create instances. Args: impl (ProjectedGaussianSplatsCpp): The underlying C++ implementation. _private (Any): A private object to prevent direct construction. Must be :attr:`ProjectedGaussianSplats.__PRIVATE__`. """ if _private is not self.__PRIVATE__: raise ValueError( "ProjectedGaussianSplats constructor is private. Use GaussianSplat3d.project_gaussians_for_images or similar methods instead." ) self._impl = impl @property def antialias(self) -> bool: """ Return whether antialiasing was enabled during the projection of the Gaussian splats. Returns: antialias (bool): ``True`` if antialiasing was enabled during projection, ``False`` otherwise. """ return self._impl.antialias @property def inv_covar_2d(self) -> torch.Tensor: """ The inverse of the 2D covariance matrices of the Gaussians projected into each image plane. These define the spatial extent of ellipses for each splatted Gaussian. Note that since covariance matrices are symmetric, we pack them into a tensor of shape ``(num_projected_gaussians, 3)`` where each covariance matrix is represented as ``(Cxx, Cxy, Cyy)``. Returns: inv_covar_2d (torch.Tensor): A tensor of shape ``(C, N, 3)`` representing the packed inverse 2D covariance matrices, where ``C`` is the number of image planes, ``N`` is the number of projected Gaussians, and the last dimension contains the packed ``(Cxx, Cxy, Cyy)`` entries of each symmetric inverse covariance matrix. """ return self._impl.conics @property def depths(self) -> torch.Tensor: """ Return the depth of each projected Gaussian in each image plane. The depth is defined as the distance from the camera to the mean of the Gaussian along the camera's viewing direction. Returns: depths (torch.Tensor): A tensor of shape ``(C, N)`` representing the depth of each projected Gaussian, where ``C`` is the number of image planes, and ``N`` is the number of projected Gaussians. """ return self._impl.depths @property def eps_2d(self) -> float: """ Return the epsilon value used during the projection of the Gaussian splats to avoid numerical issues. This value is used to clamp very small radii during projection. Returns: eps_2d (float): The epsilon value used during projection. """ return self._impl.eps_2d @property def far_plane(self) -> float: """ Return the far plane distance used during the projection of the Gaussian splats. Returns: far_plane (float): The far plane distance. """ return self._impl.far_plane @property def image_height(self) -> int: """ Return the height of the image planes used during the projection of the Gaussian splats. Returns: image_height (int): The height of the image planes.
""" return self._impl.image_height @property def image_width(self) -> int: """ Return the width of the image planes used during the projection of the Gaussian splats. Returns: image_width (int): The width of the image planes. """ return self._impl.image_width @property def means2d(self) -> torch.Tensor: """ Return the 2D projected means (in pixel units) of the Gaussians in each image plane. Returns: means2d (torch.Tensor): A tensor of shape ``(C, N, 2)`` representing the 2D projected means, where ``C`` is the number of image planes, ``N`` is the number of projected Gaussians, and the last dimension contains the (x, y) coordinates of the means in pixel space. """ return self._impl.means2d @property def min_radius_2d(self) -> float: """ Return the minimum radius (in pixels) used to clip Gaussians during projection. Gaussians whose radius projected to less than this value are ignored to avoid numerical issues. Returns: min_radius_2d (float): The minimum radius used during projection. """ return self._impl.min_radius_2d @property def near_plane(self) -> float: """ Return the near plane distance used during the projection of the Gaussian splats. Returns: near_plane (float): The near plane distance. """ return self._impl.near_plane @property def opacities(self) -> torch.Tensor: """ Return the opacities of each projected Gaussian in each image plane. Returns: opacities (torch.Tensor): A tensor of shape ``(C, N)`` representing the opacity of each projected Gaussian, where ``C`` is the number of image planes, and ``N`` is the number of projected Gaussians. """ return self._impl.opacities @property def projection_type(self) -> ProjectionType: """ Return the projection type used during the projection of the Gaussian splats. Returns: projection_type (ProjectionType): The projection type (*e.g.* :attr:`ProjectionType.PERSPECTIVE` or :attr:`ProjectionType.ORTHOGRAPHIC`). """ return GaussianSplat3d._proj_type_from_cpp(self._impl.projection_type) @property def radii(self) -> torch.Tensor: """ Return the 2D radii (in pixels) of each projected Gaussian in each image plane. The radius of a Gaussian is the maximum extent of the Gaussian along any direction in the image plane. Returns: radii (torch.Tensor): A tensor of shape ``(C, N)`` representing the 2D radius of each projected Gaussian, where ``C`` is the number of image planes, and ``N`` is the number of projected Gaussians. """ return self._impl.radii @property def render_quantities(self) -> torch.Tensor: """ Return the render quantities of each projected Gaussian in each image plane. The render quantities are used for shading and lighting calculations during rendering. Returns: render_quantities (torch.Tensor): A tensor of shape ``(C, N, D)`` representing the render quantities of each projected Gaussian, where ``C`` is the number of image planes, ``N`` is the number of projected Gaussians, and ``D`` is the number of feature channels for each Gaussian (see :attr:`GaussianSplat3d.num_channels`). """ return self._impl.render_quantities @property def sh_degree_to_use(self) -> int: """ Return the spherical harmonic degree used during the projection of the Gaussian splats. .. note:: This indicates up to which degree the spherical harmonics coefficients were projected for each Gaussian. For example, if this value is ``0``, only the diffuse (degree 0) coefficients were projected. If this value is ``2``, coefficients up to degree 2 were projected. Returns: sh_degree_to_use (int): The spherical harmonic degree used during projection. 
""" return self._impl.sh_degree_to_use @property def tile_gaussian_ids(self) -> torch.Tensor: """ Return a tensor containing the ID of each tile/gaussian intersection. Returns: tile_gaussian_ids (torch.Tensor): A tensor of shape ``(M,)`` containing the IDs of the Gaussians. """ return self._impl.tile_gaussian_ids @property def tile_offsets(self) -> torch.Tensor: """ Return the starting offset of the set of intersections for each tile into :attr:`tile_gaussian_ids`. Returns: tile_offsets (torch.Tensor): A tensor of shape ``(C, TH, TW,)`` where ``C`` is the number of image planes, ``TH`` is the number of tiles in the height dimension, and ``TW`` is the number of tiles in the width dimension. """ return self._impl.tile_offsets
[docs] class GaussianSplat3d: """ An efficient data structure representing a Gaussian splat radiance field in 3D space. A :class:`GaussianSplat3d` instance contains a set of 3D Gaussian splats, each defined by its mean position, orientation (quaternion), scale, opacity, and spherical harmonics coefficients for color representation. Together, these define a radiance field which can be volume rendered to produce images and depths from arbitrary viewpoints. This class provides a variety of methods for rendering and manipulating Gaussian splats radiance fields. These include: - Rendering images with arbitrary channels using spherical harmonics for view-dependent color representation (:meth:`render_images`, :meth:`render_images_and_depths`). - Rendering depth maps (:meth:`render_depths`, :meth:`render_images_and_depths`). - Rendering features at arbitrary sparse pixel locations (:meth:`sparse_render_images`, :meth:`sparse_render_images_and_depths`). - Rendering depths at arbitrary sparse pixel locations (:meth:`sparse_render_depths`). - Computing which gaussians contribute to each pixel in an image plane (:meth:`render_num_contributing_gaussians`, :meth:`render_contributing_gaussian_ids`). - Computing the set of Gaussians which contribute to a set of sparse pixel locations (:meth:`sparse_render_num_contributing_gaussians`, :meth:`sparse_render_contributing_gaussian_ids`). - Saving and loading Gaussian splat data to/from PLY files (:meth:`save_to_ply`, :meth:`from_ply`). - Slicing, indexing, and masking Gaussians to create new :class:`GaussianSplat3d` instances. - Concatenating multiple :class:`GaussianSplat3d` instances into a single instance (:meth:`cat`). Background ----------- Mathematically, the radiance field represented by a :class:`GaussianSplat3d` is defined as a sum of anisotropic 3D Gaussians, with view-dependent features represented using spherical harmonics. The radiance field :math:`R(x, v)` accepts as input a 3D position :math:`x \\in \\mathbb{R}^3` and a viewing direction :math:`v \\in \\mathbb{S}^2`, and is defined as: .. math:: R(x, v) = \\sum_{i=1}^{N} o_i \\cdot \\alpha_i(x) \\cdot SH(v; C_i) \\alpha_i(x) = \\exp\\left(-\\frac{1}{2}(x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)\\right) \\Sigma_i = R(q_i)^T \\cdot \\text{diag}(S_i) \\cdot R(q_i) where: - :math:`N` is the number of Gaussians (see :attr:`num_gaussians`). - :math:`\\mu_i \\in \\mathbb{R}^3` is the mean of the i-th Gaussian (see :attr:`means`). - :math:`\\Sigma_i \\in \\mathbb{R}^{3 \\times 3}` is the covariance matrix of the i-th Gaussian, defined by its scale diagonal scale :math:`S_i \\in \\mathbb{R}^3` (see :attr:`scales`) and orientation quaternion :math:`q_i \\in \\mathbb{R}^4` (see :attr:`quats`). - :math:`o_i \\in [0, 1]` is the opacity of the i-th Gaussian (see :attr:`opacities`). - :math:`SH(v; C_i)` is the spherical harmonics function evaluated at direction :math:`v` with coefficients :math:`C_i`. - :math:`R(q_i)` is the rotation matrix corresponding to the quaternion :math:`q_i`. To render images from a :class:`GaussianSplat3d`, you volume render the radiance field using .. math:: I(u, v) = \\int_{t \\in r(u, v)} T(t) R(r(t), d) dt where :math:`r(u, v)` is the camera ray through pixel :math:`(u, v)`, :math:`d` is the viewing direction of the ray, and :math:`T(t) = \\exp\\left(-\\int_{0}^{t} R(r(s), s) ds\\right)` is the accumulated transmittance along the ray up to distance :math:`t`. and to render depths you compute .. 
math:: D(u, v) = \\int_{t \\in r(u, v)} t \\cdot T(t) \\sum_{i=1}^{N} o_i \\cdot \\alpha_i(r(t), d) dt """ PLY_VERSION_STRING = "fvdb_ply 1.0.0" """ Version string written to PLY files saved using the :meth:`save_to_ply` method. This string will be written in the comment section of the PLY file to identify the version of the fvdb library used to save the file. The comment will have the form ``comment fvdb_gs_ply <PLY_VERSION_STRING>``. """ __PRIVATE__ = object() def __init__( self, impl: GaussianSplat3dCpp, _private: Any = None, ) -> None: """ Initializes the :class:`GaussianSplat3d` with an existing C++ implementation. This constructor is used to wrap an existing instance of :class:`GaussianSplat3dCpp`. It is only called internally within this class and should not be used directly. .. note:: You should never call this constructor directly. Instead, use the :meth:`from_tensors` or :meth:`from_ply` class methods to create new instances of :class:`GaussianSplat3d`. Args: impl (GaussianSplat3dCpp): An instance of the C++ implementation. """ if _private is not self.__PRIVATE__: raise ValueError("GaussianSplat3d constructor is private. Use from_tensors or from_ply instead.") self._impl = impl
[docs] @classmethod def from_tensors( cls, means: torch.Tensor, quats: torch.Tensor, log_scales: torch.Tensor, logit_opacities: torch.Tensor, sh0: torch.Tensor, shN: torch.Tensor, accumulate_mean_2d_gradients: bool = False, accumulate_max_2d_radii: bool = False, detach: bool = False, ) -> "GaussianSplat3d": """ Create a new :class:`GaussianSplat3d` from the provided tensors. This constructs a new Gaussian splat radiance field with the specified means, orientations, scales, opacities, and spherical harmonics coefficients. .. note:: The :class:`GaussianSplat3d` stores the log of scales scales (:attr:`log_scales`) rather than the scales directly. This ensures numerical stability, especially when optimizing the scales, since each gaussian is defined as :math:`\\exp(R(q)^T S R(q))` where :math:`R(q)` is rotation matrix defined by the unit quaternion of the Gaussian, and :math:`S = diag(exp(log_scales))`. .. note:: The :class:`GaussianSplat3d` stores the logit of opacities (:attr:`logit_opacities`) rather than the opacities directly. The actual opacities are obtained by applying the sigmoid function to the logit opacities. This ensures opacities are always in the range ``[0, 1]`` and improves numerical stability during optimization. Args: means (torch.Tensor): Tensor of shape ``(N, 3)`` representing the means of the gaussians, where ``N`` is the number of gaussians. quats (torch.Tensor): Tensor of shape ``(N, 4)`` representing the quaternions (orientations) of the gaussians, where ``N`` is the number of gaussians. log_scales (torch.Tensor): Tensor of shape ``(N, 3)`` representing the log scales of the gaussians, where ``N`` is the number of gaussians. logit_opacities (torch.Tensor): Tensor of shape ``(N,)`` representing the logit opacities of the gaussians, where ``N`` is the number of gaussians. sh0 (torch.Tensor): Tensor of shape ``(N, 1, D)`` representing the diffuse SH coefficients where ``D`` is the number of channels (see :attr:`num_channels`). shN (torch.Tensor): Tensor of shape ``(N, K-1, D)`` representing the directionally varying SH coefficients where ``D`` is the number of channels (see :attr:`num_channels`), and ``K`` is the number of spherical harmonic bases (see :attr:`num_sh_bases`). accumulate_mean_2d_gradients (bool, optional): If ``True``, tracks the average norm of the gradient of projected means for each Gaussian during the backward pass of projection. This is useful for some optimization techniques, such as the one in the `original paper <https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/>`_. Defaults to ``False``. accumulate_max_2d_radii (bool, optional): If ``True``, tracks the maximum 2D radii for each Gaussian during the backward pass of projection. This is useful for some optimization techniques, such as the one in the `original paper <https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/>`_. Defaults to ``False``. detach (bool, optional): If ``True``, creates copies of the input tensors and detaches them from the computation graph. Defaults to ``False``. """ return GaussianSplat3d( GaussianSplat3dCpp( means=means, quats=quats, log_scales=log_scales, logit_opacities=logit_opacities, sh0=sh0, shN=shN, accumulate_mean_2d_gradients=accumulate_mean_2d_gradients, accumulate_max_2d_radii=accumulate_max_2d_radii, detach=detach, ), _private=cls.__PRIVATE__, )
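A minimal construction sketch following the tensor shapes documented above. The sizes and values below are random placeholders chosen only to satisfy those shapes, and the import path follows the module shown on this page.

import torch
from fvdb.gaussian_splatting import GaussianSplat3d

N, D, sh_degree = 1000, 3, 2                  # Gaussians, channels, SH degree (placeholders)
K = (sh_degree + 1) ** 2                      # number of SH bases, K = (sh_degree + 1)^2

gs = GaussianSplat3d.from_tensors(
    means=torch.randn(N, 3),                                          # (N, 3) centers
    quats=torch.nn.functional.normalize(torch.randn(N, 4), dim=-1),   # (N, 4) unit quaternions
    log_scales=torch.full((N, 3), -3.0),                              # (N, 3) log of per-axis scales
    logit_opacities=torch.zeros(N),                                   # (N,) sigmoid(0) = 0.5 opacity
    sh0=torch.rand(N, 1, D),                                          # (N, 1, D) diffuse SH coefficients
    shN=torch.zeros(N, K - 1, D),                                     # (N, K-1, D) view-dependent SH coefficients
)
assert gs.num_gaussians == N and gs.num_channels == D and gs.num_sh_bases == K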
[docs] @classmethod def from_ply( cls, filename: pathlib.Path | str, device: DeviceIdentifier = "cuda" ) -> "tuple[GaussianSplat3d, dict[str, str | int | float | torch.Tensor]]": """ Create a `GaussianSplat3d` instance from a PLY file. Args: filename (str): The name of the file to load the PLY data from. device (torch.device): The device to load the data onto. Default is "cuda". Returns: splats (GaussianSplat3d): An instance of GaussianSplat3d initialized with the data from the PLY file. metadata (dict[str, str | int | float | torch.Tensor]): A dictionary of metadata where the keys are strings and the values are either strings, ints, floats, or tensors. Can be empty if no metadata is saved in the PLY file. """ device = resolve_device(device) if isinstance(filename, pathlib.Path): filename = str(filename) gs_impl, metadata = GaussianSplat3dCpp.from_ply(filename=filename, device=device) return cls(impl=gs_impl, _private=cls.__PRIVATE__), metadata
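A short loading sketch under the signature above; the file name is a placeholder.

from fvdb.gaussian_splatting import GaussianSplat3d

# Load a previously saved splat file (path is a placeholder) onto the GPU.
splats, metadata = GaussianSplat3d.from_ply("scene.ply", device="cuda")
print(splats.num_gaussians, splats.sh_degree)
print(metadata)   # may be empty if the PLY file carried no metadata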
@overload def __getitem__(self, index: slice) -> "GaussianSplat3d": ... @overload def __getitem__(self, index: torch.Tensor) -> "GaussianSplat3d": ...
[docs] def __getitem__(self, index: slice | torch.Tensor) -> "GaussianSplat3d": """ Select Gaussians using either an integer index tensor, a boolean mask tensor, or a slice. .. note:: If :attr:`accumulate_mean_2d_gradients` or :attr:`accumulate_max_2d_radii` is enabled on this :class:`GaussianSplat3d` instance, the returned :class:`GaussianSplat3d` will also contain the corresponding accumulated values. Example usage: .. code-block:: python # Using a slice gs_subset = gsplat3d[10:20] # Selects Gaussians from index 10 to 19 # Using an integer index tensor indices = torch.tensor([0, 2, 4, 6]) gs_subset = gsplat3d[indices] # Selects Gaussians at indices 0, 2, 4, and 6 # Using a boolean mask tensor mask = torch.tensor([True, False, True, False, ...]) # Length must be num_gaussians gs_subset = gsplat3d[mask] # Selects Gaussians where mask is True Args: index (slice | torch.Tensor): A slice object or a 1D tensor containing either integer indices or a boolean mask. Returns: gaussian_splat_3d (GaussianSplat3d): A new instance of :class:`GaussianSplat3d` containing only the selected Gaussians. """ if isinstance(index, slice): return GaussianSplat3d( impl=self._impl.slice_select( index.start if index.start is not None else 0, index.stop if index.stop is not None else self.num_gaussians, index.step if index.step is not None else 1, ), _private=self.__PRIVATE__, ) elif isinstance(index, torch.Tensor): if index.dim() != 1: raise ValueError("Expected 'index' to be a 1D tensor.") if index.dtype == torch.bool: if len(index) != self.num_gaussians: raise ValueError( f"Expected 'index_or_mask' to have the same length as the number of Gaussians ({self.num_gaussians}), " f"but got {len(index)}." ) return GaussianSplat3d(impl=self._impl.mask_select(index), _private=self.__PRIVATE__) elif index.dtype == torch.int64 or index.dtype == torch.int32: return GaussianSplat3d(impl=self._impl.index_select(index), _private=self.__PRIVATE__) else: raise ValueError("Expected 'index' to be a boolean or integer (int32 or int64) tensor.") else: raise TypeError("Expected 'index' to be a slice or a torch.Tensor.")
@overload def __setitem__(self, index: slice, value: "GaussianSplat3d") -> None: ... @overload def __setitem__(self, index: torch.Tensor, value: "GaussianSplat3d") -> None: ...
[docs] def __setitem__(self, index: torch.Tensor | slice, value: "GaussianSplat3d") -> None: """ Set the values of Gaussians in this :class:`GaussianSplat3d` instance using either an integer index tensor, a boolean mask tensor, or a slice. .. note:: If using integer indices with duplicate indices, the Gaussian set from ``value`` at the duplicate indices will overwrite in a random order. .. note:: If :attr:`accumulate_mean_2d_gradients` or :attr:`accumulate_max_2d_radii` is enabled on this :class:`GaussianSplat3d` instance, the corresponding accumulated values will also be updated for the selected Gaussians, based on the values from the ``value`` instance. If ``value`` does not have these accumulations enabled, the accumulated values for the selected Gaussians will be reset to zero. Example: .. code-block:: python # Using a slice gs_subset: GaussianSplat3d = ... # Some GaussianSplat3d instance with 10 Gaussians gsplat3d[10:20] = gs_subset # Sets Gaussians from index 10 to 19 # Using an integer index tensor indices = torch.tensor([0, 2, 4, 6]) gs_subset: GaussianSplat3d = ... # Some GaussianSplat3d instance with 4 Gaussians gsplat3d[indices] = gs_subset # Sets Gaussians at indices 0, 2, 4, and 6 # Using a boolean mask tensor mask = torch.tensor([True, False, True, False, ...]) # Length must be num_gaussians gs_subset: GaussianSplat3d = ... # Some GaussianSplat3d instance with num unmasked Gaussians gsplat3d[mask] = gs_subset # Sets Gaussians where mask is True Args: index (torch.Tensor | slice): A slice object or a 1D tensor containing either integer indices or a boolean mask. value (GaussianSplat3d): The :class:`GaussianSplat3d` instance containing the new values to set. Must have the same number of Gaussians as the selected indices or mask. """ if isinstance(index, slice): self._impl.slice_set( index.start if index.start is not None else 0, index.stop if index.stop is not None else self.num_gaussians, index.step if index.step is not None else 1, value._impl, ) return elif isinstance(index, torch.Tensor): if index.dim() != 1: raise ValueError("Expected 'index' to be a 1D tensor.") if index.dtype == torch.bool: if len(index) != self.num_gaussians: raise ValueError( f"Expected 'index' to have the same length as the number of Gaussians ({self.num_gaussians}), " f"but got {len(index)}." ) self._impl.mask_set(index, value._impl) elif index.dtype == torch.int64 or index.dtype == torch.int32: self._impl.index_set(index, value._impl) else: raise ValueError("Expected 'index' to be a boolean or integer (int32 or int64) tensor.") else: raise TypeError("Expected 'index' to be a slice or a torch.Tensor")
[docs] def detach(self) -> "GaussianSplat3d": """ Return a new :class:`GaussianSplat3d` instance whose tensors are detached from the computation graph. This is useful when you want to stop tracking gradients for this instance. Returns: gaussian_splat (GaussianSplat3d): A new :class:`GaussianSplat3d` instance whose tensors are detached. """ return GaussianSplat3d(impl=self._impl.detach(), _private=self.__PRIVATE__)
[docs] def detach_(self) -> None: """ Detaches this :class:`GaussianSplat3d` instance from the computation graph in place. This modifies the current instance to stop tracking gradients. .. note:: This method modifies the current instance and does not return a new instance. """ self._impl.detach_in_place()
[docs] @staticmethod def cat( splats: "Sequence[GaussianSplat3d]", accumulate_mean_2d_gradients: bool = False, accumulate_max_2d_radii: bool = False, detach: bool = False, ) -> "GaussianSplat3d": """ Concatenates a sequence of :class:`GaussianSplat3d` instances into a single :class:`GaussianSplat3d` instance. The returned :class:`GaussianSplat3d` will contain all the Gaussians from the input instances, in the order they were provided. .. note:: All input :class:`GaussianSplat3d` instances must have the same number of channels and spherical harmonic degree. .. note:: If ``accumulate_mean_2d_gradients`` is ``True``, the concatenated instance will track the average norm of projected mean gradients for each Gaussian during the backward pass of projection. This value is copied over from each input instance if they were tracking it, and initialized to zero otherwise. .. note:: If ``accumulate_max_2d_radii`` is ``True``, the concatenated instance will track the maximum 2D radii for each Gaussian during the backward pass of projection. This value is copied over from each input instance if they were tracking it, and initialized to zero otherwise. Args: splats (Sequence[GaussianSplat3d]): A sequence of :class:`GaussianSplat3d` instances to concatenate. accumulate_mean_2d_gradients (bool): If True, copies over the accumulated mean 2D gradients for each :class:`GaussianSplat3d` into the new one, or initializes it to zero if the input instance was not tracking it. Defaults to ``False``. accumulate_max_2d_radii (bool): If ``True``, copies the accumulated maximum 2D radii for each :class:`GaussianSplat3d` into the concatenated one, or initializes it to zero if the input instance was not tracking it. Defaults to ``False``. detach (bool): If ``True``, detaches the concatenated :class:`GaussianSplat3d` from the computation graph. Defaults to ``False``. Returns: GaussianSplat3d: A new instance of GaussianSplat3d containing the concatenated Gaussians. """ splat_list = [splat._impl for splat in splats] return GaussianSplat3d( impl=GaussianSplat3dCpp.cat(splat_list, accumulate_mean_2d_gradients, accumulate_max_2d_radii, detach), _private=GaussianSplat3d.__PRIVATE__, )
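A concatenation sketch; ``gs_a`` and ``gs_b`` are assumed to be existing, compatible :class:`GaussianSplat3d` instances (same number of channels and same SH degree), as required above.

# Merge two compatible splat sets into one (illustrative only).
merged = GaussianSplat3d.cat(
    [gs_a, gs_b],
    accumulate_mean_2d_gradients=True,   # carry over (or zero-initialize) 2D gradient stats
    accumulate_max_2d_radii=False,
    detach=True,                         # drop autograd history in the merged copy
)
assert merged.num_gaussians == gs_a.num_gaussians + gs_b.num_gaussians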
[docs] @classmethod def from_state_dict(cls, state_dict: dict[str, torch.Tensor]) -> "GaussianSplat3d": """ Creates a :class:`GaussianSplat3d` instance from a state dictionary generated by :meth:`state_dict`. This method is typically used to load a saved state of the :class:`GaussianSplat3d` instance. A state dictionary must contains the following keys which are all the required parameters to initialize a :class:`GaussianSplat3d`. Here ``N`` denotes the number of Gaussians (see :attr:`num_gaussians`) - ``'means'``: Tensor of shape ``(N, 3)`` representing the means of the Gaussians. - ``'quats'``: Tensor of shape ``(N, 4)`` representing the quaternions of the Gaussians. - ``'log_scales'``: Tensor of shape ``(N, 3)`` representing the log scales of the Gaussians. - ``'logit_opacities'``: Tensor of shape ``(N,)`` representing the logit opacities of the Gaussians. - ``'sh0'``: Tensor of shape ``(N, 1, D)`` representing the diffuse SH coefficients where ``D`` is the number of channels (see :attr:`num_channels`). - ``'shN'``: Tensor of shape ``(N, K-1, D)`` representing the directionally varying SH coefficients where ``D`` is the number of channels (see :attr:`num_channels`), and ``K`` is the number of spherical harmonic bases (see :attr:`num_sh_bases`). - ``'accumulate_max_2d_radii'``: bool Tensor with a single element indicating whether to track the maximum 2D radii for gradients. - ``'accumulate_mean_2d_gradients'``: bool Tensor with a single element indicating whether to track the average norm of the gradient of projected means for each Gaussian. It can also optionally contain the following keys: - ``'accumulated_gradient_step_counts'``: Tensor of shape ``(N,)`` representing the accumulated gradient step counts for each Gaussian. - ``'accumulated_max_2d_radii'``: Tensor of shape ``(N,)`` representing the maximum 2D projected radius for each Gaussian across every iteration of optimization. - ``'accumulated_mean_2d_gradient_norms'``: Tensor of shape ``(N,)`` representing the average norm of the gradient of projected means for each Gaussian across every iteration of optimization. Args: state_dict (dict[str, torch.Tensor]): A dictionary containing the state of the :class:`GaussianSplat3d` instance, usually generated via the :meth:`state_dict` method. Returns: gaussian_splat (GaussianSplat3d): An instance of :class:`GaussianSplat3d` initialized with the provided state dictionary. """ return cls(impl=GaussianSplat3dCpp.from_state_dict(state_dict), _private=cls.__PRIVATE__)
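A checkpoint round-trip sketch. It assumes the :meth:`state_dict` method referenced above produces the keys listed in this docstring; ``gs`` and the file path are placeholders.

import torch

# Save the full state of an existing GaussianSplat3d instance `gs`.
torch.save(gs.state_dict(), "splats_checkpoint.pt")

# Restore it later into a fresh instance.
state = torch.load("splats_checkpoint.pt")
gs_restored = GaussianSplat3d.from_state_dict(state)
assert gs_restored.num_gaussians == gs.num_gaussians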
@property def device(self) -> torch.device: """ Returns the device on which the Tensors managed by this :class:`GaussianSplat3d` instance is stored. Returns: device (torch.device): The device of this :class:`GaussianSplat3d` instance. """ return self._impl.device @property def dtype(self) -> torch.dtype: """ Returns the data type of of the tensors managed by this :class:`GaussianSplat3d` instance (e.g., ``torch.float32``, ``torch.float64``). Returns: torch.dtype: The data type of the tensors managed by this :class:`GaussianSplat3d` instance. """ return self._impl.dtype @property def sh_degree(self) -> int: """ Returns the degree of the spherical harmonics used in the Gaussian splatting representation. This value is 0 for diffuse SH coefficients and >= 1 for directionally varying SH coefficients. .. note:: This is **not** the same as the number of spherical harmonics bases (see :attr:`num_sh_bases`). The relationship between the degree and the number of bases is given by :math:`K = (sh\\_degree + 1)^2`, where :math:`K` is the number of spherical harmonics bases. Returns: sh_degree (int): The degree of the spherical harmonics. """ return self._impl.sh_degree @property def num_channels(self) -> int: """ Returns the number of channels in the Gaussian splatting representation. For example, if you are rendering RGB images, this method will return 3. Returns: num_channels (int): The number of channels. """ return self._impl.num_channels @property def num_gaussians(self) -> int: """ Returns the number of Gaussians in the Gaussian splatting representation. This is the total number of individual gaussian splats that are being used to represent the scene. Returns: num_gaussians (int): The number of Gaussians. """ return self._impl.num_gaussians @property def num_sh_bases(self) -> int: """ Returns the number of spherical harmonics (SH) bases used in the Gaussian splatting representation. .. note:: The number of SH bases is related to the SH degree (see :attr:`sh_degree`) by the formula :math:`K = (sh\\_degree + 1)^2`, where :math:`K` is the number of spherical harmonics bases. Returns: num_sh_bases (int): The number of spherical harmonics bases. """ return self._impl.num_sh_bases @property def log_scales(self) -> torch.Tensor: """ Returns the log of the scales for each Gaussian. Gaussians are represented in 3D space, as ellipsoids defined by their means, orientations (quaternions), and scales. *i.e.* .. math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. .. note:: The :class:`GaussianSplat3d` stores the log of scales scales (:attr:`log_scales`) rather than the scales directly. This ensures numerical stability, especially when optimizing the scales. To read the scales directly, see the :attr:`scales` property (which is read-only). Returns: log_scales (torch.Tensor): A tensor of shape ``(N, 3)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the log of the scale of a Gaussian in 3D space. """ return self._impl.log_scales @log_scales.setter def log_scales(self, value: torch.Tensor) -> None: """ Sets the log of the scales for each Gaussian. Gaussians are represented in 3D space, as ellipsoids defined by their means, orientations (quaternions), and scales. *i.e.* .. 
math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. .. note:: The :class:`GaussianSplat3d` stores the log of scales scales (:attr:`log_scales`) rather than the scales directly. This ensures numerical stability, especially when optimizing the scales. To read the scales directly, see the :attr:`scales` property (which is read-only). Args: value (torch.Tensor): A tensor of shape ``(N, 3)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the log of the scale of a Gaussian in 3D space. """ self._impl.log_scales = cast_check(value, torch.Tensor, "log_scales") @property def logit_opacities(self) -> torch.Tensor: """ Return the logit (inverse of sigmoid) of the opacities of each Gaussian in the scene. .. note:: The :class:`GaussianSplat3d` stores the logit of opacities (:attr:`logit_opacities`) rather than the opacities directly. The actual opacities are obtained by applying the sigmoid function to the logit opacities. To read the opacities directly, see the :attr:`opacities` property (which is read-only). Returns: logit_opacities (torch.Tensor): A tensor of shape ``(N,)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the logit of the opacity of a Gaussian in 3D space. """ return self._impl.logit_opacities @logit_opacities.setter def logit_opacities(self, value: torch.Tensor) -> None: """ Set the logit (inverse of sigmoid) of the opacities of each Gaussian in the scene. .. note:: The :class:`GaussianSplat3d` stores the logit of opacities (:attr:`logit_opacities`) rather than the opacities directly. The actual opacities are obtained by applying the sigmoid function to the logit opacities. To read the opacities directly, see the :attr:`opacities` property (which is read-only). Args: value (torch.Tensor): A tensor of shape ``(N,)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the logit of the opacity of a Gaussian in 3D space. """ self._impl.logit_opacities = cast_check(value, torch.Tensor, "logit_opacities") @property def means(self) -> torch.Tensor: """ Return the means (3d positions) of the Gaussians in this :class:`GaussianSplat3d`. The means represent the center of each Gaussian in 3D space. *i.e* each Gaussian :math:`g_i` is defined as: .. math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. Returns: torch.Tensor: A tensor of shape (N, 3) where N is the number of Gaussians (see `num_gaussians`). Each row represents the mean of a Gaussian in 3D space. """ return self._impl.means @means.setter def means(self, value: torch.Tensor) -> None: """ Sets the means (3d positions) of the Gaussians in this :class:`GaussianSplat3d`. The means represent the center of each Gaussian in 3D space. *i.e* each Gaussian :math:`g_i` is defined as: .. 
math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. Args: value (torch.Tensor): A tensor of shape ``(N, 3)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the mean of a Gaussian in 3D space. """ self._impl.means = cast_check(value, torch.Tensor, "means") @property def quats(self) -> torch.Tensor: """ Returns the unit quaternions representing the orientation of the covariance of the Gaussians in this :class:`GaussianSplat3d`. *i.e* each Gaussian :math:`g_i` is defined as: .. math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. Returns: quats (torch.Tensor): A tensor of shape ``(N, 4)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the unit quaternion of a Gaussian in 3D space. """ return self._impl.quats @quats.setter def quats(self, value: torch.Tensor) -> None: """ Sets the unit quaternions representing the orientation of the covariance of the Gaussians in this :class:`GaussianSplat3d`. *i.e* each Gaussian :math:`g_i` is defined as: .. math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. Args: value (torch.Tensor): A tensor of shape ``(N, 4)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each row represents the unit quaternion of a Gaussian in 3D space. """ self._impl.quats = cast_check(value, torch.Tensor, "quats") @property def requires_grad(self) -> bool: """ Returns whether the tensors tracked by this :class:`GaussianSplat3d` instance are set to require gradients. This is typically set to True if you want to optimize the parameters of the Gaussians. Example: .. code-block:: python gsplat3d = GaussianSplat3d(...) # Some GaussianSplat3d instance gsplat3d.requires_grad = True # Enable gradient tracking for optimization assert gsplat3d.means.requires_grad # Now the means will require gradients assert gsplat3d.covariances.requires_grad # Now the covariances will require gradients assert gsplat3d.logit_opacities.requires_grad # Now the logit opacities will require gradients assert gsplat3d.log_scales.requires_grad # Now the log scales will require gradients assert gsplat3d.sh0.requires_grad # Now the SH coefficients will require gradients assert gsplat3d.shN.requires_grad # Now the SH coefficients will require gradients Returns: requires_grad (bool): ``True`` if gradients are required, ``False`` otherwise. """ return self._impl.requires_grad @requires_grad.setter def requires_grad(self, value: bool) -> None: """ Sets whether the tensors tracked by this :class:`GaussianSplat3d` instance require gradients. This is typically set to True if you want to optimize the parameters of the Gaussians. Example: .. 
code-block:: python gsplat3d = GaussianSplat3d(...) # Some GaussianSplat3d instance gsplat3d.requires_grad = True # Enable gradient tracking for optimization assert gsplat3d.means.requires_grad # Now the means will require gradients assert gsplat3d.covariances.requires_grad # Now the covariances will require gradients assert gsplat3d.logit_opacities.requires_grad # Now the logit opacities will require gradients assert gsplat3d.log_scales.requires_grad # Now the log scales will require gradients assert gsplat3d.sh0.requires_grad # Now the SH coefficients will require gradients assert gsplat3d.shN.requires_grad # Now the SH coefficients will require gradients Returns: requires_grad (bool): ``True`` if gradients are required, ``False`` otherwise. """ self._impl.requires_grad = cast_check(value, bool, "requires_grad") @property def sh0(self) -> torch.Tensor: """ Returns the diffuse spherical harmonics coefficients of the Gaussians in this :class:`GaussianSplat3d`. These coefficients are used to represent the diffuse color/feature of each Gaussian. Returns: sh0 (torch.Tensor): A tensor of shape ``(N, 1, D)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`), and ``D`` is the number of channels (see :attr:`num_channels`). Each row represents the diffuse SH coefficients for a Gaussian. """ return self._impl.sh0 @sh0.setter def sh0(self, value: torch.Tensor) -> None: """ Sets the diffuse spherical harmonics coefficients of the Gaussians in this :class:`GaussianSplat3d`. These coefficients are used to represent the diffuse color/feature of each Gaussian. Args: value (torch.Tensor): A tensor of shape ``(N, 1, D)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`), and ``D`` is the number of channels (see :attr:`num_channels`). Each row represents the diffuse SH coefficients for a Gaussian. """ self._impl.sh0 = cast_check(value, torch.Tensor, "sh0") @property def shN(self) -> torch.Tensor: """ Returns the directionally varying spherical harmonics coefficients of the Gaussians in the scene. These coefficients are used to represent a direction dependent color/feature of each Gaussian. Returns: torch.Tensor: A tensor of shape (N, K-1, D) where N is the number of Gaussians (see `num_gaussians`), D is the number of channels (see `num_channels`), and K is the number of spherical harmonic bases (see `num_sh_bases`). Each row represents the directionally varying SH coefficients for a Gaussian. """ return self._impl.shN @shN.setter def shN(self, value: torch.Tensor) -> None: """ Sets the directionally varying spherical harmonics coefficients of the Gaussians in this :class:`GaussianSplat3d`. These coefficients are used to represent a direction dependent color/feature of each Gaussian. Args: value (torch.Tensor): A tensor of shape ``(N, K-1, D)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`), ``D`` is the number of channels (see :attr:`num_channels`), and ``K`` is the number of spherical harmonic bases (see :attr:`num_sh_bases`). Each row represents the directionally varying SH coefficients for a Gaussian. """ self._impl.shN = cast_check(value, torch.Tensor, "shN") @property def opacities(self) -> torch.Tensor: """ Returns the opacities of the Gaussians in the Gaussian splatting representation. The opacities encode the visibility of each Gaussian in the scene. .. note:: This property is read only. :class:`GaussianSplat3d` stores the logit (inverse of sigmoid) of the opacities to ensure numerical stability, which you can modify. See :attr:`logit_opacities`. 
Returns: opacities (torch.Tensor): A tensor of shape ``(N,)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each element represents the opacity of a Gaussian. """ return self._impl.opacities @property def scales(self) -> torch.Tensor: """ Returns the scales of the Gaussians in the Gaussian splatting representation. The scales are the eigenvalues of the covariance matrix of each Gaussian. *i.e* each Gaussian :math:`g_i` is defined as: .. math:: g_i(x) = \\exp(-0.5 (x - \\mu_i)^T \\Sigma_i^{-1} (x - \\mu_i)) where :math:`\\mu_i` is the mean and :math:`\\Sigma_i = R(q_i)^T S_i R(q_i)` is the covariance of the i-th Gaussian with :math:`R(q_i)` being the rotation matrix defined by the unit quaternion :math:`q_i` of the Gaussian, and :math:`S_i = diag(\\exp(log\\_scales_i))`. .. note:: This property is read only. :class:`GaussianSplat3d` stores the log of scales to ensure numerical stability, which you can modify. See :attr:`log_scales`. Returns: scales (torch.Tensor): A tensor of shape ``(N, 3)`` where ``N`` is the number of Gaussians. Each row represents the scale of a Gaussian in 3D space. """ return self._impl.scales @property def accumulated_gradient_step_counts(self) -> torch.Tensor: """ Returns the accumulated gradient step counts for each Gaussian. If this :class:`GaussianSplat3d` instance is set to track accumulated gradients (*i.e* :attr:`accumulate_mean_2d_gradients` is ``True``), then this tensor contains the number of gradient steps that have been applied to each Gaussian during optimization. If :attr:`accumulate_mean_2d_gradients` is ``False``, this property will be an empty tensor. .. note:: To reset the counts, call the :meth:`reset_accumulated_gradient_state` method. Returns: step_counts (torch.Tensor): A tensor of shape ``(N,)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each element represents the accumulated gradient step count for a Gaussian. """ return self._impl.accumulated_gradient_step_counts @property def accumulated_max_2d_radii(self) -> torch.Tensor: """ Returns the maximum 2D projected radius (in pixels) for each Gaussian across all calls to `render_*` functions. This is used by certain optimization techniques to ensure that the Gaussians do not become too large or too small during the optimization process. If this :class:`GaussianSplat3d` instance is set to track maximum 2D radii (*i.e* :attr:`accumulate_max_2d_radii` is ``True``), then this tensor contains the maximum 2D radius for each Gaussian. If :attr:`accumulate_max_2d_radii` is ``False``, this property will be an empty tensor. .. note:: To reset the maximum radii to zero, you can call the :meth:`reset_accumulated_gradient_state` method. Returns: max_radii (torch.Tensor): A tensor of shape ``(N,)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each element represents the maximum 2D radius for a Gaussian across all optimization iterations. """ return self._impl.accumulated_max_2d_radii @property def accumulate_max_2d_radii(self) -> bool: """ Returns whether to track the maximum 2D projected radius of each Gaussian across calls to `render_*` functions. This is used by certain optimization techniques to ensure that the Gaussians do not become too large or too small during the optimization process. .. seealso:: See :attr:`accumulated_max_2d_radii` for the actual maximum radii values. Returns: accumulate_max_radii (bool): ``True`` if the maximum 2D radii are being tracked across rendering calls, ``False`` otherwise.
""" return self._impl.accumulate_max_2d_radii @accumulate_max_2d_radii.setter def accumulate_max_2d_radii(self, value) -> None: """ Sets whether to track the maximum 2D projected radius of each Gaussian across calls to `render_*` functions. This is used by certain optimization techniques to ensure that the Gaussians do not become too large or too small during the optimization process. .. seealso:: See :attr:`accumulated_max_2d_radii` for the actual maximum radii values. Args: value (bool): ``True`` if the maximum 2D radii are being tracked across rendering calls, ``False`` otherwise. """ self._impl.accumulate_max_2d_radii = cast_check(value, bool, "accumulate_max_2d_radii") @property def accumulate_mean_2d_gradients(self) -> bool: """ Returns whether to track the average norm of the gradient of projected means for each Gaussian during the backward pass of projection. This property is used by certain optimization techniques to split/prune/duplicate Gaussians. The accumulated 2d gradient norms are defined as follows: .. math:: \\sum_{t=1}^{T} \\| \\partial_{L_t} \\mu_i^{2D} \\|_2 where :math:`\\mu_i^{2D}` is the projection of the mean of Gaussian :math:`g_i` onto the image plane, and :math:`L_t` is the loss at iteration :math:`t`. .. seealso:: See :attr:`accumulated_mean_2d_gradient_norms` for the actual average norms of the gradients. Returns: accumulate_mean_2d_grads (bool): ``True`` if the average norm of the gradient of projected means is being tracked, ``False`` otherwise. """ return self._impl.accumulate_mean_2d_gradients @accumulate_mean_2d_gradients.setter def accumulate_mean_2d_gradients(self, value: bool) -> None: """ Sets whether to track the average norm of the gradient of projected means for each Gaussian during the backward pass of projection. This property is used by certain optimization techniques to split/prune/duplicate Gaussians. The accumulated 2d gradient norms are defined as follows: .. math:: \\sum_{t=1}^{T} \\| \\partial_{L_t} \\mu_i^{2D} \\|_2 where :math:`\\mu_i^{2D}` is the projection of the mean of Gaussian :math:`g_i` onto the image plane, and :math:`L_t` is the loss at iteration :math:`t`. .. seealso:: See :attr:`accumulated_mean_2d_gradient_norms` for the actual average norms of the gradients. Args: value (bool): ``True`` if the average norm of the gradient of projected means is being tracked, ``False`` otherwise. """ self._impl.accumulate_mean_2d_gradients = cast_check(value, bool, "accumulate_mean_2d_gradients") @property def accumulated_mean_2d_gradient_norms(self) -> torch.Tensor: """ Returns the average norm of the gradient of projected (2D) means for each Gaussian across every backward pass. This is used by certain optimization techniques to split/prune/duplicate Gaussians. The accumulated 2d gradient norms are defined as follows: .. math:: \\sum_{t=1}^{T} \\| \\partial_{L_t} \\mu_i^{2D} \\|_2 where :math:`\\mu_i^{2D}` is the projection of the mean of Gaussian :math:`g_i` onto the image plane, and :math:`L_t` is the loss at iteration :math:`t`. .. note:: To reset the accumulated norms, call the :meth:`reset_accumulated_gradient_state` method. Returns: accumulated_grad_2d_norms (torch.Tensor): A tensor of shape ``(N,)`` where ``N`` is the number of Gaussians (see :attr:`num_gaussians`). Each element represents the average norm of the gradient of projected means for a Gaussian across all optimization iterations. The norm is computed in 2D space, i.e., the projected means. """ return self._impl.accumulated_mean_2d_gradient_norms
[docs] def project_gaussians_for_depths( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> ProjectedGaussianSplats: """ Projects this :class:`GaussianSplat3d` onto one or more image planes for rendering depth images in those planes. You can render depth images from the projected Gaussians by calling :meth:`render_projected_gaussians`. .. note:: The reason to have a separate projection and rendering step is to enable rendering crops of an image without having to project the Gaussians again. .. note:: All images being rendered must have the same width and height. .. seealso:: :class:`fvdb.ProjectedGaussianSplats` for the projected Gaussians representation. .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Project the Gaussians for rendering depth images onto C image planes projected_gaussians = gaussian_splat_3d.project_gaussians_for_depths( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the C images image_height, # height of the C images near, # near clipping plane far) # far clipping plane # Now render a crop of size 100x100 starting at (10, 10) from the projected Gaussians # in each image plane. # Returns a tensor of shape [C, 100, 100, 1] containing the depth images, # and a tensor of shape [C, 100, 100, 1] containing the final alpha (opacity) values # of each pixel. cropped_depth_images_1, cropped_alphas = gaussian_splat_3d.render_from_projected_gaussians( projected_gaussians, crop_width=100, crop_height=100, crop_origin_w=10, crop_origin_h=10) # To get the true depth values, divide the last channel by the alpha values true_depths_1 = cropped_depth_images_1[..., -1:] / cropped_alphas Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for ``C`` cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note that all images must have the same width. image_height (int): The height of the images to be rendered. Note that all images must have the same height. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`ProjectionType.PERSPECTIVE`. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very small projected Gaussians, which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: projected_gaussians (ProjectedGaussianSplats): An instance of ProjectedGaussianSplats containing the projected Gaussians. This object contains the projected 2D representations of the Gaussians, which can be used for rendering depth images or further processing.
""" return ProjectedGaussianSplats( self._impl.project_gaussians_for_depths( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ), _private=ProjectedGaussianSplats.__PRIVATE__, )
[docs] def project_gaussians_for_images( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, sh_degree_to_use: int = -1, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> ProjectedGaussianSplats: """ Projects this :class:`GaussianSplat3d` onto one or more image planes for rendering multi-channel (see :attr:`num_channels`) images in those planes. You can render images from the projected Gaussians by calling :meth:`render_projected_gaussians`. .. note:: The reason to have a separate projection and rendering step is to enable rendering crops of an image without having to project the Gaussians again. .. note:: All images being rendered must have the same width and height. .. seealso:: :class:`fvdb.ProjectedGaussianSplats` for the projected Gaussians representation. .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Project the Gaussians for rendering images onto C image planes projected_gaussians = gaussian_splat_3d.project_gaussians_for_images( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the C images image_height, # height of the C images near, # near clipping plane far) # far clipping plane # Now render a crop of size 100x100 starting at (10, 10) from the projected Gaussians # in each image plane. # Returns a tensor of shape [C, 100, 100, D] containing the images (where D is num_channels), # and a tensor of shape [C, 100, 100, 1] containing the final alpha (opacity) values # of each pixel. cropped_images_1, cropped_alphas = gaussian_splat_3d.render_from_projected_gaussians( projected_gaussians, crop_width=100, crop_height=100, crop_origin_w=10, crop_origin_h=10) Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for ``C`` cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note that all images must have the same width. image_height (int): The height of the images to be rendered. Note that all images must have the same height. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`ProjectionType.PERSPECTIVE`. sh_degree_to_use (int): The degree of spherical harmonics to use for rendering. -1 means use all available SH bases. 0 means use only the first SH base (constant color). Note that you can't use more SH bases than available in the GaussianSplat3d instance. Default is -1. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: projected_gaussians (ProjectedGaussianSplats): An instance of ProjectedGaussianSplats containing the projected Gaussians. 
This object contains the projected 2D representations of the Gaussians, which can be used for rendering images or further processing. """ return ProjectedGaussianSplats( self._impl.project_gaussians_for_images( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), sh_degree_to_use=sh_degree_to_use, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ), _private=ProjectedGaussianSplats.__PRIVATE__, )
[docs] def project_gaussians_for_images_and_depths( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, sh_degree_to_use: int = -1, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> ProjectedGaussianSplats: """ Projects this :class:`GaussianSplat3d` onto one or more image planes for rendering multi-channel (see :attr:`num_channels`) images with depths in the last channel. You can render images+depths from the projected Gaussians by calling :meth:`render_projected_gaussians`. .. note:: The reason to have a separate projection and rendering step is to enable rendering crops of an image without having to project the Gaussians again. .. note:: All images being rendered must have the same width and height. .. seealso:: :class:`fvdb.ProjectedGaussianSplats` for the projected Gaussians representation. .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Project the Gaussians for rendering images onto C image planes projected_gaussians = gaussian_splat_3d.project_gaussians_for_images_and_depths( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the C images image_height, # height of the C images near, # near clipping plane far) # far clipping plane # Now render a crop of size 100x100 starting at (10, 10) from the projected Gaussians # in each image plane. # Returns a tensor of shape [C, 100, 100, D] containing the images (where D is num_channels + 1 for depth), # and a tensor of shape [C, 100, 100, 1] containing the final alpha (opacity) values # of each pixel. cropped_images_1, cropped_alphas = gaussian_splat_3d.render_from_projected_gaussians( projected_gaussians, crop_width=100, crop_height=100, crop_origin_w=10, crop_origin_h=10) cropped_images = cropped_images_1[..., :-1] # Extract image channels # Divide by alpha to get the final true depth values cropped_depths = cropped_images_1[..., -1:] / cropped_alphas # Extract depth channel Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for ``C`` cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note that all images must have the same width. image_height (int): The height of the images to be rendered. Note that all images must have the same height. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. sh_degree_to_use (int): The degree of spherical harmonics to use for rendering. -1 means use all available SH bases. 0 means use only the first SH base (constant color). Note that you can't use more SH bases than available in the GaussianSplat3d instance. Default is -1. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. 
eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: projected_gaussians (ProjectedGaussianSplats): An instance of ProjectedGaussianSplats containing the projected Gaussians. This object contains the projected 2D representations of the Gaussians, which can be used for rendering images or further processing. """ return ProjectedGaussianSplats( self._impl.project_gaussians_for_images_and_depths( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), sh_degree_to_use=sh_degree_to_use, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ), _private=ProjectedGaussianSplats.__PRIVATE__, )
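# Example (illustrative sketch, not part of the module source): reusing a single
# projection pass to render several crops, which is the reason projection and rendering
# are split into separate steps. gaussian_splat_3d and projected_gaussians follow the
# docstring example above; the crop list is hypothetical.
crop_origins = [(0, 0), (0, 128), (128, 0), (128, 128)]  # (origin_h, origin_w) pairs
crops = []
for origin_h, origin_w in crop_origins:
    crop, crop_alpha = gaussian_splat_3d.render_from_projected_gaussians(
        projected_gaussians,
        crop_width=128,
        crop_height=128,
        crop_origin_w=origin_w,
        crop_origin_h=origin_h,
    )
    crops.append(crop)  # each crop has shape [C, 128, 128, num_channels + 1]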
[docs] def render_from_projected_gaussians( self, projected_gaussians: ProjectedGaussianSplats, crop_width: int = -1, crop_height: int = -1, crop_origin_w: int = -1, crop_origin_h: int = -1, tile_size: int = 16, backgrounds: torch.Tensor | None = None, ) -> tuple[torch.Tensor, torch.Tensor]: """ Render a set of images from Gaussian splats that have already been projected onto image planes (See for example :meth:`project_gaussians_for_images`). This method is useful when you want to render images from pre-computed projected Gaussians, for example, when rendering crops of images without having to re-project the Gaussians. .. note:: If you want to render the full image, pass negative values for ``crop_width``, ``crop_height``, ``crop_origin_w``, and ``crop_origin_h`` (default behavior). To render full images, all these values must be negative or this method will raise an error. .. note:: If your crop goes beyond the image boundaries, the resulting image will be clipped to be within the image boundaries. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Project the Gaussians for rendering images onto C image planes projected_gaussians = gaussian_splat_3d.project_gaussians_for_images_and_depths( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the C images image_height, # height of the C images near, # near clipping plane far) # far clipping plane # Now render a crop of size 100x100 starting at (10, 10) from the projected Gaussians # in each image plane. # Returns a tensor of shape [C, 100, 100, D] containing the images (where D is num_channels + 1 for depth), # and a tensor of shape [C, 100, 100, 1] containing the final alpha (opacity) values # of each pixel. cropped_images_1, cropped_alphas = gaussian_splat_3d.render_from_projected_gaussians( projected_gaussians, crop_width=100, crop_height=100, crop_origin_w=10, crop_origin_h=10) cropped_images = cropped_images_1[..., :-1] # Extract image channels # Divide by alpha to get the final true depth values cropped_depths = cropped_images_1[..., -1:] / cropped_alphas # Extract depth channel Args: projected_gaussians (ProjectedGaussianSplats): An instance of :class:`fvdb.ProjectedGaussianSplats` containing the projected Gaussians after spherical harmonic evaluation. This object should have been created by calling :meth:`project_gaussians_for_images`, :meth:`project_gaussians_for_depths`, :meth:`project_gaussians_for_images_and_depths`, etc. crop_width (int): The width of the crop to render. If -1, the full image width is used. Default is -1. crop_height (int): The height of the crop to render. If -1, the full image height is used. Default is -1. crop_origin_w (int): The x-coordinate of the top-left corner of the crop. If -1, the crop starts at (0, 0). Default is -1. crop_origin_h (int): The y-coordinate of the top-left corner of the crop. If -1, the crop starts at (0, 0). Default is -1. tile_size (int): The size of the tiles to use for rendering. Default is 16. This parameter controls the size of the tiles used for rendering the images. You shouldn't set this parameter unless you really know what you are doing. Returns: rendered_images (torch.Tensor): A tensor of shape ``(C, H, W, D)`` where ``C`` is the number of image planes, ``H`` is the height of the rendered images, ``W`` is the width of the rendered images, and ``D`` is the number of channels (e.g., RGB, RGBD, etc.). 
            alpha_images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of cameras,
                ``H`` is the height of the images, and ``W`` is the width of the images.
                Each element represents the alpha value (opacity) at a pixel such that ``0 <= alpha <= 1``,
                where 0 means the pixel is fully transparent and 1 means the pixel is fully opaque.
        """
        return self._impl.render_from_projected_gaussians(
            projected_gaussians=projected_gaussians._impl,
            crop_width=crop_width,
            crop_height=crop_height,
            crop_origin_w=crop_origin_w,
            crop_origin_h=crop_origin_h,
            tile_size=tile_size,
            backgrounds=backgrounds,
        )
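# Example (illustrative sketch, not part of the module source): rendering the full
# images from already-projected Gaussians by leaving every crop argument at its
# default of -1, as described in the docstring above.
full_images, full_alphas = gaussian_splat_3d.render_from_projected_gaussians(projected_gaussians)
# full_images: [C, image_height, image_width, D]; full_alphas: [C, image_height, image_width, 1]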
[docs] def render_depths( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.3, eps_2d: float = 0.3, antialias: bool = False, backgrounds: torch.Tensor | None = None, ) -> tuple[torch.Tensor, torch.Tensor]: """ Render ``C`` depth maps from this :class:`GaussianSplat3d` from ``C`` camera views. .. note:: All depth maps being rendered must have the same width and height. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Render depth maps from C camera views # depth_images is a tensor of shape [C, H, W, 1] # alpha_images is a tensor of shape [C, H, W, 1] depth_images, alpha_images = gaussian_splat_3d.render_depths( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the depth maps image_height, # height of the depth maps near, # near clipping plane far) # far clipping plane true_depths = depth_images / alpha_images # Get true depth values by dividing by alpha Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for ``C`` cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the depth maps to be rendered. Note these are the same for all depth maps being rendered. image_height (int): The height of the depth maps to be rendered. Note these are the same for all depth maps being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: depth_images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of camera views, ``H`` is the height of the depth maps, and ``W`` is the width of the depth maps. Each element represents the depth value at that pixel in the depth map. alpha_images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of camera views, ``H`` is the height of the images, and ``W`` is the width of the images. Each element represents the alpha value (opacity) at a pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. 
""" return self._impl.render_depths( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, backgrounds=backgrounds, )
[docs] def sparse_render_depths( self, pixels_to_render: JaggedTensorOrTensorT, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.3, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[JaggedTensorOrTensorT, JaggedTensorOrTensorT]: """ Render ``C`` collections of sparse depth values from this :class:`GaussianSplat3d` from ``C`` camera views at the specified pixel locations. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # pixels_to_render is a tensor of shape [C, P, 2] containing pixel coordinates to render # Render sparse depth values from C camera views at specified pixel locations # depth_values is a tensor of shape [C, P, 1] # alpha_values is a tensor of shape [C, P, 1] depth_values, alpha_values = gaussian_splat_3d.sparse_render_depths( pixels_to_render, # tensor of shape [C, P, 2] world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the images image_height, # height of the images near, # near clipping plane far) # far clipping plane true_depths = depth_values / alpha_values # Get true depth values by dividing by alpha Args: pixels_to_render (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 2)`` or a JaggedTensor where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates to render per camera. Each pixel coordinate is represented as (y, x) (row, col). world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: depth_values (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 1)`` or a JaggedTensor where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates rendered per camera. Each element represents the depth value at that pixel. alpha_values (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 1)`` or a JaggedTensor where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates rendered per camera. 
Each element represents the alpha value (opacity) at that pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. """ if isinstance(pixels_to_render, torch.Tensor): pixels_to_render_impl = JaggedTensorCpp(pixels_to_render) elif isinstance(pixels_to_render, JaggedTensor): pixels_to_render_impl: JaggedTensorCpp = pixels_to_render._impl else: raise TypeError("pixels_to_render must be either a torch.Tensor or a fvdb.JaggedTensor") ret_depths, ret_alphas = self._impl.sparse_render_depths( pixels_to_render=pixels_to_render_impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ) if isinstance(pixels_to_render, torch.Tensor): return ret_depths.jdata, ret_alphas.jdata else: return JaggedTensor(impl=ret_depths), JaggedTensor(impl=ret_alphas)
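# Example (illustrative sketch, not part of the module source): building pixels_to_render
# as a dense tensor (same pixel count per camera) or as a JaggedTensor (varying counts),
# then recovering true depths from the alpha-weighted sparse depths. Pixel coordinates are
# (row, col) as documented above; the pixel counts and the 1e-6 threshold are illustrative.
rows = torch.randint(0, image_height, (2, 64, 1))
cols = torch.randint(0, image_width, (2, 64, 1))
pixels_dense = torch.cat([rows, cols], dim=-1)  # [C=2, P=64, 2] (row, col) coordinates

# Varying pixel counts per camera; JaggedTensor is imported at the top of this module.
pixels_jagged = JaggedTensor([pixels_dense[0, :64], pixels_dense[1, :32]])

depth_values, alpha_values = gaussian_splat_3d.sparse_render_depths(
    pixels_dense, world_to_camera_matrices, projection_matrices,
    image_width, image_height, near, far)

# Normalize by alpha only where pixels received any contribution, to avoid dividing by zero.
mask = alpha_values > 1e-6
true_depths = torch.zeros_like(depth_values)
true_depths[mask] = depth_values[mask] / alpha_values[mask]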
[docs] def render_images( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, sh_degree_to_use: int = -1, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, backgrounds: torch.Tensor | None = None, ) -> tuple[torch.Tensor, torch.Tensor]: """ Render ``C`` multi-channel images (see :attr:`num_channels`) from this :class:`GaussianSplat3d` from ``C`` camera views. .. note:: All images being rendered must have the same width and height. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Render images from C camera views. # images is a tensor of shape [C, H, W, D] where D is the number of channels # alpha_images is a tensor of shape [C, H, W, 1] images, alpha_images = gaussian_splat_3d.render_images( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the images image_height, # height of the images near, # near clipping plane far) # far clipping plane Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. sh_degree_to_use (int): The degree of spherical harmonics to use for rendering. -1 means use all available SH bases. 0 means use only the first SH base (constant color). Note that you can't use more SH bases than available in the GaussianSplat3d instance. Default is -1. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: images (torch.Tensor): A tensor of shape ``(C, H, W, D)`` where ``C`` is the number of camera views, ``H`` is the height of the images, ``W`` is the width of the images, and ``D`` is the number of channels. alpha_images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of camera views, ``H`` is the height of the images, and ``W`` is the width of the images. Each element represents the alpha value (opacity) at a pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. 
""" return self._impl.render_images( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), sh_degree_to_use=sh_degree_to_use, tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, backgrounds=backgrounds, )
[docs] def sparse_render_images( self, pixels_to_render: JaggedTensorOrTensorT, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, sh_degree_to_use: int = -1, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[JaggedTensorOrTensorT, JaggedTensorOrTensorT]: """ Render ``C`` collections of multi-channel features (see :attr:`num_channels`) from this :class:`GaussianSplat3d` from ``C`` camera views at the specified pixel locations. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # pixels_to_render is a tensor of shape [C, P, 2] containing pixel coordinates to render # Render sparse images from C camera views at specified pixel locations # features is a tensor of shape [C, P, D] where D is the number of channels # alphas is a tensor of shape [C, P, 1] features, alphas = gaussian_splat_3d.sparse_render_images( pixels_to_render, # tensor of shape [C, P, 2] world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the images image_height, # height of the images near, # near clipping plane far) # far clipping plane Args: pixels_to_render (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 2)`` or a :class:`~fvdb.JaggedTensor` where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates to render per camera. Each pixel coordinate is represented as (y, x) (row, col). world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. sh_degree_to_use (int): The degree of spherical harmonics to use for rendering. -1 means use all available SH bases. 0 means use only the first SH base (constant color). Note that you can't use more SH bases than available in the GaussianSplat3d instance. Default is -1. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. 
Returns: features (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, D)`` or a :class:`~fvdb.JaggedTensor` where ``C`` is the number of camera views, ``P`` is the number of pixel coordinates rendered per camera, and ``D`` is the number of channels. alpha_images (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 1)`` or a :class:`~fvdb.JaggedTensor` where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates rendered per camera. Each element represents the alpha value (opacity) at that pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. """ if isinstance(pixels_to_render, torch.Tensor): pixels_to_render_impl = JaggedTensorCpp(pixels_to_render) elif isinstance(pixels_to_render, JaggedTensor): pixels_to_render_impl: JaggedTensorCpp = pixels_to_render._impl else: raise TypeError("pixels_to_render must be either a torch.Tensor or a fvdb.JaggedTensor") ret_features, ret_alphas = self._impl.sparse_render_images( pixels_to_render=pixels_to_render_impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), sh_degree_to_use=sh_degree_to_use, tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ) if isinstance(pixels_to_render, torch.Tensor): return ret_features.jdata, ret_alphas.jdata else: return JaggedTensor(impl=ret_features), JaggedTensor(impl=ret_alphas)
[docs] def sparse_render_images_and_depths( self, pixels_to_render: JaggedTensorOrTensorT, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, sh_degree_to_use: int = -1, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[JaggedTensorOrTensorT, JaggedTensorOrTensorT]: """ Render ``C`` collections of sparse multi-channel features (see :attr:`num_channels`) with depth as the last channel from this :class:`GaussianSplat3d` from ``C`` camera views at the specified pixel locations. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # pixels_to_render is a tensor of shape [C, P, 2] containing pixel coordinates to render # Render sparse images with depth from C camera views at specified pixel locations # features is a tensor of shape [C, P, D + 1] where D is the number of channels # alphas is a tensor of shape [C, P, 1] features, alphas = gaussian_splat_3d.sparse_render_images_and_depths( pixels_to_render, # tensor of shape [C, P, 2] world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the images image_height, # height of the images near, # near clipping plane far) # far clipping plane Args: pixels_to_render (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 2)`` or a :class:`~fvdb.JaggedTensor` where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates to render per camera. Each pixel coordinate is represented as (y, x) (row, col). world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. sh_degree_to_use (int): The degree of spherical harmonics to use for rendering. -1 means use all available SH bases. 0 means use only the first SH base (constant color). Note that you can't use more SH bases than available in the GaussianSplat3d instance. Default is -1. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. 
Returns: features_with_depths (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, D + 1)`` or a :class:`~fvdb.JaggedTensor` where ``C`` is the number of camera views, ``P`` is the number of pixel coordinates rendered per camera, and ``D`` is the number of channels. The last channel represents the depth value at that pixel. alpha_images (torch.Tensor | JaggedTensor): A tensor of shape ``(C, P, 1)`` or a :class:`~fvdb.JaggedTensor` where ``C`` is the number of camera views, and ``P`` is the number of pixel coordinates rendered per camera. Each element represents the alpha value (opacity) at that pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. """ if isinstance(pixels_to_render, torch.Tensor): pixels_to_render_impl = JaggedTensorCpp(pixels_to_render) elif isinstance(pixels_to_render, JaggedTensor): pixels_to_render_impl: JaggedTensorCpp = pixels_to_render._impl else: raise TypeError("pixels_to_render must be either a torch.Tensor or a fvdb.JaggedTensor") ret_features, ret_alphas = self._impl.sparse_render_images_and_depths( pixels_to_render=pixels_to_render_impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), sh_degree_to_use=sh_degree_to_use, tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ) if isinstance(pixels_to_render, torch.Tensor): return ret_features.jdata, ret_alphas.jdata else: return JaggedTensor(impl=ret_features), JaggedTensor(impl=ret_alphas)
[docs] def render_images_and_depths( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, sh_degree_to_use: int = -1, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, backgrounds: torch.Tensor | None = None, ) -> tuple[torch.Tensor, torch.Tensor]: """ Render ``C`` multi-channel images (see :attr:`num_channels`) with depth as the last channel from this :class:`GaussianSplat3d` from ``C`` camera views. .. note:: All images being rendered must have the same width and height. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Render images with depth maps from C camera views. # images is a tensor of shape [C, H, W, D + 1] where D is the number of channels # alpha_images is a tensor of shape [C, H, W, 1] images, alpha_images = gaussian_splat_3d.render_images_and_depths( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the images image_height, # height of the images near, # near clipping plane far) # far clipping plane images = images[..., :-1] # Extract image channels depths = images[..., -1:] / alpha_images # Extract depth channel by dividing by alpha Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. sh_degree_to_use (int): The degree of spherical harmonics to use for rendering. -1 means use all available SH bases. 0 means use only the first SH base (constant color). Note that you can't use more SH bases than available in the GaussianSplat3d instance. Default is -1. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: images (torch.Tensor): A tensor of shape ``(C, H, W, D + 1)`` where ``C`` is the number of camera views, ``H`` is the height of the images, ``W`` is the width of the images, and ``D`` is the number of channels. alpha_images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of camera views, ``H`` is the height of the images, and ``W`` is the width of the images. 
                Each element represents the alpha value (opacity) at a pixel such that ``0 <= alpha <= 1``,
                where 0 means the pixel is fully transparent and 1 means the pixel is fully opaque.
        """
        return self._impl.render_images_and_depths(
            world_to_camera_matrices=world_to_camera_matrices,
            projection_matrices=projection_matrices,
            image_width=image_width,
            image_height=image_height,
            near=near,
            far=far,
            projection_type=self._proj_type_to_cpp(projection_type),
            sh_degree_to_use=sh_degree_to_use,
            tile_size=tile_size,
            min_radius_2d=min_radius_2d,
            eps_2d=eps_2d,
            antialias=antialias,
            backgrounds=backgrounds,
        )
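# Example (illustrative sketch, not part of the module source): splitting the combined
# output of render_images_and_depths into image channels and true depths. Slice the
# depth channel before dropping it from the combined tensor.
images_and_depths, alpha_images = gaussian_splat_3d.render_images_and_depths(
    world_to_camera_matrices, projection_matrices,
    image_width, image_height, near, far)

depths = images_and_depths[..., -1:] / alpha_images  # [C, H, W, 1] true depths
images = images_and_depths[..., :-1]                 # [C, H, W, D] image channels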
[docs] def render_num_contributing_gaussians( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[torch.Tensor, torch.Tensor]: """ Renders ``C`` images where each pixel contains the number of contributing Gaussians for that pixel from ``C`` camera views. .. note:: All images being rendered must have the same width and height. Example: .. code-block:: python # Assume gaussian_splat_3d is an instance of GaussianSplat3d # Render images from C camera views. # images is a tensor of shape [C, H, W, D] where D is the number of channels # alpha_images is a tensor of shape [C, H, W, 1] num_gaussians, alpha_images = gaussian_splat_3d.render_num_contributing_gaussians( world_to_camera_matrices, # tensor of shape [C, 4, 4] projection_matrices, # tensor of shape [C, 3, 3] image_width, # width of the images image_height, # height of the images near, # near clipping plane far) # far clipping plane num_gaussians_cij = num_gaussians[c, i, j, 0] # Number of contributing Gaussians at pixel (i, j) in camera c Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of camera views, ``H`` is the height of the images, ``W`` is the width of the images. Each element represents the number of contributing Gaussians at that pixel. alpha_images (torch.Tensor): A tensor of shape ``(C, H, W, 1)`` where ``C`` is the number of camera views, ``H`` is the height of the images, and ``W`` is the width of the images. Each element represents the alpha value (opacity) at a pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. 
""" return self._impl.render_num_contributing_gaussians( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, )
@overload def sparse_render_num_contributing_gaussians( self, pixels_to_render: torch.Tensor, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[torch.Tensor, torch.Tensor]: ... @overload def sparse_render_num_contributing_gaussians( self, pixels_to_render: JaggedTensor, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[JaggedTensor, JaggedTensor]: ...
[docs] def sparse_render_num_contributing_gaussians( self, pixels_to_render: JaggedTensor | torch.Tensor, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, ) -> tuple[JaggedTensor | torch.Tensor, JaggedTensor | torch.Tensor]: """ Renders the number of Gaussians which contribute to each pixel specified in the input. .. seealso:: :meth:`render_num_contributing_gaussians` for rendering dense images of contributing Gaussians. Args: pixels_to_render (torch.Tensor | JaggedTensor): A :class:`fvdb.JaggedTensor` of shape ``(C, R_c, 2)`` representing the pixels to render for each camera, where ``C`` is the number of camera views and ``R_c`` is the number of pixels to render per camera. Each value is an (x, y) pixel coordinate. world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. Returns: num_contributing_gaussians (torch.Tensor | JaggedTensor): A tensor of shape ``(C, R)`` (if this method was called with ``pixels_to_render`` as a :class:`torch.Tensor`) or a :class:`fvdb.JaggedTensor` of shape ``(C, R_c)`` (if this method was called with ``pixels_to_render`` as a :class:`fvdb.JaggedTensor`) where ``C`` is the number of cameras, and ``R``/``R_c`` is the number of pixels to render per camera. Each element represents the number of contributing Gaussians at that pixel. alphas (torch.Tensor | JaggedTensor): A tensor of shape ``(C, R)`` (if this method was called with ``pixels_to_render`` as a :class:`torch.Tensor`) or a :class:`fvdb.JaggedTensor` of shape ``(C, R_c)`` (if this method was called with ``pixels_to_render`` as a :class:`fvdb.JaggedTensor`) where ``C`` is the number of cameras, and ``R``/``R_c`` is the number of pixels to render per camera. Each element represents the alpha value (opacity) at that pixel such that ``0 <= alpha < 1``, and 0 means the pixel is fully transparent, and 1 means the pixel is fully opaque. 
""" if isinstance(pixels_to_render, torch.Tensor): C, R, _ = pixels_to_render.shape tensors = [pixels_to_render[i] for i in range(C)] pixels_to_render_jagged = JaggedTensor(tensors) result_num_contributing_gaussians, result_alphas = self._impl.sparse_render_num_contributing_gaussians( pixels_to_render=pixels_to_render_jagged._impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ) num_contributing_gaussians_list = result_num_contributing_gaussians.unbind() alphas_list = result_alphas.unbind() dense_num_contributing_gaussians = torch.stack(num_contributing_gaussians_list, dim=0) # type: ignore # Shape: (C, R) dense_alphas = torch.stack(alphas_list, dim=0) # type: ignore # Shape: (C, R) return dense_num_contributing_gaussians, dense_alphas else: # Already a JaggedTensor, call C++ implementation directly result_num_contributing_gaussians_impl, result_alphas_impl = ( self._impl.sparse_render_num_contributing_gaussians( pixels_to_render=pixels_to_render._impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, ) ) return JaggedTensor(impl=result_num_contributing_gaussians_impl), JaggedTensor(impl=result_alphas_impl)
[docs] def render_contributing_gaussian_ids( self, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, top_k_contributors: int = 0, ) -> tuple[JaggedTensor, JaggedTensor]: """ Render the IDs of the Gaussians that are the contributors to the rendered images' pixels and the value of their weighted contributions to the rendered pixels. Args: world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for C cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. top_k_contributors (int): If greater than 0, returns only the top ``k`` most opaque Gaussians contributing to each pixel. If 0 (default), returns all contributing Gaussians per pixel. Returns: ids (fvdb.JaggedTensor): A ``[[C1P1 + C1P2 + ... C1P(imageWidth * imageHeight), 1], ... [CNP1 + CNP2 + ... CNP(imageWidth * imageHeight), 1]]`` jagged tensor containing the IDs of the contributing Gaussians of each rendered pixel for each camera. weights (fvdb.JaggedTensor): A ``[[C1P1 + C1P2 + ... C1P(imageWidth * imageHeight), 1], ... [CNP1 + CNP2 + ... CNP(imageWidth * imageHeight), 1]]`` jagged tensor containing the weights of the contributing Gaussians of each rendered pixel for each camera. The weights are in row-major order and sum to 1 for each pixel if that pixel is opaque (alpha=1). """ ids, weights = self._impl.render_contributing_gaussian_ids( world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, top_k_contributors=top_k_contributors, ) return JaggedTensor(impl=ids), JaggedTensor(impl=weights)
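# Example (illustrative sketch, not part of the module source): reading the flat data of
# the returned JaggedTensors through .jdata (as used elsewhere in this module) to find
# which Gaussians are visible in any rendered pixel. top_k_contributors=8 is an
# illustrative choice.
ids, weights = gaussian_splat_3d.render_contributing_gaussian_ids(
    world_to_camera_matrices, projection_matrices,
    image_width, image_height, near, far,
    top_k_contributors=8)

visible_gaussian_ids = torch.unique(ids.jdata.reshape(-1))  # distinct contributing Gaussian IDs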
@overload def sparse_render_contributing_gaussian_ids( self, pixels_to_render: torch.Tensor, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, top_k_contributors: int = 0, ) -> tuple[JaggedTensor, JaggedTensor]: ... @overload def sparse_render_contributing_gaussian_ids( self, pixels_to_render: JaggedTensor, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, top_k_contributors: int = 0, ) -> tuple[JaggedTensor, JaggedTensor]: ...
[docs] def sparse_render_contributing_gaussian_ids( self, pixels_to_render: JaggedTensor | torch.Tensor, world_to_camera_matrices: torch.Tensor, projection_matrices: torch.Tensor, image_width: int, image_height: int, near: float, far: float, projection_type=ProjectionType.PERSPECTIVE, tile_size: int = 16, min_radius_2d: float = 0.0, eps_2d: float = 0.3, antialias: bool = False, top_k_contributors: int = 0, ) -> tuple[JaggedTensor, JaggedTensor]: """ Render the IDs of the Gaussians that are the contributors to the rendered images' pixels and the value of their weighted contributions to the rendered pixels. This function will render only a sparse subset of the pixels in the overall image, as specified by the ``pixels_to_render`` parameter. Args: pixels_to_render (torch.Tensor | JaggedTensor): A :class:`torch.Tensor` of shape ``(C, R, 2)`` or a :class:`fvdb.JaggedTensor` of shape ``(C, R_c, 2)`` representing the pixels to render for each camera, where ``C`` is the number of camera views and ``R``/``R_c`` is the number of pixels to render per camera. Each value is an (x, y) pixel coordinate. world_to_camera_matrices (torch.Tensor): Tensor of shape ``(C, 4, 4)`` representing the world-to-camera transformation matrices for ``C`` cameras. Each matrix transforms points from world coordinates to camera coordinates. projection_matrices (torch.Tensor): Tensor of shape ``(C, 3, 3)`` representing the projection matrices for ``C`` cameras. Each matrix projects points in camera space into homogeneous pixel coordinates. image_width (int): The width of the images to be rendered. Note these are the same for all images being rendered. image_height (int): The height of the images to be rendered. Note these are the same for all images being rendered. near (float): The near clipping plane distance for the projection. far (float): The far clipping plane distance for the projection. projection_type (ProjectionType): The type of projection to use. Default is :attr:`fvdb.ProjectionType.PERSPECTIVE`. tile_size (int): The size of the tiles to use for rendering. Default is 16. You shouldn't set this parameter unless you really know what you are doing. min_radius_2d (float): The minimum radius (in pixels) below which Gaussians are ignored during rendering. eps_2d (float): A value used to pad Gaussians when projecting them onto the image plane, to avoid very projected Gaussians which create artifacts and numerical issues. antialias (bool): If ``True``, applies opacity correction to the projected Gaussians when using ``eps_2d > 0.0``. top_k_contributors (int): If greater than 0, returns only the top ``k`` most opaque Gaussians contributing to each pixel, If 0 (default), returns all contributing Gaussians per pixel. Returns: ids (fvdb.JaggedTensor): A ``[[C1P1 + C1P2 + ... C1PN1, 1], ... [CNP1 + CNP2 + ... CNPNN, 1]]`` jagged tensor containing the IDs of the contributing Gaussians of each rendered pixel for each camera. The IDs are in row-major order. weights (fvdb.JaggedTensor): A ``[[C1P1 + C1P2 + ... C1PN1, 1], ... [CNP1 + CNP2 + ... CNPNN, 1]]`` jagged tensor containing the weights of the contributing Gaussians of each rendered pixel for each camera. The weights are in row-major order and sum to 1 for each pixel if that pixel is opaque (alpha=1). 
""" if isinstance(pixels_to_render, torch.Tensor): C, R, _ = pixels_to_render.shape tensors = [pixels_to_render[i] for i in range(C)] pixels_to_render_jagged = JaggedTensor(tensors) result_ids, result_weights = self._impl.sparse_render_contributing_gaussian_ids( pixels_to_render=pixels_to_render_jagged._impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, top_k_contributors=top_k_contributors, ) return JaggedTensor(impl=result_ids), JaggedTensor(impl=result_weights) else: # Already a JaggedTensor, call C++ implementation directly result_ids_impl, result_weights_impl = self._impl.sparse_render_contributing_gaussian_ids( pixels_to_render=pixels_to_render._impl, world_to_camera_matrices=world_to_camera_matrices, projection_matrices=projection_matrices, image_width=image_width, image_height=image_height, near=near, far=far, projection_type=self._proj_type_to_cpp(projection_type), tile_size=tile_size, min_radius_2d=min_radius_2d, eps_2d=eps_2d, antialias=antialias, top_k_contributors=top_k_contributors, ) return JaggedTensor(impl=result_ids_impl), JaggedTensor(impl=result_weights_impl)
[docs] def relocate_gaussians(
        self,
        log_scales: torch.Tensor,
        logit_opacities: torch.Tensor,
        ratios: torch.Tensor,
        binomial_coeffs: torch.Tensor,
        n_max: int,
        min_opacity: float,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Relocate Gaussians by adjusting opacity and scale based on their replication ratio.

        Args:
            log_scales (torch.Tensor): Log scales of the Gaussians to relocate ``[N, 3]``.
            logit_opacities (torch.Tensor): Logit opacities of the Gaussians to relocate ``[N]``.
            ratios (torch.Tensor): Replication ratios per Gaussian ``[N]``.
            binomial_coeffs (torch.Tensor): Binomial coefficients table ``[n_max, n_max]``.
            n_max (int): Maximum replication ratio (size of the binomial table).
            min_opacity (float): Minimum opacity value used when computing the relocated opacities.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: Tuple of (``logit_opacities_new`` ``[N]``, ``log_scales_new`` ``[N, 3]``).
        """
        return self._impl.relocate_gaussians(
            log_scales,
            logit_opacities,
            ratios,
            binomial_coeffs,
            n_max,
            min_opacity,
        )
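# Example (illustrative sketch, not part of the module source): building the binomial
# coefficient table expected by relocate_gaussians and calling it with state taken from
# state_dict(). The replication ratios, their dtype, and the min_opacity value are
# illustrative assumptions, not prescribed by this API.
import math

n_max = 51
binomial_coeffs = torch.zeros(n_max, n_max)
for n in range(n_max):
    for k in range(n + 1):
        binomial_coeffs[n, k] = math.comb(n, k)  # C(n, k)

state = gaussian_splat_3d.state_dict()
num_gaussians = state["log_scales"].shape[0]
device = state["log_scales"].device
ratios = torch.full((num_gaussians,), 2.0, device=device)  # replicate every Gaussian twice

new_logit_opacities, new_log_scales = gaussian_splat_3d.relocate_gaussians(
    state["log_scales"],
    state["logit_opacities"],
    ratios,
    binomial_coeffs.to(device),
    n_max,
    0.005,  # min_opacity; value is illustrative
)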
[docs] def add_noise_to_means(self, noise_scale: float, t: float = 0.005, k: float = 100.0) -> None: """ Add noise to the Gaussian positions (means), scaled by ``noise_scale``. Args: noise_scale (float): Noise scale factor applied to scale-dependent noise. t (float): Parameter t for noise scaling. Defaults to 0.005. k (float): Parameter k for noise scaling. Defaults to 100.0. """ self._impl.add_noise_to_means(noise_scale, t, k)
[docs] def reset_accumulated_gradient_state(self) -> None:
        """
        Reset the accumulated projected gradients of the means if :attr:`accumulate_mean_2d_gradients`
        is ``True``, and the accumulated max 2D radii if :attr:`accumulate_max_2d_radii` is ``True``.
        The values of :attr:`accumulated_mean_2d_gradient_norms`, :attr:`accumulated_max_2d_radii`,
        and :attr:`accumulated_gradient_step_counts` will be zeroed out after this call.

        .. seealso::

            :attr:`accumulate_mean_2d_gradients` and :attr:`accumulate_max_2d_radii`, which control
            whether these values are accumulated during rendering and backward passes.

        .. seealso::

            :attr:`accumulated_mean_2d_gradient_norms`, :attr:`accumulated_max_2d_radii`, and
            :attr:`accumulated_gradient_step_counts` for the actual accumulated state being reset.
        """
        self._impl.reset_accumulated_gradient_state()
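# Example (illustrative sketch, not part of the module source): reading the accumulated
# statistics between optimization intervals and then clearing them. Assumes
# accumulate_mean_2d_gradients is enabled; the attribute names follow the state_dict keys
# documented below.
grad_norms = gaussian_splat_3d.accumulated_mean_2d_gradient_norms  # [N]
step_counts = gaussian_splat_3d.accumulated_gradient_step_counts   # [N]
avg_grad_norm = grad_norms / step_counts.clamp(min=1)              # avoid division by zero
gaussian_splat_3d.reset_accumulated_gradient_state()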
[docs] def save_ply(
        self, filename: pathlib.Path | str, metadata: Mapping[str, str | int | float | torch.Tensor] | None = None
    ) -> None:
        """
        Save this :class:`GaussianSplat3d` to a PLY file, including any metadata provided.

        Args:
            filename (pathlib.Path | str): The path to the PLY file to save.
            metadata (dict[str, str | int | float | torch.Tensor] | None): An optional dictionary of metadata
                where the keys are strings and the values are either strings, ints, floats, or tensors.
                Defaults to ``None``.
        """
        if isinstance(filename, pathlib.Path):
            filename = str(filename)
        self._impl.save_ply(filename, metadata)  # type: ignore -- mapping to dict is fine here
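# Example (illustrative sketch, not part of the module source): saving to PLY with
# optional metadata. The file name and metadata values are illustrative.
gaussian_splat_3d.save_ply(
    pathlib.Path("scene.ply"),
    metadata={"iteration": 30000, "scene_scale": 1.0},
)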
@overload def to(self, dtype: torch.dtype | None = None) -> "GaussianSplat3d": ... @overload def to( self, device: DeviceIdentifier | None = None, dtype: torch.dtype | None = None, ) -> "GaussianSplat3d": ... @overload def to( self, other: torch.Tensor, ) -> "GaussianSplat3d": ... @overload def to( self, other: "GaussianSplat3d", ) -> "GaussianSplat3d": ... @overload def to( self, other: Grid, ) -> "GaussianSplat3d": ... @overload def to( self, other: GridBatch, ) -> "GaussianSplat3d": ... @overload def to( self, other: JaggedTensor, ) -> "GaussianSplat3d": ...
[docs] def to( self, *args, **kwargs, ) -> "GaussianSplat3d": """ Move the :class:`GaussianSplat3d` instance to a different device or change its data type or both. Args: other (DeviceIdentifier | torch.Tensor | GaussianSplat3d | Grid | GridBatch | JaggedTensor): The target :class:`torch.Device`, :class:`torch.Tensor`, :class:`~fvdb.Grid`, :class:`~fvdb.GridBatch`, :class:`~fvdb.JaggedTensor`, or :class:`~fvdb.GaussianSplat3d` instance to which the :class:`GaussianSplat3d` instance should be moved. device (DeviceIdentifier, optional): The target ``device`` to move the :class:`GaussianSplat3d` instance to. dtype (torch.dtype, optional): The target data type for the :class:`GaussianSplat3d` instance. Returns: gaussian_splat_3d (GaussianSplat3d): A new instance of :class:`GaussianSplat3d` with the specified device and/or data type. """ # All values passed by keyword arguments if len(args) == 0: if len(kwargs) == 1: # .to(device=...) or .to(other=...) if "device" in kwargs: device = kwargs["device"] dtype = kwargs.get("dtype", self.dtype) elif "other" in kwargs: other = kwargs["other"] if isinstance(other, (torch.Tensor, JaggedTensor, GaussianSplat3d)): device = other.device dtype = other.dtype elif isinstance(other, (Grid, GridBatch)): device = other.device dtype = self.dtype else: raise TypeError( f"Invalid keyword arguments for to(): {kwargs}. Expected 'device' or 'other' and optionally 'dtype'." ) elif len(kwargs) == 2: # .to(device=..., dtype=...) or .to(dtype=..., device=...) if "device" in kwargs and "dtype" in kwargs: device = kwargs["device"] dtype = kwargs["dtype"] else: raise TypeError( f"Invalid keyword arguments for to(): {kwargs}. Expected 'device' or 'other' and optionally 'dtype'." ) else: raise TypeError( f"Invalid keyword arguments for to(): {kwargs}. Expected 'device' or 'other' and optionally 'dtype'." ) elif len(args) == 1 and isinstance(args[0], (torch.Tensor, GaussianSplat3d, JaggedTensor)): # .to(other) device = args[0].device dtype = args[0].dtype elif len(args) == 1 and isinstance(args[0], (Grid, GridBatch)): # .to(other) device = args[0].device dtype = self.dtype elif len(args) == 1: # .to(device) device = args[0] dtype = kwargs.get("dtype", self.dtype) elif len(args) == 2: # .to(device, dtype) device = args[0] dtype = args[1] else: raise TypeError( f"Invalid arguments for to(): {args}. Expected a DeviceIdentifier, torch.Tensor, GaussianSplat3d, Grid, GridBatch, or JaggedTensor." ) device = resolve_device(device, inherit_from=self) dtype = self.dtype if dtype is None else cast_check(dtype, torch.dtype, "dtype") return GaussianSplat3d( impl=self._impl.to( device=device, dtype=dtype, ), _private=GaussianSplat3d.__PRIVATE__, )
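# Example (illustrative sketch, not part of the module source): equivalent ways to call
# to(), matching the overloads above. reference_tensor is an arbitrary torch.Tensor.
gs_cuda = gaussian_splat_3d.to("cuda")                              # move to a device
gs_half = gaussian_splat_3d.to("cuda", torch.float16)               # device and dtype, positionally
gs_same = gaussian_splat_3d.to(device="cuda", dtype=torch.float32)  # the same, by keyword

reference_tensor = torch.zeros(1, device="cuda", dtype=torch.float16)
gs_like = gaussian_splat_3d.to(reference_tensor)                    # match a tensor's device and dtype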
[docs]
    def set_state(
        self,
        means: torch.Tensor,
        quats: torch.Tensor,
        log_scales: torch.Tensor,
        logit_opacities: torch.Tensor,
        sh0: torch.Tensor,
        shN: torch.Tensor,
    ) -> None:
        """
        Set the underlying tensors managed by this :class:`GaussianSplat3d` instance.

        Note:
            If :attr:`accumulate_mean_2d_gradients` and/or :attr:`accumulate_max_2d_radii` are ``True``,
            this method will reset the gradient state (see :meth:`reset_accumulated_gradient_state`).

        Args:
            means (torch.Tensor): Tensor of shape ``(N, 3)`` representing the means of the Gaussians.
                ``N`` is the number of Gaussians (see :attr:`num_gaussians`).
            quats (torch.Tensor): Tensor of shape ``(N, 4)`` representing the quaternions of the Gaussians.
                ``N`` is the number of Gaussians (see :attr:`num_gaussians`).
            log_scales (torch.Tensor): Tensor of shape ``(N, 3)`` representing the log scales of the
                Gaussians. ``N`` is the number of Gaussians (see :attr:`num_gaussians`).
            logit_opacities (torch.Tensor): Tensor of shape ``(N,)`` representing the logit opacities of the
                Gaussians. ``N`` is the number of Gaussians (see :attr:`num_gaussians`).
            sh0 (torch.Tensor): Tensor of shape ``(N, 1, D)`` representing the diffuse SH coefficients where
                ``N`` is the number of Gaussians (see :attr:`num_gaussians`), and ``D`` is the number of
                channels (see :attr:`num_channels`).
            shN (torch.Tensor): Tensor of shape ``(N, K-1, D)`` representing the directionally varying SH
                coefficients where ``N`` is the number of Gaussians (see :attr:`num_gaussians`), ``D`` is the
                number of channels (see :attr:`num_channels`), and ``K`` is the number of spherical harmonic
                bases (see :attr:`num_sh_bases`).
        """
        self._impl.set_state(
            means=means,
            quats=quats,
            log_scales=log_scales,
            logit_opacities=logit_opacities,
            sh0=sh0,
            shN=shN,
        )
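    # Example (illustrative sketch, not part of the library source): overwriting the state of an existing
    # instance ``gs`` with freshly initialized tensors. ``N``, ``D``, and ``K`` are hypothetical sizes
    # chosen to match the shapes documented above:
    #
    #     N, D, K = 1000, 3, 9  # number of Gaussians, channels, and SH bases
    #     gs.set_state(
    #         means=torch.randn(N, 3),
    #         quats=torch.randn(N, 4),
    #         log_scales=torch.zeros(N, 3),
    #         logit_opacities=torch.zeros(N),
    #         sh0=torch.zeros(N, 1, D),
    #         shN=torch.zeros(N, K - 1, D),
    #     )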
[docs]
    def state_dict(self) -> dict[str, torch.Tensor]:
        """
        Return a dictionary containing the state of the :class:`GaussianSplat3d` instance.
        This is useful for serializing the state of the object for saving or transferring.

        A state dictionary always contains the following keys, where ``N`` denotes the number of Gaussians
        (see :attr:`num_gaussians`):

        - ``'means'``: Tensor of shape ``(N, 3)`` representing the means of the Gaussians.
        - ``'quats'``: Tensor of shape ``(N, 4)`` representing the quaternions of the Gaussians.
        - ``'log_scales'``: Tensor of shape ``(N, 3)`` representing the log scales of the Gaussians.
        - ``'logit_opacities'``: Tensor of shape ``(N,)`` representing the logit opacities of the Gaussians.
        - ``'sh0'``: Tensor of shape ``(N, 1, D)`` representing the diffuse SH coefficients where ``D`` is
          the number of channels (see :attr:`num_channels`).
        - ``'shN'``: Tensor of shape ``(N, K-1, D)`` representing the directionally varying SH coefficients
          where ``D`` is the number of channels (see :attr:`num_channels`), and ``K`` is the number of
          spherical harmonic bases (see :attr:`num_sh_bases`).
        - ``'accumulate_max_2d_radii'``: bool Tensor with a single element indicating whether to track the
          maximum 2D radii for gradients.
        - ``'accumulate_mean_2d_gradients'``: bool Tensor with a single element indicating whether to track
          the average norm of the gradient of projected means for each Gaussian.

        It can also optionally contain the following keys if :attr:`accumulate_mean_2d_gradients` and/or
        :attr:`accumulate_max_2d_radii` are set to ``True``:

        - ``'accumulated_gradient_step_counts'``: Tensor of shape ``(N,)`` representing the accumulated
          gradient step counts for each Gaussian.
        - ``'accumulated_max_2d_radii'``: Tensor of shape ``(N,)`` representing the maximum 2D projected
          radius for each Gaussian across every iteration of optimization.
        - ``'accumulated_mean_2d_gradient_norms'``: Tensor of shape ``(N,)`` representing the average norm
          of the gradient of projected means for each Gaussian across every iteration of optimization.

        .. seealso::
            :meth:`from_state_dict` for constructing a :class:`GaussianSplat3d` from a state dictionary.

        Returns:
            state_dict (dict[str, torch.Tensor]): A dictionary containing the state of the
                :class:`GaussianSplat3d` instance.
        """
        return self._impl.state_dict()
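    # Example (illustrative sketch, not part of the library source): round-tripping a model through a
    # checkpoint file with :func:`torch.save` / :func:`torch.load`, assuming :meth:`from_state_dict`
    # accepts the dictionary produced by :meth:`state_dict`:
    #
    #     torch.save(gs.state_dict(), "splats_checkpoint.pt")
    #     restored = GaussianSplat3d.from_state_dict(torch.load("splats_checkpoint.pt"))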
    @staticmethod
    def _proj_type_from_cpp(proj_type: GaussianSplat3dCpp.ProjectionType) -> ProjectionType:
        if proj_type == GaussianSplat3dCpp.ProjectionType.PERSPECTIVE:
            return ProjectionType.PERSPECTIVE
        elif proj_type == GaussianSplat3dCpp.ProjectionType.ORTHOGRAPHIC:
            return ProjectionType.ORTHOGRAPHIC
        else:
            raise ValueError(f"Invalid projection type: {proj_type}")

    @staticmethod
    def _proj_type_to_cpp(proj_type: ProjectionType) -> GaussianSplat3dCpp.ProjectionType:
        if proj_type == ProjectionType.PERSPECTIVE:
            return GaussianSplat3dCpp.ProjectionType.PERSPECTIVE
        elif proj_type == ProjectionType.ORTHOGRAPHIC:
            return GaussianSplat3dCpp.ProjectionType.ORTHOGRAPHIC
        else:
            raise ValueError(f"Invalid projection type: {proj_type}")