InstantMesh / src /models /renderer /synthesizer_mesh.py
tokenid
init
ad06aed
# ORIGINAL LICENSE
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# Modified by Jiale Xu
# The modifications are subject to the same license as the original.
import itertools
import torch
import torch.nn as nn
from .utils.renderer import generate_planes, project_onto_planes, sample_from_planes
class OSGDecoder(nn.Module):
"""
Triplane decoder that gives RGB and sigma values from sampled features.
Using ReLU here instead of Softplus in the original implementation.
Reference:
EG3D: https://github.com/NVlabs/eg3d/blob/main/eg3d/training/triplane.py#L112
"""
def __init__(self, n_features: int,
hidden_dim: int = 64, num_layers: int = 4, activation: nn.Module = nn.ReLU):
super().__init__()
self.net_sdf = nn.Sequential(
nn.Linear(3 * n_features, hidden_dim),
activation(),
*itertools.chain(*[[
nn.Linear(hidden_dim, hidden_dim),
activation(),
] for _ in range(num_layers - 2)]),
nn.Linear(hidden_dim, 1),
)
self.net_rgb = nn.Sequential(
nn.Linear(3 * n_features, hidden_dim),
activation(),
*itertools.chain(*[[
nn.Linear(hidden_dim, hidden_dim),
activation(),
] for _ in range(num_layers - 2)]),
nn.Linear(hidden_dim, 3),
)
self.net_deformation = nn.Sequential(
nn.Linear(3 * n_features, hidden_dim),
activation(),
*itertools.chain(*[[
nn.Linear(hidden_dim, hidden_dim),
activation(),
] for _ in range(num_layers - 2)]),
nn.Linear(hidden_dim, 3),
)
self.net_weight = nn.Sequential(
nn.Linear(8 * 3 * n_features, hidden_dim),
activation(),
*itertools.chain(*[[
nn.Linear(hidden_dim, hidden_dim),
activation(),
] for _ in range(num_layers - 2)]),
nn.Linear(hidden_dim, 21),
)
# init all bias to zero
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.zeros_(m.bias)
def get_geometry_prediction(self, sampled_features, flexicubes_indices):
_N, n_planes, _M, _C = sampled_features.shape
sampled_features = sampled_features.permute(0, 2, 1, 3).reshape(_N, _M, n_planes*_C)
sdf = self.net_sdf(sampled_features)
deformation = self.net_deformation(sampled_features)
grid_features = torch.index_select(input=sampled_features, index=flexicubes_indices.reshape(-1), dim=1)
grid_features = grid_features.reshape(
sampled_features.shape[0], flexicubes_indices.shape[0], flexicubes_indices.shape[1] * sampled_features.shape[-1])
weight = self.net_weight(grid_features) * 0.1
return sdf, deformation, weight
def get_texture_prediction(self, sampled_features):
_N, n_planes, _M, _C = sampled_features.shape
sampled_features = sampled_features.permute(0, 2, 1, 3).reshape(_N, _M, n_planes*_C)
rgb = self.net_rgb(sampled_features)
rgb = torch.sigmoid(rgb)*(1 + 2*0.001) - 0.001 # Uses sigmoid clamping from MipNeRF
return rgb
class TriplaneSynthesizer(nn.Module):
"""
Synthesizer that renders a triplane volume with planes and a camera.
Reference:
EG3D: https://github.com/NVlabs/eg3d/blob/main/eg3d/training/triplane.py#L19
"""
DEFAULT_RENDERING_KWARGS = {
'ray_start': 'auto',
'ray_end': 'auto',
'box_warp': 2.,
'white_back': True,
'disparity_space_sampling': False,
'clamp_mode': 'softplus',
'sampler_bbox_min': -1.,
'sampler_bbox_max': 1.,
}
def __init__(self, triplane_dim: int, samples_per_ray: int):
super().__init__()
# attributes
self.triplane_dim = triplane_dim
self.rendering_kwargs = {
**self.DEFAULT_RENDERING_KWARGS,
'depth_resolution': samples_per_ray // 2,
'depth_resolution_importance': samples_per_ray // 2,
}
# modules
self.plane_axes = generate_planes()
self.decoder = OSGDecoder(n_features=triplane_dim)
def get_geometry_prediction(self, planes, sample_coordinates, flexicubes_indices):
plane_axes = self.plane_axes.to(planes.device)
sampled_features = sample_from_planes(
plane_axes, planes, sample_coordinates, padding_mode='zeros', box_warp=self.rendering_kwargs['box_warp'])
sdf, deformation, weight = self.decoder.get_geometry_prediction(sampled_features, flexicubes_indices)
return sdf, deformation, weight
def get_texture_prediction(self, planes, sample_coordinates):
plane_axes = self.plane_axes.to(planes.device)
sampled_features = sample_from_planes(
plane_axes, planes, sample_coordinates, padding_mode='zeros', box_warp=self.rendering_kwargs['box_warp'])
rgb = self.decoder.get_texture_prediction(sampled_features)
return rgb