|
import tyro
|
|
from dataclasses import dataclass
|
|
from typing import Tuple, Literal, Dict, Optional
|
|
|
|
|
|
@dataclass
class Options:
    """Flat collection of run options; every field carries a default.

    Instances are stored as named presets in ``config_defaults`` and turned
    into CLI subcommands via ``tyro.extras.subcommand_type_from_defaults``.

    The section headers below group fields by their names only. The code
    that consumes these values is not part of this module, so any meaning
    implied by a header or a field name is an assumption to confirm against
    the consumer.
    """

    # ---- general / inference switches ----

    seed: Optional[int] = None
    is_crop: bool = True
    is_fix_views: bool = False
    specific_demo: Optional[str] = None
    txt_or_image: Optional[bool] = False
    infer_render_size: int = 256
    mvdream_or_zero123: Optional[bool] = True

    rar_data: bool = True

    # ---- network layout (sizes, channel widths, attention flags) ----

    input_size: int = 512

    down_channels: Tuple[int, ...] = (64, 128, 256, 512, 1024, 1024)
    down_attention: Tuple[bool, ...] = (False, False, False, True, True, True)
    mid_attention: bool = True
    up_channels: Tuple[int, ...] = (1024, 1024, 512, 256)
    up_attention: Tuple[bool, ...] = (True, True, True, False)

    splat_size: int = 64

    output_size: Optional[int] = 128

    # ---- density / appearance components and positional-encoding sizes ----

    density_n_comp: int = 8
    app_n_comp: int = 32
    app_dim: int = 27
    density_dim: int = 8
    # Only one value is currently declared for this choice.
    shadingMode: Literal['MLP_Fea'] = 'MLP_Fea'
    view_pe: int = 2
    fea_pe: int = 2
    pos_pe: int = 6

    n_sample: int = 64

    volume_mode: Literal['TRF_Mesh', 'TRF_NeRF'] = 'TRF_NeRF'

    # ---- transformer / triplane / image-encoder sizes ----

    camera_embed_dim: int = 1024
    transformer_dim: int = 1024
    transformer_layers: int = 16
    transformer_heads: int = 16
    triplane_low_res: int = 32
    triplane_high_res: int = 64
    triplane_dim: int = 32
    encoder_type: str = 'dinov2'
    encoder_model_name: str = 'dinov2_vitb14_reg'
    encoder_feat_dim: int = 768
    encoder_freeze: bool = False

    over_fit: Optional[bool] = False
    is_grid_sample: bool = False

    # ---- dataset ----

    # 's6' is listed because the 'tiny_trf_trans_mesh' preset in this module
    # assigns it; without it that preset's value falls outside the declared
    # choices.
    data_mode: Literal['s3', 's4', 's5', 's6'] = 's4'
    data_path: str = 'train_data'
    data_debug_list: str = 'dataset_debug/gobj_merged_debug.json'
    data_list_path: str = 'dataset_debug/gobj_merged_debug_selected.json'

    # presumably camera parameters (units not visible here) -- TODO confirm
    fovy: float = 39.6
    znear: float = 0.5
    zfar: float = 2.5

    num_views: int = 12
    num_input_views: int = 4

    cam_radius: float = 1.5

    num_workers: int = 8

    training_view_plane: bool = False
    is_certainty: bool = False

    # ---- training ----

    workspace: str = './workspace_test'

    resume: Optional[str] = None
    ckpt_nerf: Optional[str] = None

    batch_size: int = 8

    gradient_accumulation_steps: Optional[int] = 1

    num_epochs: int = 50

    lambda_lpips: float = 2.0

    gradient_clip: float = 1.0

    # Presets in this module use 'bf16' and 'no'.
    mixed_precision: str = 'bf16'

    lr: Optional[float] = 4e-4
    lr_scheduler: str = 'OneCycleLR'
    warmup_real_iters: int = 3000

    prob_grid_distortion: float = 0.5

    prob_cam_jitter: float = 0.5

    # ---- testing ----

    test_path: Optional[str] = None

    # ---- misc ----

    force_cuda_rast: bool = False

    fancy_video: bool = False
|
|
|
|
|
|
|
|
# One-line description per preset name; passed to tyro together with
# ``config_defaults`` below. Both dicts use the same keys in the same order.
config_doc: Dict[str, str] = {
    'lrm': 'the default settings for LGM',
    'small': 'small model with lower resolution Gaussians',
    'big': 'big model with higher resolution Gaussians',
    'tiny_trf_trans_mesh': 'tiny model for ablation',
    'tiny_trf_trans_nerf': 'tiny model for ablation',
    'tiny_trf_trans_nerf_123plus': 'tiny model for ablation',
    'tiny_trf_trans_nerf_nocrop': 'tiny model for ablation',
}

# Preset name -> Options instance. Each entry overrides only the listed
# fields; everything else keeps the dataclass default.
#
# NOTE(review): every 'tiny_*' preset passes four ``up_attention`` flags but
# only three ``up_channels`` widths -- confirm the consumer tolerates the
# extra entry.
config_defaults: Dict[str, Options] = {
    # Pure defaults.
    'lrm': Options(),
    'small': Options(
        input_size=256,
        splat_size=64,
        output_size=256,
        batch_size=8,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
    ),
    'big': Options(
        input_size=256,
        up_channels=(1024, 1024, 512, 256, 128),
        up_attention=(True, True, True, False, False),
        splat_size=128,
        output_size=512,
        batch_size=8,
        num_views=8,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
    ),
    # NOTE(review): 's6' must be an accepted value of Options.data_mode --
    # verify against the Literal declared on that field.
    'tiny_trf_trans_mesh': Options(
        input_size=512,
        down_channels=(32, 64, 128, 256, 512),
        down_attention=(False, False, False, False, True),
        up_channels=(512, 256, 128),
        up_attention=(True, False, False, False),
        volume_mode='TRF_Mesh',
        splat_size=64,
        output_size=512,
        data_mode='s6',
        batch_size=1,
        num_views=8,
        gradient_accumulation_steps=1,
        mixed_precision='no',
    ),
    'tiny_trf_trans_nerf': Options(
        input_size=512,
        down_channels=(32, 64, 128, 256, 512),
        down_attention=(False, False, False, False, True),
        up_channels=(512, 256, 128),
        up_attention=(True, False, False, False),
        volume_mode='TRF_NeRF',
        splat_size=64,
        output_size=62,
        data_mode='s5',
        batch_size=4,
        num_views=8,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
    ),
    'tiny_trf_trans_nerf_123plus': Options(
        input_size=512,
        down_channels=(32, 64, 128, 256, 512),
        down_attention=(False, False, False, False, True),
        up_channels=(512, 256, 128),
        up_attention=(True, False, False, False),
        volume_mode='TRF_NeRF',
        splat_size=64,
        output_size=116,
        data_mode='s5',
        mvdream_or_zero123=False,
        batch_size=1,
        num_views=10,
        num_input_views=6,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
    ),
    'tiny_trf_trans_nerf_nocrop': Options(
        input_size=512,
        down_channels=(32, 64, 128, 256, 512),
        down_attention=(False, False, False, False, True),
        up_channels=(512, 256, 128),
        up_attention=(True, False, False, False),
        volume_mode='TRF_NeRF',
        splat_size=64,
        output_size=62,
        data_mode='s5',
        batch_size=4,
        is_crop=False,
        num_views=8,
        gradient_accumulation_steps=1,
        mixed_precision='bf16',
    ),
}

# Type built by tyro from the presets above: one subcommand per key.
AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)
|
|
|