# Model configuration.
#
# Layout convention used throughout this file: each component is described by
# a pair of top-level keys, `<name>_cls` (dotted path of a class) and `<name>`
# (the settings that belong to that component).
# NOTE(review): presumably the loader instantiates `<name>_cls` with the
# `<name>` mapping as its configuration -- confirm against the loading code.

# Global settings.
# cond_image_size has the same value as image_tokenizer.width/height below.
cond_image_size: 512
isosurface_resolution: 160
radius: 0.87

# Camera conditioning.
# out_channels (768) has the same value as image_tokenizer.modulation_cond_dim.
# NOTE(review): in_channels 25 looks like a flattened 4x4 c2w matrix (16) plus
# a 3x3 intrinsic matrix (9) -- confirm against the embedder implementation.
camera_embedder_cls: sf3d.models.camera.LinearCameraEmbedder
camera_embedder:
  in_channels: 25
  out_channels: 768
  conditions:
    - c2w_cond
    - intrinsic_normed_cond

# Image tokenizer.
image_tokenizer_cls: sf3d.models.tokenizers.image.DINOV2SingleImageTokenizer
image_tokenizer:
  pretrained_model_name_or_path: "facebook/dinov2-large"
  width: 512
  height: 512
  modulation_cond_dim: 768

# Triplane tokenizer.
# num_channels (1024) has the same value as backbone.raw_triplane_channels.
tokenizer_cls: sf3d.models.tokenizers.triplane.TriplaneLearnablePositionalEmbedding
tokenizer:
  plane_size: 96
  num_channels: 1024

# Transformer backbone.
# num_attention_heads * attention_head_dim = 16 * 64 = 1024, the same value as
# the *_channels settings here.
# NOTE(review): raw_image_channels presumably must match the feature width of
# the image tokenizer's pretrained model -- confirm.
backbone_cls: sf3d.models.transformers.backbone.TwoStreamInterleaveTransformer
backbone:
  num_attention_heads: 16
  attention_head_dim: 64
  raw_triplane_channels: 1024
  triplane_channels: 1024
  raw_image_channels: 1024
  num_latents: 1792
  num_blocks: 4
  num_basic_blocks: 3

# Triplane post-processor (upsampling network).
# in_channels (1024) has the same value as backbone.triplane_channels.
post_processor_cls: sf3d.models.network.PixelShuffleUpsampleNetwork
post_processor:
  in_channels: 1024
  out_channels: 40
  scale_factor: 4
  conv_layers: 4

# Decoder: one shared configuration plus a list of output heads.
# NOTE(review): in_channels 120 equals 3 * post_processor.out_channels (40),
# presumably the three triplane feature maps concatenated -- confirm.
decoder_cls: sf3d.models.network.MaterialMLP
decoder:
  in_channels: 120
  n_neurons: 64
  activation: silu
  heads:
    - name: density
      out_channels: 1
      out_bias: -1.0
      n_hidden_layers: 2
      output_activation: trunc_exp
    - name: features
      out_channels: 3
      n_hidden_layers: 3
      output_activation: sigmoid
    - name: perturb_normal
      out_channels: 3
      n_hidden_layers: 3
      output_activation: normalize_channel_last
    # vertex_offset declares no output_activation, unlike the heads above.
    - name: vertex_offset
      out_channels: 3
      n_hidden_layers: 2

# Image-level estimator with two heads (roughness, metallic) that share the
# same settings apart from their name.
image_estimator_cls: sf3d.models.image_estimator.clip_based_estimator.ClipBasedHeadEstimator
image_estimator:
  distribution: beta
  distribution_eval: mode
  heads:
    - name: roughness
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      shape: [-1, 1, 1]
    - name: metallic
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      shape: [-1, 1, 1]

# Global estimator.
# triplane_features (1024) has the same value as backbone.triplane_channels;
# the sg_amplitudes head's out_channels (24) matches the middle entry of its
# shape.
global_estimator_cls: sf3d.models.global_estimator.multi_head_estimator.MultiHeadEstimator
global_estimator:
  triplane_features: 1024
  heads:
    - name: sg_amplitudes
      out_channels: 24
      n_hidden_layers: 3
      output_activation: softplus
      output_bias: 1.0
      shape: [-1, 24, 1]