JiantaoLin committed
Commit 2fe3da0
Parent(s): e2cc5f8
new
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
- .gitattributes +1 -0
- README copy.md +111 -0
- app.py +499 -0
- configs/PRM.yaml +71 -0
- configs/PRM_inference.yaml +22 -0
- light2map.py +95 -0
- obj2mesh.py +121 -0
- requirements.txt +21 -0
- run.py +355 -0
- run.sh +7 -0
- run_hpc.sh +16 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/data/__init__.py +0 -0
- src/data/__pycache__/__init__.cpython-310.pyc +0 -0
- src/data/__pycache__/objaverse.cpython-310.pyc +0 -0
- src/data/bsdf_256_256.bin +0 -0
- src/data/objaverse.py +509 -0
- src/model_mesh.py +642 -0
- src/models/__init__.py +0 -0
- src/models/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/__pycache__/lrm_mesh.cpython-310.pyc +0 -0
- src/models/decoder/__init__.py +0 -0
- src/models/decoder/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/decoder/__pycache__/transformer.cpython-310.pyc +0 -0
- src/models/decoder/transformer.py +123 -0
- src/models/encoder/__init__.py +0 -0
- src/models/encoder/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/encoder/__pycache__/dino.cpython-310.pyc +0 -0
- src/models/encoder/__pycache__/dino_wrapper.cpython-310.pyc +0 -0
- src/models/encoder/dino.py +550 -0
- src/models/encoder/dino_wrapper.py +80 -0
- src/models/geometry/__init__.py +7 -0
- src/models/geometry/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/geometry/camera/__init__.py +16 -0
- src/models/geometry/camera/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/geometry/camera/__pycache__/perspective_camera.cpython-310.pyc +0 -0
- src/models/geometry/camera/perspective_camera.py +35 -0
- src/models/geometry/render/__init__.py +8 -0
- src/models/geometry/render/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/geometry/render/__pycache__/neural_render.cpython-310.pyc +0 -0
- src/models/geometry/render/__pycache__/util.cpython-310.pyc +0 -0
- src/models/geometry/render/neural_render.py +293 -0
- src/models/geometry/render/renderutils/__init__.py +11 -0
- src/models/geometry/render/renderutils/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/geometry/render/renderutils/__pycache__/bsdf.cpython-310.pyc +0 -0
- src/models/geometry/render/renderutils/__pycache__/loss.cpython-310.pyc +0 -0
- src/models/geometry/render/renderutils/__pycache__/ops.cpython-310.pyc +0 -0
- src/models/geometry/render/renderutils/bsdf.py +151 -0
- src/models/geometry/render/renderutils/c_src/bsdf.cu +710 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
README copy.md
ADDED
@@ -0,0 +1,111 @@
<div align="center">

# PRM: Photometric Stereo based Large Reconstruction Model

<a href="https://tau-yihouxiang.github.io/projects/X-Ray/X-Ray.html"><img src="https://img.shields.io/badge/Project_Page-Online-EA3A97"></a>
<a href="https://arxiv.org/abs/2404.07191"><img src="https://img.shields.io/badge/ArXiv-2404.07191-brightgreen"></a>
<a href="https://huggingface.co/LTT/PRM"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Model_Card-Huggingface-orange"></a> <br>
<a href="https://huggingface.co/spaces/TencentARC/InstantMesh"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Gradio%20Demo-Huggingface-orange"></a>
<a href="https://github.com/jtydhr88/ComfyUI-InstantMesh"><img src="https://img.shields.io/badge/Demo-ComfyUI-8A2BE2"></a>

</div>

---

An official implementation of PRM, a feed-forward framework for high-quality 3D mesh generation from photometric stereo images.

![image](https://github.com/g3956/PRM/blob/main/assets/teaser.png)

# 🚩 Features

- [x] Release inference and training code.
- [x] Release model weights.
- [x] Release Hugging Face Gradio demo. Please try it at the [demo](https://huggingface.co/spaces/TencentARC/InstantMesh) link.
- [x] Release ComfyUI demo.

# ⚙️ Dependencies and Installation

We recommend using `Python>=3.10`, `PyTorch>=2.1.0`, and `CUDA>=12.1`.
```bash
conda create --name PRM python=3.10
conda activate PRM
pip install -U pip

# Ensure Ninja is installed
conda install Ninja

# Install the correct version of CUDA
conda install cuda -c nvidia/label/cuda-12.1.0

# Install PyTorch and xformers
# You may need to install another xformers version if you use a different PyTorch version
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
pip install xformers==0.0.22.post7

# Install Triton
pip install triton

# Install other requirements
pip install -r requirements.txt
```

# 💫 Inference

## Download the pretrained model

The pretrained model can be found on the [model card](https://huggingface.co/LTT/PRM).

Our inference script will download the models automatically. Alternatively, you can manually download the models and put them under the `ckpts/` directory.

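If you prefer to fetch the weights yourself, the sketch below mirrors what `run.py` and `app.py` do with `huggingface_hub` when the checkpoints are missing; the `local_dir` argument is our own choice here so the files land in `ckpts/`, matching the paths in `configs/PRM_inference.yaml`.

```python
from huggingface_hub import hf_hub_download

# Fetch the multi-view diffusion UNet and the reconstruction checkpoint so that
# infer_config.unet_path / infer_config.model_path resolve to local files.
for filename in ["diffusion_pytorch_model.bin", "final_ckpt.ckpt"]:
    hf_hub_download(
        repo_id="LTT/PRM",
        filename=filename,
        repo_type="model",
        local_dir="./ckpts",  # assumption: keep the default ckpts/ layout from the config
    )
```
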
# 💻 Training

We provide our training code to facilitate future research.
For training data, we use a filtered subset of Objaverse. Before training, you need to pre-process the environment maps and GLB files into the formats expected by our dataloader.
To preprocess the GLB files, please run
```bash
# GLB files to OBJ files
python train.py --base configs/instant-mesh-large-train.yaml --gpus 0,1,2,3,4,5,6,7 --num_nodes 1
```
then
```bash
# OBJ files to mesh files that can be read by the dataloader
python obj2mesh.py path_to_obj save_path
```
To preprocess the environment maps, please run
```bash
# Pre-process environment maps
python light2map.py path_to_env save_path
```

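At inference time the preprocessed environment maps are consumed through `src.data.objaverse.load_mipmap`. A minimal sketch of how the bundled scripts use a preprocessed directory (the `env_mipmap/6` path simply matches `run.sh` and `app.py`):

```python
from src.data.objaverse import load_mipmap

# light2map.py writes specular_*.pth mip levels plus diffuse.pth into each
# environment directory; the inference scripts reload one directory like this.
ENV = load_mipmap("env_mipmap/6")   # same environment used by run.sh / app.py
materials = (0.0, 0.9)              # (metallic, roughness) pair passed alongside the maps
```
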

To train the sparse-view reconstruction models, please run:
```bash
# Training on Mesh representation
python train.py --base configs/PRM.yaml --gpus 0,1,2,3,4,5,6,7 --num_nodes 1
```
Note that you need to change `root_dir` and `light_dir` to the paths where you saved the preprocessed GLB files and environment maps.

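If you would rather patch the config programmatically than edit the YAML by hand, here is a minimal sketch with OmegaConf; the placeholder paths are yours to fill in.

```python
from omegaconf import OmegaConf

# Point the training dataloader at the preprocessed data before launching train.py.
cfg = OmegaConf.load("configs/PRM.yaml")
cfg.data.params.train.params.root_dir = "/path/to/preprocessed_meshes"  # obj2mesh.py output
cfg.data.params.train.params.light_dir = "/path/to/env_mipmap"          # light2map.py output
OmegaConf.save(cfg, "configs/PRM.yaml")
```
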
# :books: Citation

If you find our work useful for your research or applications, please cite using this BibTeX:

```BibTeX
@article{xu2024instantmesh,
  title={InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models},
  author={Xu, Jiale and Cheng, Weihao and Gao, Yiming and Wang, Xintao and Gao, Shenghua and Shan, Ying},
  journal={arXiv preprint arXiv:2404.07191},
  year={2024}
}
```

# 🤗 Acknowledgements

We thank the authors of the following projects for their excellent contributions to 3D generative AI!

- [FlexiCubes](https://github.com/nv-tlabs/FlexiCubes)
- [InstantMesh](https://github.com/TencentARC/InstantMesh)
app.py
ADDED
@@ -0,0 +1,499 @@
import os
import imageio
import numpy as np
import torch
import rembg
from PIL import Image
from torchvision.transforms import v2
from pytorch_lightning import seed_everything
from omegaconf import OmegaConf
from einops import rearrange, repeat
from tqdm import tqdm
import glm
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

from src.data.objaverse import load_mipmap
from src.utils import render_utils
from src.utils.train_util import instantiate_from_config
from src.utils.camera_util import (
    FOV_to_intrinsics,
    get_zero123plus_input_cameras,
    get_circular_camera_poses,
)
from src.utils.mesh_util import save_obj, save_glb
from src.utils.infer_util import remove_background, resize_foreground, images_to_video

import tempfile
from huggingface_hub import hf_hub_download


if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
    # both stages currently share cuda:0
    device0 = torch.device('cuda:0')
    device1 = torch.device('cuda:0')
else:
    device0 = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device1 = device0

# Define the cache directory for model files
model_cache_dir = './ckpts/'
os.makedirs(model_cache_dir, exist_ok=True)


def get_render_cameras(batch_size=1, M=120, radius=4.0, elevation=20.0, is_flexicubes=False, fov=50):
    """
    Get the rendering camera parameters.
    """
    train_res = [512, 512]
    cam_near_far = [0.1, 1000.0]
    fovy = np.deg2rad(fov)
    proj_mtx = render_utils.perspective(fovy, train_res[1] / train_res[0], cam_near_far[0], cam_near_far[1])
    all_mv = []
    all_mvp = []
    all_campos = []
    if isinstance(elevation, tuple):
        elevation_0 = np.deg2rad(elevation[0])
        elevation_1 = np.deg2rad(elevation[1])
        for i in range(M//2):
            azimuth = 2 * np.pi * i / (M // 2)
            z = radius * np.cos(azimuth) * np.sin(elevation_0)
            x = radius * np.sin(azimuth) * np.sin(elevation_0)
            y = radius * np.cos(elevation_0)

            eye = glm.vec3(x, y, z)
            at = glm.vec3(0.0, 0.0, 0.0)
            up = glm.vec3(0.0, 1.0, 0.0)
            view_matrix = glm.lookAt(eye, at, up)
            mv = torch.from_numpy(np.array(view_matrix))
            mvp = proj_mtx @ (mv)  # w2c
            campos = torch.linalg.inv(mv)[:3, 3]
            all_mv.append(mv[None, ...].cuda())
            all_mvp.append(mvp[None, ...].cuda())
            all_campos.append(campos[None, ...].cuda())
        for i in range(M//2):
            azimuth = 2 * np.pi * i / (M // 2)
            z = radius * np.cos(azimuth) * np.sin(elevation_1)
            x = radius * np.sin(azimuth) * np.sin(elevation_1)
            y = radius * np.cos(elevation_1)

            eye = glm.vec3(x, y, z)
            at = glm.vec3(0.0, 0.0, 0.0)
            up = glm.vec3(0.0, 1.0, 0.0)
            view_matrix = glm.lookAt(eye, at, up)
            mv = torch.from_numpy(np.array(view_matrix))
            mvp = proj_mtx @ (mv)  # w2c
            campos = torch.linalg.inv(mv)[:3, 3]
            all_mv.append(mv[None, ...].cuda())
            all_mvp.append(mvp[None, ...].cuda())
            all_campos.append(campos[None, ...].cuda())
    else:
        # elevation = 90 - elevation
        for i in range(M):
            azimuth = 2 * np.pi * i / M
            z = radius * np.cos(azimuth) * np.sin(elevation)
            x = radius * np.sin(azimuth) * np.sin(elevation)
            y = radius * np.cos(elevation)

            eye = glm.vec3(x, y, z)
            at = glm.vec3(0.0, 0.0, 0.0)
            up = glm.vec3(0.0, 1.0, 0.0)
            view_matrix = glm.lookAt(eye, at, up)
            mv = torch.from_numpy(np.array(view_matrix))
            mvp = proj_mtx @ (mv)  # w2c
            campos = torch.linalg.inv(mv)[:3, 3]
            all_mv.append(mv[None, ...].cuda())
            all_mvp.append(mvp[None, ...].cuda())
            all_campos.append(campos[None, ...].cuda())
    all_mv = torch.stack(all_mv, dim=0).unsqueeze(0).squeeze(2)
    all_mvp = torch.stack(all_mvp, dim=0).unsqueeze(0).squeeze(2)
    all_campos = torch.stack(all_campos, dim=0).unsqueeze(0).squeeze(2)
    return all_mv, all_mvp, all_campos


def render_frames(model, planes, render_cameras, camera_pos, env, materials, render_size=512, chunk_size=1, is_flexicubes=False):
    """
    Render frames from triplanes.
    """
    frames = []
    albedos = []
    pbr_spec_lights = []
    pbr_diffuse_lights = []
    normals = []
    alphas = []
    for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
        if is_flexicubes:
            out = model.forward_geometry(
                planes,
                render_cameras[:, i:i+chunk_size],
                camera_pos[:, i:i+chunk_size],
                [[env]*chunk_size],
                [[materials]*chunk_size],
                render_size=render_size,
            )
            frame = out['pbr_img']
            albedo = out['albedo']
            pbr_spec_light = out['pbr_spec_light']
            pbr_diffuse_light = out['pbr_diffuse_light']
            normal = out['normal']
            alpha = out['mask']
        else:
            frame = model.forward_synthesizer(
                planes,
                render_cameras[i],
                render_size=render_size,
            )['images_rgb']
        frames.append(frame)
        albedos.append(albedo)
        pbr_spec_lights.append(pbr_spec_light)
        pbr_diffuse_lights.append(pbr_diffuse_light)
        normals.append(normal)
        alphas.append(alpha)

    frames = torch.cat(frames, dim=1)[0]  # we assume batch size is always 1
    alphas = torch.cat(alphas, dim=1)[0]
    albedos = torch.cat(albedos, dim=1)[0]
    pbr_spec_lights = torch.cat(pbr_spec_lights, dim=1)[0]
    pbr_diffuse_lights = torch.cat(pbr_diffuse_lights, dim=1)[0]
    normals = torch.cat(normals, dim=0).permute(0, 3, 1, 2)[:, :3]
    return frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas


def images_to_video(images, output_path, fps=30):
    # images: (N, C, H, W)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    frames = []
    for i in range(images.shape[0]):
        # clamp before the uint8 cast so out-of-range values do not wrap around
        frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).clip(0, 255).astype(np.uint8)
        assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
            f"Frame shape mismatch: {frame.shape} vs {images.shape}"
        assert frame.min() >= 0 and frame.max() <= 255, \
            f"Frame value out of range: {frame.min()} ~ {frame.max()}"
        frames.append(frame)
    imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')


###############################################################################
# Configuration.
###############################################################################

seed_everything(0)

config_path = 'configs/PRM_inference.yaml'
config = OmegaConf.load(config_path)
config_name = os.path.basename(config_path).replace('.yaml', '')
model_config = config.model_config
infer_config = config.infer_config

IS_FLEXICUBES = True

device = torch.device('cuda')

# load diffusion model
print('Loading diffusion model ...')
pipeline = DiffusionPipeline.from_pretrained(
    "sudo-ai/zero123plus-v1.2",
    custom_pipeline="zero123plus",
    torch_dtype=torch.float16,
    cache_dir=model_cache_dir
)
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipeline.scheduler.config, timestep_spacing='trailing'
)

# load custom white-background UNet
print('Loading custom white-background unet ...')
if os.path.exists(infer_config.unet_path):
    unet_ckpt_path = infer_config.unet_path
else:
    unet_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="diffusion_pytorch_model.bin", repo_type="model")
state_dict = torch.load(unet_ckpt_path, map_location='cpu')
pipeline.unet.load_state_dict(state_dict, strict=True)

pipeline = pipeline.to(device)

# load reconstruction model
print('Loading reconstruction model ...')
model = instantiate_from_config(model_config)
if os.path.exists(infer_config.model_path):
    model_ckpt_path = infer_config.model_path
else:
    model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
model.load_state_dict(state_dict, strict=True)

model = model.to(device1)
if IS_FLEXICUBES:
    model.init_flexicubes_geometry(device1, fovy=30.0)
model = model.eval()

print('Loading Finished!')


def check_input_image(input_image):
    if input_image is None:
        raise gr.Error("No image uploaded!")


def preprocess(input_image, do_remove_background):

    rembg_session = rembg.new_session() if do_remove_background else None
    if do_remove_background:
        input_image = remove_background(input_image, rembg_session)
        input_image = resize_foreground(input_image, 0.85)

    return input_image


def generate_mvs(input_image, sample_steps, sample_seed):

    seed_everything(sample_seed)

    # sampling
    generator = torch.Generator(device=device0)
    z123_image = pipeline(
        input_image,
        num_inference_steps=sample_steps,
        generator=generator,
    ).images[0]

    show_image = np.asarray(z123_image, dtype=np.uint8)
    show_image = torch.from_numpy(show_image)  # (960, 640, 3)
    show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
    show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
    show_image = Image.fromarray(show_image.numpy())

    return z123_image, show_image


def make_mesh(mesh_fpath, planes):

    mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
    mesh_dirname = os.path.dirname(mesh_fpath)
    mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")

    with torch.no_grad():
        # get mesh
        mesh_out = model.extract_mesh(
            planes,
            use_texture_map=False,
            **infer_config,
        )

        vertices, faces, vertex_colors = mesh_out
        vertices = vertices[:, [1, 2, 0]]

        save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
        save_obj(vertices, faces, vertex_colors, mesh_fpath)

        print(f"Mesh saved to {mesh_fpath}")

    return mesh_fpath, mesh_glb_fpath


def make3d(images):

    images = np.asarray(images, dtype=np.float32) / 255.0
    images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()  # (3, 960, 640)
    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)     # (6, 3, 320, 320)

    input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=3.2, fov=30).to(device).to(device1)
    all_mv, all_mvp, all_campos = get_render_cameras(
        batch_size=1,
        M=240,
        radius=4.5,
        elevation=(90, 60.0),
        is_flexicubes=IS_FLEXICUBES,
        fov=30
    )

    images = images.unsqueeze(0).to(device1)
    images = v2.functional.resize(images, (512, 512), interpolation=3, antialias=True).clamp(0, 1)

    mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
    print(mesh_fpath)
    mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
    mesh_dirname = os.path.dirname(mesh_fpath)
    video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
    ENV = load_mipmap("env_mipmap/6")
    materials = (0.0, 0.9)
    with torch.no_grad():
        # get triplane
        planes = model.forward_planes(images, input_cameras)

        # get video
        chunk_size = 20 if IS_FLEXICUBES else 1
        render_size = 512

        frames = []
        frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
            model,
            planes,
            render_cameras=all_mvp,
            camera_pos=all_campos,
            env=ENV,
            materials=materials,
            render_size=render_size,
            chunk_size=chunk_size,
            is_flexicubes=IS_FLEXICUBES,
        )
        normals = (torch.nn.functional.normalize(normals) + 1) / 2
        normals = normals * alphas + (1 - alphas)
        all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)

        images_to_video(
            all_frames,
            video_fpath,
            fps=30,
        )

        print(f"Video saved to {video_fpath}")

    mesh_fpath, mesh_glb_fpath = make_mesh(mesh_fpath, planes)

    return video_fpath, mesh_fpath, mesh_glb_fpath


import gradio as gr

_HEADER_ = '''
<h2><b>Official 🤗 Gradio Demo</b></h2><h2><a href='https://github.com/g3956/PRM' target='_blank'><b>PRM: Photometric Stereo based Large Reconstruction Model</b></a></h2>

**PRM** is a feed-forward framework for high-quality 3D mesh generation with fine-grained local details from a single image.

Code: <a href='https://github.com/g3956/PRM' target='_blank'>GitHub</a>. Technical report: <a href='https://arxiv.org/abs/2404.07191' target='_blank'>ArXiv</a>.
'''

_CITE_ = r"""
If PRM is helpful, please help to ⭐ the <a href='https://github.com/g3956/PRM' target='_blank'>Github Repo</a>. Thanks!
---
📝 **Citation**

If you find our work useful for your research or applications, please cite using this bibtex:
```bibtex
@article{xu2024instantmesh,
  title={InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models},
  author={Xu, Jiale and Cheng, Weihao and Gao, Yiming and Wang, Xintao and Gao, Shenghua and Shan, Ying},
  journal={arXiv preprint arXiv:2404.07191},
  year={2024}
}
```

📋 **License**

Apache-2.0 LICENSE. Please refer to the [LICENSE file](https://huggingface.co/spaces/TencentARC/InstantMesh/blob/main/LICENSE) for details.

📧 **Contact**

If you have any questions, feel free to open a discussion or contact us at <b>jlin695@connect.hkust-gz.edu.cn</b>.
"""

with gr.Blocks() as demo:
    gr.Markdown(_HEADER_)
    with gr.Row(variant="panel"):
        with gr.Column():
            with gr.Row():
                input_image = gr.Image(
                    label="Input Image",
                    image_mode="RGBA",
                    sources="upload",
                    width=256,
                    height=256,
                    type="pil",
                    elem_id="content_image",
                )
                processed_image = gr.Image(
                    label="Processed Image",
                    image_mode="RGBA",
                    width=256,
                    height=256,
                    type="pil",
                    interactive=False
                )
            with gr.Row():
                with gr.Group():
                    do_remove_background = gr.Checkbox(
                        label="Remove Background", value=True
                    )
                    sample_seed = gr.Number(value=42, label="Seed Value", precision=0)

                    sample_steps = gr.Slider(
                        label="Sample Steps",
                        minimum=30,
                        maximum=100,
                        value=75,
                        step=5
                    )

            with gr.Row():
                submit = gr.Button("Generate", elem_id="generate", variant="primary")

            with gr.Row(variant="panel"):
                gr.Examples(
                    examples=[
                        os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
                    ],
                    inputs=[input_image],
                    label="Examples",
                    examples_per_page=20
                )

        with gr.Column():

            with gr.Row():

                with gr.Column():
                    mv_show_images = gr.Image(
                        label="Generated Multi-views",
                        type="pil",
                        width=379,
                        interactive=False
                    )

                with gr.Column():
                    with gr.Column():
                        output_video = gr.Video(
                            label="video", format="mp4",
                            width=768,
                            autoplay=True,
                            interactive=False
                        )

            with gr.Row():
                with gr.Tab("OBJ"):
                    output_model_obj = gr.Model3D(
                        label="Output Model (OBJ Format)",
                        #width=768,
                        interactive=False,
                    )
                    gr.Markdown("Note: the downloaded .obj model will be flipped. Export .glb instead or manually flip it before use.")
                with gr.Tab("GLB"):
                    output_model_glb = gr.Model3D(
                        label="Output Model (GLB Format)",
                        #width=768,
                        interactive=False,
                    )
                    gr.Markdown("Note: the model shown here has a darker appearance. Download it to get correct results.")

            with gr.Row():
                gr.Markdown('''Try a different <b>seed value</b> if the result is unsatisfying (Default: 42).''')

    gr.Markdown(_CITE_)
    mv_images = gr.State()

    submit.click(fn=check_input_image, inputs=[input_image]).success(
        fn=preprocess,
        inputs=[input_image, do_remove_background],
        outputs=[processed_image],
    ).success(
        fn=generate_mvs,
        inputs=[processed_image, sample_steps, sample_seed],
        outputs=[mv_images, mv_show_images],
    ).success(
        fn=make3d,
        inputs=[mv_images],
        outputs=[output_video, output_model_obj, output_model_glb]
    )

demo.queue(max_size=10)
demo.launch(server_port=1211)
configs/PRM.yaml
ADDED
@@ -0,0 +1,71 @@
model:
  base_learning_rate: 4.0e-06
  target: src.model_mesh.MVRecon
  params:
    mesh_save_root: Objaverse
    init_ckpt: nerf_base.ckpt
    input_size: 512
    render_size: 512
    use_tv_loss: true
    sample_points: null
    use_gt_albedo: false

    lrm_generator_config:
      target: src.models.lrm_mesh.PRM
      params:
        encoder_feat_dim: 768
        encoder_freeze: false
        encoder_model_name: facebook/dino-vitb16
        transformer_dim: 1024
        transformer_layers: 16
        transformer_heads: 16
        triplane_low_res: 32
        triplane_high_res: 64
        triplane_dim: 80
        rendering_samples_per_ray: 128
        grid_res: 128
        grid_scale: 2.1


data:
  target: src.data.objaverse.DataModuleFromConfig
  params:
    batch_size: 1
    num_workers: 8
    train:
      target: src.data.objaverse.ObjaverseData
      params:
        root_dir: Objaverse
        light_dir: env_mipmap
        input_view_num: [6]
        target_view_num: 6
        total_view_n: 18
        distance: 5.0
        fov: 30
        camera_random: true
        validation: false
    validation:
      target: src.data.objaverse.ValidationData
      params:
        root_dir: Objaverse
        input_view_num: 6
        input_image_size: 320
        fov: 30


lightning:
  modelcheckpoint:
    params:
      every_n_train_steps: 100
      save_top_k: -1
      save_last: true
  callbacks: {}

  trainer:
    benchmark: true
    max_epochs: -1
    val_check_interval: 2000000000
    num_sanity_val_steps: 0
    accumulate_grad_batches: 8
    log_every_n_steps: 1
    check_val_every_n_epoch: null  # if this is not set, validation does not run
configs/PRM_inference.yaml
ADDED
@@ -0,0 +1,22 @@
model_config:
  target: src.models.lrm_mesh.PRM
  params:
    encoder_feat_dim: 768
    encoder_freeze: false
    encoder_model_name: facebook/dino-vitb16
    transformer_dim: 1024
    transformer_layers: 16
    transformer_heads: 16
    triplane_low_res: 32
    triplane_high_res: 64
    triplane_dim: 80
    rendering_samples_per_ray: 128
    grid_res: 128
    grid_scale: 2.1


infer_config:
  unet_path: ckpts/diffusion_pytorch_model.bin
  model_path: ckpts/final_ckpt.ckpt
  texture_resolution: 2048
  render_resolution: 512
light2map.py
ADDED
@@ -0,0 +1,95 @@
import sys
from src.models.geometry.render import renderutils as ru
import torch
from src.models.geometry.render import util
import nvdiffrast.torch as dr
import os

from PIL import Image
import torchvision.transforms.functional as TF
import torchvision.utils as vutils
import imageio
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
LIGHT_MIN_RES = 16

MIN_ROUGHNESS = 0.04
MAX_ROUGHNESS = 1.00

class cubemap_mip(torch.autograd.Function):
    @staticmethod
    def forward(ctx, cubemap):
        return util.avg_pool_nhwc(cubemap, (2, 2))

    @staticmethod
    def backward(ctx, dout):
        res = dout.shape[1] * 2
        out = torch.zeros(6, res, res, dout.shape[-1], dtype=torch.float32, device="cuda")
        for s in range(6):
            gy, gx = torch.meshgrid(torch.linspace(-1.0 + 1.0 / res, 1.0 - 1.0 / res, res, device="cuda"),
                                    torch.linspace(-1.0 + 1.0 / res, 1.0 - 1.0 / res, res, device="cuda"),
                                    indexing='ij')
            v = util.safe_normalize(util.cube_to_dir(s, gx, gy))
            out[s, ...] = dr.texture(dout[None, ...] * 0.25, v[None, ...].contiguous(), filter_mode='linear', boundary_mode='cube')
        return out

def build_mips(base, cutoff=0.99):
    specular = [base]
    while specular[-1].shape[1] > LIGHT_MIN_RES:
        specular.append(cubemap_mip.apply(specular[-1]))
        #specular.append(util.avg_pool_nhwc(specular[-1], (2,2)))

    diffuse = ru.diffuse_cubemap(specular[-1])

    for idx in range(len(specular) - 1):
        roughness = (idx / (len(specular) - 2)) * (MAX_ROUGHNESS - MIN_ROUGHNESS) + MIN_ROUGHNESS
        specular[idx] = ru.specular_cubemap(specular[idx], roughness, cutoff)
    specular[-1] = ru.specular_cubemap(specular[-1], 1.0, cutoff)

    return specular, diffuse


# Load from latlong .HDR file
def _load_env_hdr(fn, scale=1.0):
    latlong_img = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda') * scale
    cubemap = util.latlong_to_cubemap(latlong_img, [512, 512])

    specular, diffuse = build_mips(cubemap)

    return specular, diffuse

def main(path_hdr, save_path_map):
    all_envs = os.listdir(path_hdr)

    for env in all_envs:
        env_path = os.path.join(path_hdr, env)
        base_n = os.path.basename(env_path).split('.')[0]

        try:
            if not os.path.exists(os.path.join(save_path_map, base_n)):
                os.makedirs(os.path.join(save_path_map, base_n))
            specular, diffuse = _load_env_hdr(env_path)
            for i in range(len(specular)):
                tensor = specular[i]
                torch.save(tensor, os.path.join(save_path_map, base_n, f'specular_{i}.pth'))

            torch.save(diffuse, os.path.join(save_path_map, base_n, 'diffuse.pth'))
        except Exception as e:
            print(f"Error processing {env}: {e}")
            continue

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python script.py <path_hdr> <save_path_map>")
        sys.exit(1)

    path_hdr = sys.argv[1]
    save_path_map = sys.argv[2]

    if not os.path.exists(path_hdr):
        print(f"Error: path_hdr '{path_hdr}' does not exist.")
        sys.exit(1)

    if not os.path.exists(save_path_map):
        os.makedirs(save_path_map)

    main(path_hdr, save_path_map)
obj2mesh.py
ADDED
@@ -0,0 +1,121 @@
import json
import os
import torch
import psutil
import gc
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from src.data.objaverse import load_obj
from src.utils import mesh
from src.utils.material import Material
import argparse


def bytes_to_megabytes(bytes):
    return bytes / (1024 * 1024)


def bytes_to_gigabytes(bytes):
    return bytes / (1024 * 1024 * 1024)


def print_memory_usage(stage):
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()
    allocated = torch.cuda.memory_allocated() / 1024**2
    cached = torch.cuda.memory_reserved() / 1024**2
    print(
        f"[{stage}] Process memory: {memory_info.rss / 1024**2:.2f} MB, "
        f"Allocated CUDA memory: {allocated:.2f} MB, Cached CUDA memory: {cached:.2f} MB"
    )


def process_obj(index, root_dir, final_save_dir, paths):
    obj_path = os.path.join(root_dir, paths[index], paths[index] + '.obj')
    mtl_path = os.path.join(root_dir, paths[index], paths[index] + '.mtl')

    if os.path.exists(os.path.join(final_save_dir, f"{paths[index]}.pth")):
        return None

    try:
        with torch.no_grad():
            ref_mesh, vertices, faces, normals, nfaces, texcoords, tfaces, uber_material = load_obj(
                obj_path, return_attributes=True
            )
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            ref_mesh = mesh.compute_tangents(ref_mesh)

            with open(mtl_path, 'r') as file:
                lines = file.readlines()

            if len(lines) >= 250:
                return None

            final_mesh_attributes = {
                "v_pos": ref_mesh.v_pos.detach().cpu(),
                "v_nrm": ref_mesh.v_nrm.detach().cpu(),
                "v_tex": ref_mesh.v_tex.detach().cpu(),
                "v_tng": ref_mesh.v_tng.detach().cpu(),
                "t_pos_idx": ref_mesh.t_pos_idx.detach().cpu(),
                "t_nrm_idx": ref_mesh.t_nrm_idx.detach().cpu(),
                "t_tex_idx": ref_mesh.t_tex_idx.detach().cpu(),
                "t_tng_idx": ref_mesh.t_tng_idx.detach().cpu(),
                "mat_dict": {key: ref_mesh.material[key] for key in ref_mesh.material.mat_keys},
            }

            torch.save(final_mesh_attributes, f"{final_save_dir}/{paths[index]}.pth")
            print(f"==> Saved to {final_save_dir}/{paths[index]}.pth")

            del ref_mesh
            torch.cuda.empty_cache()
            return paths[index]

    except Exception as e:
        print(f"Failed to process {paths[index]}: {e}")
        return None

    finally:
        gc.collect()
        torch.cuda.empty_cache()


def main(root_dir, save_dir):
    os.makedirs(save_dir, exist_ok=True)
    finish_lists = os.listdir(save_dir)
    paths = os.listdir(root_dir)

    valid_uid = []

    print_memory_usage("Start")

    batch_size = 100
    num_batches = (len(paths) + batch_size - 1) // batch_size

    for batch in tqdm(range(num_batches)):
        start_index = batch * batch_size
        end_index = min(start_index + batch_size, len(paths))

        with ThreadPoolExecutor(max_workers=8) as executor:
            futures = [
                executor.submit(process_obj, index, root_dir, save_dir, paths)
                for index in range(start_index, end_index)
            ]
            for future in as_completed(futures):
                result = future.result()
                if result is not None:
                    valid_uid.append(result)

        print_memory_usage(f"=====> After processing batch {batch + 1}")
        torch.cuda.empty_cache()
        gc.collect()

    print_memory_usage("End")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process OBJ files and save final results.")
    parser.add_argument("root_dir", type=str, help="Directory containing the root OBJ files.")
    parser.add_argument("save_dir", type=str, help="Directory to save the processed results.")
    args = parser.parse_args()

    main(args.root_dir, args.save_dir)
requirements.txt
ADDED
@@ -0,0 +1,21 @@
pytorch-lightning==2.1.2
gradio==3.41.2
huggingface-hub
einops
omegaconf
torchmetrics
webdataset
accelerate
tensorboard
PyMCubes
trimesh
rembg
transformers==4.34.1
diffusers==0.20.2
bitsandbytes
imageio[ffmpeg]
xatlas
plyfile
git+https://github.com/NVlabs/nvdiffrast/
PyGLM==2.7.0
open3d
run.py
ADDED
@@ -0,0 +1,355 @@
1 |
+
import os
|
2 |
+
import argparse
|
3 |
+
import glm
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
import rembg
|
7 |
+
from PIL import Image
|
8 |
+
from torchvision.transforms import v2
|
9 |
+
import torchvision
|
10 |
+
from pytorch_lightning import seed_everything
|
11 |
+
from omegaconf import OmegaConf
|
12 |
+
from einops import rearrange, repeat
|
13 |
+
from tqdm import tqdm
|
14 |
+
from huggingface_hub import hf_hub_download
|
15 |
+
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
|
16 |
+
|
17 |
+
from src.data.objaverse import load_mipmap
|
18 |
+
from src.utils import render_utils
|
19 |
+
from src.utils.train_util import instantiate_from_config
|
20 |
+
from src.utils.camera_util import (
|
21 |
+
FOV_to_intrinsics,
|
22 |
+
center_looking_at_camera_pose,
|
23 |
+
get_zero123plus_input_cameras,
|
24 |
+
get_circular_camera_poses,
|
25 |
+
)
|
26 |
+
from src.utils.mesh_util import save_obj, save_obj_with_mtl
|
27 |
+
from src.utils.infer_util import remove_background, resize_foreground, save_video
|
28 |
+
|
29 |
+
def str_to_tuple(arg_str):
|
30 |
+
try:
|
31 |
+
return eval(arg_str)
|
32 |
+
except:
|
33 |
+
raise argparse.ArgumentTypeError("Tuple argument must be in the format (x, y)")
|
34 |
+
|
35 |
+
|
36 |
+
def get_render_cameras(batch_size=1, M=120, radius=4.0, elevation=20.0, is_flexicubes=False, fov=50):
|
37 |
+
"""
|
38 |
+
Get the rendering camera parameters.
|
39 |
+
"""
|
40 |
+
train_res = [512, 512]
|
41 |
+
cam_near_far = [0.1, 1000.0]
|
42 |
+
fovy = np.deg2rad(fov)
|
43 |
+
proj_mtx = render_utils.perspective(fovy, train_res[1] / train_res[0], cam_near_far[0], cam_near_far[1])
|
44 |
+
all_mv = []
|
45 |
+
all_mvp = []
|
46 |
+
all_campos = []
|
47 |
+
if isinstance(elevation, tuple):
|
48 |
+
elevation_0 = np.deg2rad(elevation[0])
|
49 |
+
elevation_1 = np.deg2rad(elevation[1])
|
50 |
+
for i in range(M//2):
|
51 |
+
azimuth = 2 * np.pi * i / (M // 2)
|
52 |
+
z = radius * np.cos(azimuth) * np.sin(elevation_0)
|
53 |
+
x = radius * np.sin(azimuth) * np.sin(elevation_0)
|
54 |
+
y = radius * np.cos(elevation_0)
|
55 |
+
|
56 |
+
eye = glm.vec3(x, y, z)
|
57 |
+
at = glm.vec3(0.0, 0.0, 0.0)
|
58 |
+
up = glm.vec3(0.0, 1.0, 0.0)
|
59 |
+
view_matrix = glm.lookAt(eye, at, up)
|
60 |
+
mv = torch.from_numpy(np.array(view_matrix))
|
61 |
+
mvp = proj_mtx @ (mv) #w2c
|
62 |
+
campos = torch.linalg.inv(mv)[:3, 3]
|
63 |
+
all_mv.append(mv[None, ...].cuda())
|
64 |
+
all_mvp.append(mvp[None, ...].cuda())
|
65 |
+
all_campos.append(campos[None, ...].cuda())
|
66 |
+
for i in range(M//2):
|
67 |
+
azimuth = 2 * np.pi * i / (M // 2)
|
68 |
+
z = radius * np.cos(azimuth) * np.sin(elevation_1)
|
69 |
+
x = radius * np.sin(azimuth) * np.sin(elevation_1)
|
70 |
+
y = radius * np.cos(elevation_1)
|
71 |
+
|
72 |
+
eye = glm.vec3(x, y, z)
|
73 |
+
at = glm.vec3(0.0, 0.0, 0.0)
|
74 |
+
up = glm.vec3(0.0, 1.0, 0.0)
|
75 |
+
view_matrix = glm.lookAt(eye, at, up)
|
76 |
+
mv = torch.from_numpy(np.array(view_matrix))
|
77 |
+
mvp = proj_mtx @ (mv) #w2c
|
78 |
+
campos = torch.linalg.inv(mv)[:3, 3]
|
79 |
+
all_mv.append(mv[None, ...].cuda())
|
80 |
+
all_mvp.append(mvp[None, ...].cuda())
|
81 |
+
all_campos.append(campos[None, ...].cuda())
|
82 |
+
else:
|
83 |
+
# elevation = 90 - elevation
|
84 |
+
for i in range(M):
|
85 |
+
azimuth = 2 * np.pi * i / M
|
86 |
+
z = radius * np.cos(azimuth) * np.sin(elevation)
|
87 |
+
x = radius * np.sin(azimuth) * np.sin(elevation)
|
88 |
+
y = radius * np.cos(elevation)
|
89 |
+
|
90 |
+
eye = glm.vec3(x, y, z)
|
91 |
+
at = glm.vec3(0.0, 0.0, 0.0)
|
92 |
+
up = glm.vec3(0.0, 1.0, 0.0)
|
93 |
+
view_matrix = glm.lookAt(eye, at, up)
|
94 |
+
mv = torch.from_numpy(np.array(view_matrix))
|
95 |
+
mvp = proj_mtx @ (mv) #w2c
|
96 |
+
campos = torch.linalg.inv(mv)[:3, 3]
|
97 |
+
all_mv.append(mv[None, ...].cuda())
|
98 |
+
all_mvp.append(mvp[None, ...].cuda())
|
99 |
+
all_campos.append(campos[None, ...].cuda())
|
100 |
+
all_mv = torch.stack(all_mv, dim=0).unsqueeze(0).squeeze(2)
|
101 |
+
all_mvp = torch.stack(all_mvp, dim=0).unsqueeze(0).squeeze(2)
|
102 |
+
all_campos = torch.stack(all_campos, dim=0).unsqueeze(0).squeeze(2)
|
103 |
+
return all_mv, all_mvp, all_campos
|
104 |
+
|
105 |
+
def render_frames(model, planes, render_cameras, camera_pos, env, materials, render_size=512, chunk_size=1, is_flexicubes=False):
|
106 |
+
"""
|
107 |
+
Render frames from triplanes.
|
108 |
+
"""
|
109 |
+
frames = []
|
110 |
+
albedos = []
|
111 |
+
pbr_spec_lights = []
|
112 |
+
pbr_diffuse_lights = []
|
113 |
+
normals = []
|
114 |
+
alphas = []
|
115 |
+
for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
|
116 |
+
if is_flexicubes:
|
117 |
+
out = model.forward_geometry(
|
118 |
+
planes,
|
119 |
+
render_cameras[:, i:i+chunk_size],
|
120 |
+
camera_pos[:, i:i+chunk_size],
|
121 |
+
[[env]*chunk_size],
|
122 |
+
[[materials]*chunk_size],
|
123 |
+
render_size=render_size,
|
124 |
+
)
|
125 |
+
frame = out['pbr_img']
|
126 |
+
albedo = out['albedo']
|
127 |
+
pbr_spec_light = out['pbr_spec_light']
|
128 |
+
pbr_diffuse_light = out['pbr_diffuse_light']
|
129 |
+
normal = out['normal']
|
130 |
+
alpha = out['mask']
|
131 |
+
else:
|
132 |
+
frame = model.forward_synthesizer(
|
133 |
+
planes,
|
134 |
+
render_cameras[i],
|
135 |
+
render_size=render_size,
|
136 |
+
)['images_rgb']
|
137 |
+
frames.append(frame)
|
138 |
+
albedos.append(albedo)
|
139 |
+
pbr_spec_lights.append(pbr_spec_light)
|
140 |
+
pbr_diffuse_lights.append(pbr_diffuse_light)
|
141 |
+
normals.append(normal)
|
142 |
+
alphas.append(alpha)
|
143 |
+
|
144 |
+
frames = torch.cat(frames, dim=1)[0] # we suppose batch size is always 1
|
145 |
+
alphas = torch.cat(alphas, dim=1)[0]
|
146 |
+
albedos = torch.cat(albedos, dim=1)[0]
|
147 |
+
pbr_spec_lights = torch.cat(pbr_spec_lights, dim=1)[0]
|
148 |
+
pbr_diffuse_lights = torch.cat(pbr_diffuse_lights, dim=1)[0]
|
149 |
+
normals = torch.cat(normals, dim=0).permute(0,3,1,2)[:,:3]
|
150 |
+
return frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas
|
151 |
+
|
152 |
+
|
153 |
+
###############################################################################
|
154 |
+
# Arguments.
|
155 |
+
###############################################################################
|
156 |
+
|
157 |
+
parser = argparse.ArgumentParser()
|
158 |
+
parser.add_argument('config', type=str, help='Path to config file.')
|
159 |
+
parser.add_argument('input_path', type=str, help='Path to input image or directory.')
|
160 |
+
parser.add_argument('--output_path', type=str, default='outputs/', help='Output directory.')
|
161 |
+
parser.add_argument('--model_ckpt_path', type=str, default="", help='Output directory.')
|
162 |
+
parser.add_argument('--diffusion_steps', type=int, default=100, help='Denoising Sampling steps.')
|
163 |
+
parser.add_argument('--seed', type=int, default=42, help='Random seed for sampling.')
|
164 |
+
parser.add_argument('--scale', type=float, default=1.0, help='Scale of generated object.')
|
165 |
+
parser.add_argument('--materials', type=str_to_tuple, default=(1.0, 0.1), help=' metallic and roughness')
|
166 |
+
parser.add_argument('--distance', type=float, default=4.5, help='Render distance.')
|
167 |
+
parser.add_argument('--fov', type=float, default=30, help='Render distance.')
|
168 |
+
parser.add_argument('--env_path', type=str, default='data/env_mipmap/2', help='environment map')
|
169 |
+
parser.add_argument('--view', type=int, default=6, choices=[4, 6], help='Number of input views.')
|
170 |
+
parser.add_argument('--no_rembg', action='store_true', help='Do not remove input background.')
|
171 |
+
parser.add_argument('--export_texmap', action='store_true', help='Export a mesh with texture map.')
|
172 |
+
parser.add_argument('--save_video', action='store_true', help='Save a circular-view video.')
|
173 |
+
args = parser.parse_args()
|
174 |
+
seed_everything(args.seed)
|
175 |
+
|
176 |
+
###############################################################################
|
177 |
+
# Stage 0: Configuration.
|
178 |
+
###############################################################################
|
179 |
+
|
180 |
+
config = OmegaConf.load(args.config)
|
181 |
+
config_name = os.path.basename(args.config).replace('.yaml', '')
|
182 |
+
model_config = config.model_config
|
183 |
+
infer_config = config.infer_config
|
184 |
+
|
185 |
+
IS_FLEXICUBES = True
|
186 |
+
|
187 |
+
device = torch.device('cuda')
|
188 |
+
|
189 |
+
# load diffusion model
|
190 |
+
print('Loading diffusion model ...')
|
191 |
+
pipeline = DiffusionPipeline.from_pretrained(
|
192 |
+
"sudo-ai/zero123plus-v1.2",
|
193 |
+
custom_pipeline="zero123plus",
|
194 |
+
torch_dtype=torch.float16,
|
195 |
+
)
|
196 |
+
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
197 |
+
pipeline.scheduler.config, timestep_spacing='trailing'
|
198 |
+
)
|
199 |
+
|
200 |
+
# load custom white-background UNet
|
201 |
+
print('Loading custom white-background unet ...')
|
202 |
+
if os.path.exists(infer_config.unet_path):
|
203 |
+
unet_ckpt_path = infer_config.unet_path
|
204 |
+
else:
|
205 |
+
unet_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="diffusion_pytorch_model.bin", repo_type="model")
|
206 |
+
state_dict = torch.load(unet_ckpt_path, map_location='cpu')
|
207 |
+
pipeline.unet.load_state_dict(state_dict, strict=True)
|
208 |
+
|
209 |
+
pipeline = pipeline.to(device)
|
210 |
+
|
211 |
+
# load reconstruction model
|
212 |
+
print('Loading reconstruction model ...')
|
213 |
+
model = instantiate_from_config(model_config)
|
214 |
+
if os.path.exists(infer_config.model_path):
|
215 |
+
model_ckpt_path = infer_config.model_path
|
216 |
+
else:
|
217 |
+
model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
|
218 |
+
state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
|
219 |
+
state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
|
220 |
+
model.load_state_dict(state_dict, strict=True)
|
221 |
+
|
222 |
+
model = model.to(device)
|
223 |
+
if IS_FLEXICUBES:
|
224 |
+
model.init_flexicubes_geometry(device, fovy=50.0)
|
225 |
+
model = model.eval()
|
226 |
+
|
227 |
+
# make output directories
|
228 |
+
image_path = os.path.join(args.output_path, config_name, 'images')
|
229 |
+
mesh_path = os.path.join(args.output_path, config_name, 'meshes')
|
230 |
+
video_path = os.path.join(args.output_path, config_name, 'videos')
|
231 |
+
os.makedirs(image_path, exist_ok=True)
|
232 |
+
os.makedirs(mesh_path, exist_ok=True)
|
233 |
+
os.makedirs(video_path, exist_ok=True)
|
234 |
+
|
235 |
+
# process input files
|
236 |
+
if os.path.isdir(args.input_path):
|
237 |
+
input_files = [
|
238 |
+
os.path.join(args.input_path, file)
|
239 |
+
for file in os.listdir(args.input_path)
|
240 |
+
if file.endswith('.png') or file.endswith('.jpg') or file.endswith('.webp')
|
241 |
+
]
|
242 |
+
else:
|
243 |
+
input_files = [args.input_path]
|
244 |
+
print(f'Total number of input images: {len(input_files)}')
|
245 |
+
|
246 |
+
###############################################################################
|
247 |
+
# Stage 1: Multiview generation.
|
248 |
+
###############################################################################
|
249 |
+
|
250 |
+
rembg_session = None if args.no_rembg else rembg.new_session()
|
251 |
+
|
252 |
+
outputs = []
|
253 |
+
for idx, image_file in enumerate(input_files):
|
254 |
+
name = os.path.basename(image_file).split('.')[0]
|
255 |
+
print(f'[{idx+1}/{len(input_files)}] Imagining {name} ...')
|
256 |
+
|
257 |
+
# remove background optionally
|
258 |
+
input_image = Image.open(image_file)
|
259 |
+
if not args.no_rembg:
|
260 |
+
input_image = remove_background(input_image, rembg_session)
|
261 |
+
input_image = resize_foreground(input_image, 0.85)
|
262 |
+
# sampling
|
263 |
+
output_image = pipeline(
|
264 |
+
input_image,
|
265 |
+
num_inference_steps=args.diffusion_steps,
|
266 |
+
).images[0]
|
267 |
+
print(f"Image saved to {os.path.join(image_path, f'{name}.png')}")
|
268 |
+
|
269 |
+
images = np.asarray(output_image, dtype=np.float32) / 255.0
|
270 |
+
images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
|
271 |
+
images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2) # (6, 3, 320, 320)
|
272 |
+
torchvision.utils.save_image(images, os.path.join(image_path, f'{name}.png'))
|
273 |
+
sample = {'name': name, 'images': images}
|
274 |
+
|
275 |
+
# delete pipeline to save memory
|
276 |
+
# del pipeline
|
277 |
+
|
278 |
+
###############################################################################
|
279 |
+
# Stage 2: Reconstruction.
|
280 |
+
###############################################################################
|
281 |
+
|
282 |
+
input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=3.2*args.scale, fov=30).to(device)
|
283 |
+
chunk_size = 20 if IS_FLEXICUBES else 1
|
284 |
+
|
285 |
+
# for idx, sample in enumerate(outputs):
|
286 |
+
name = sample['name']
|
287 |
+
print(f'[{idx+1}/{len(outputs)}] Creating {name} ...')
|
288 |
+
|
289 |
+
images = sample['images'].unsqueeze(0).to(device)
|
290 |
+
images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)
|
291 |
+
|
292 |
+
with torch.no_grad():
|
293 |
+
# get triplane
|
294 |
+
planes = model.forward_planes(images, input_cameras)
|
295 |
+
|
296 |
+
mesh_path_idx = os.path.join(mesh_path, f'{name}.obj')
|
297 |
+
|
298 |
+
mesh_out = model.extract_mesh(
|
299 |
+
planes,
|
300 |
+
use_texture_map=args.export_texmap,
|
301 |
+
**infer_config,
|
302 |
+
)
|
303 |
+
if args.export_texmap:
|
304 |
+
vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
|
305 |
+
save_obj_with_mtl(
|
306 |
+
vertices.data.cpu().numpy(),
|
307 |
+
uvs.data.cpu().numpy(),
|
308 |
+
faces.data.cpu().numpy(),
|
309 |
+
mesh_tex_idx.data.cpu().numpy(),
|
310 |
+
tex_map.permute(1, 2, 0).data.cpu().numpy(),
|
311 |
+
mesh_path_idx,
|
312 |
+
)
|
313 |
+
else:
|
314 |
+
vertices, faces, vertex_colors = mesh_out
|
315 |
+
save_obj(vertices, faces, vertex_colors, mesh_path_idx)
|
316 |
+
print(f"Mesh saved to {mesh_path_idx}")
|
317 |
+
|
318 |
+
render_size = 512
|
319 |
+
if args.save_video:
|
320 |
+
video_path_idx = os.path.join(video_path, f'{name}.mp4')
|
321 |
+
render_size = infer_config.render_resolution
|
322 |
+
ENV = load_mipmap(args.env_path)
|
323 |
+
materials = args.materials
|
324 |
+
|
325 |
+
all_mv, all_mvp, all_campos = get_render_cameras(
|
326 |
+
batch_size=1,
|
327 |
+
M=240,
|
328 |
+
radius=args.distance,
|
329 |
+
elevation=(90, 60.0),
|
330 |
+
is_flexicubes=IS_FLEXICUBES,
|
331 |
+
fov=args.fov
|
332 |
+
)
|
333 |
+
|
334 |
+
frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
|
335 |
+
model,
|
336 |
+
planes,
|
337 |
+
render_cameras=all_mvp,
|
338 |
+
camera_pos=all_campos,
|
339 |
+
env=ENV,
|
340 |
+
materials=materials,
|
341 |
+
render_size=render_size,
|
342 |
+
chunk_size=chunk_size,
|
343 |
+
is_flexicubes=IS_FLEXICUBES,
|
344 |
+
)
|
345 |
+
normals = (torch.nn.functional.normalize(normals) + 1) / 2
|
346 |
+
normals = normals * alphas + (1-alphas)
|
347 |
+
all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)
|
348 |
+
|
349 |
+
# breakpoint()
|
350 |
+
save_video(
|
351 |
+
all_frames,
|
352 |
+
video_path_idx,
|
353 |
+
fps=30,
|
354 |
+
)
|
355 |
+
print(f"Video saved to {video_path_idx}")
|
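Editor's note on the Stage 1 to Stage 2 hand-off in run.py above: the diffusion pipeline returns one 960x640 image that packs six 320x320 views in a 3x2 grid, and the einops rearrange call slices it into per-view tensors before reconstruction. A minimal, self-contained sketch of just that slicing step (a random array stands in for the real pipeline output; the shapes are the only thing carried over from the code above):

import numpy as np
import torch
from einops import rearrange

# stand-in for `output_image` after the `np.asarray(...) / 255.0` conversion in run.py
grid = np.random.rand(960, 640, 3).astype(np.float32)

images = torch.from_numpy(grid).permute(2, 0, 1).contiguous()        # (3, 960, 640)
views = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)  # (6, 3, 320, 320)
print(views.shape)                                                    # torch.Size([6, 3, 320, 320])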
run.sh
ADDED
@@ -0,0 +1,7 @@
+python run.py configs/PRM_inference.yaml examples/ \
+    --seed 10 \
+    --materials "(0.0, 0.9)" \
+    --env_path "./env_mipmap/6" \
+    --output_path "output/" \
+    --save_video \
+    --export_texmap
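The --materials "(0.0, 0.9)" flag above passes a two-value material setting as a quoted tuple string (the exact interpretation of the pair lives in the renderer). One way such an argument can be turned back into floats, shown purely as a hypothetical sketch and not as the repository's actual parser:

import argparse
import ast

parser = argparse.ArgumentParser()
parser.add_argument('--materials', type=str, default="(0.0, 0.9)")
args = parser.parse_args(['--materials', '(0.0, 0.9)'])

value_a, value_b = ast.literal_eval(args.materials)  # hypothetical names; yields 0.0 and 0.9
print(value_a, value_b)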
run_hpc.sh
ADDED
@@ -0,0 +1,16 @@
+source /hpc2ssd/softwares/anaconda3/bin/activate instantmesh
+module load cuda/12.1 compilers/gcc-11.1.0 compilers/icc-2023.1.0 cmake/3.27.0
+export CXX=$(which g++)
+export CC=$(which gcc)
+export CPLUS_INCLUDE_PATH=/hpc2ssd/softwares/cuda/cuda-12.1/targets/x86_64-linux/include:$CPLUS_INCLUDE_PATH
+export CUDA_LAUNCH_BLOCKING=1
+export NCCL_TIMEOUT=3600
+export CUDA_VISIBLE_DEVICES="0"
+# python app.py
+python run.py configs/PRM_inference.yaml examples/恐龙套装.webp \
+    --seed 10 \
+    --materials "(0.0, 0.9)" \
+    --env_path "./env_mipmap/6" \
+    --output_path "output/" \
+    --save_video \
+    --export_texmap
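run_hpc.sh above mostly pins the CUDA 12.1 toolchain, compilers and NCCL/CUDA environment variables before launching the same run.py command. A small optional sanity check for that environment (an illustrative sketch, not part of the repository):

import shutil
import torch

print("torch:", torch.__version__, "| built against CUDA:", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available(), "| visible devices:", torch.cuda.device_count())
print("g++ on PATH:", shutil.which("g++"))
print("nvcc on PATH:", shutil.which("nvcc"))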
src/__init__.py
ADDED
File without changes
|
src/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (139 Bytes). View file
|
|
src/data/__init__.py
ADDED
File without changes
|
src/data/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (144 Bytes). View file
|
|
src/data/__pycache__/objaverse.cpython-310.pyc
ADDED
Binary file (14.9 kB). View file
|
|
src/data/bsdf_256_256.bin
ADDED
Binary file (524 kB). View file
|
|
src/data/objaverse.py
ADDED
@@ -0,0 +1,509 @@
1 |
+
import os, sys
|
2 |
+
import math
|
3 |
+
import json
|
4 |
+
import glm
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
import random
|
8 |
+
import numpy as np
|
9 |
+
from PIL import Image
|
10 |
+
import webdataset as wds
|
11 |
+
import pytorch_lightning as pl
|
12 |
+
import sys
|
13 |
+
from src.utils import obj, render_utils
|
14 |
+
import torch
|
15 |
+
import torch.nn.functional as F
|
16 |
+
from torch.utils.data import Dataset
|
17 |
+
from torch.utils.data.distributed import DistributedSampler
|
18 |
+
import random
|
19 |
+
import itertools
|
20 |
+
from src.utils.train_util import instantiate_from_config
|
21 |
+
from src.utils.camera_util import (
|
22 |
+
FOV_to_intrinsics,
|
23 |
+
center_looking_at_camera_pose,
|
24 |
+
get_circular_camera_poses,
|
25 |
+
)
|
26 |
+
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
|
27 |
+
import re
|
28 |
+
|
29 |
+
def spherical_camera_pose(azimuths: np.ndarray, elevations: np.ndarray, radius=2.5):
|
30 |
+
azimuths = np.deg2rad(azimuths)
|
31 |
+
elevations = np.deg2rad(elevations)
|
32 |
+
|
33 |
+
xs = radius * np.cos(elevations) * np.cos(azimuths)
|
34 |
+
ys = radius * np.cos(elevations) * np.sin(azimuths)
|
35 |
+
zs = radius * np.sin(elevations)
|
36 |
+
|
37 |
+
cam_locations = np.stack([xs, ys, zs], axis=-1)
|
38 |
+
cam_locations = torch.from_numpy(cam_locations).float()
|
39 |
+
|
40 |
+
c2ws = center_looking_at_camera_pose(cam_locations)
|
41 |
+
return c2ws
|
42 |
+
|
43 |
+
def find_matching_files(base_path, idx):
|
44 |
+
formatted_idx = '%03d' % idx
|
45 |
+
pattern = re.compile(r'^%s_\d+\.png$' % formatted_idx)
|
46 |
+
matching_files = []
|
47 |
+
|
48 |
+
if os.path.exists(base_path):
|
49 |
+
for filename in os.listdir(base_path):
|
50 |
+
if pattern.match(filename):
|
51 |
+
matching_files.append(filename)
|
52 |
+
|
53 |
+
return os.path.join(base_path, matching_files[0])
|
54 |
+
|
55 |
+
def load_mipmap(env_path):
|
56 |
+
diffuse_path = os.path.join(env_path, "diffuse.pth")
|
57 |
+
diffuse = torch.load(diffuse_path, map_location=torch.device('cpu'))
|
58 |
+
|
59 |
+
specular = []
|
60 |
+
for i in range(6):
|
61 |
+
specular_path = os.path.join(env_path, f"specular_{i}.pth")
|
62 |
+
specular_tensor = torch.load(specular_path, map_location=torch.device('cpu'))
|
63 |
+
specular.append(specular_tensor)
|
64 |
+
return [specular, diffuse]
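# --- Editor's note (illustrative, not part of the original file) ---
# load_mipmap above assumes each environment folder under env_mipmap/ stores a prefiltered
# diffuse map plus six specular mip levels saved with torch.save:
#     <env>/diffuse.pth, <env>/specular_0.pth ... <env>/specular_5.pth
# Typical usage, mirroring ValidationData.__getitem__ further down:
#     specular, diffuse = load_mipmap("env_mipmap/6")
#     assert len(specular) == 6  # one tensor per specular roughness level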
|
65 |
+
|
66 |
+
def convert_to_white_bg(image, write_bg=True):
|
67 |
+
alpha = image[:, :, 3:]
|
68 |
+
if write_bg:
|
69 |
+
return image[:, :, :3] * alpha + 1. * (1 - alpha)
|
70 |
+
else:
|
71 |
+
return image[:, :, :3] * alpha
|
72 |
+
|
73 |
+
def load_obj(path, return_attributes=False, scale_factor=1.0):
|
74 |
+
return obj.load_obj(path, clear_ks=True, mtl_override=None, return_attributes=return_attributes, scale_factor=scale_factor)
|
75 |
+
|
76 |
+
def custom_collate_fn(batch):
|
77 |
+
return batch
|
78 |
+
|
79 |
+
|
80 |
+
def collate_fn_wrapper(batch):
|
81 |
+
return custom_collate_fn(batch)
|
82 |
+
|
83 |
+
class DataModuleFromConfig(pl.LightningDataModule):
|
84 |
+
def __init__(
|
85 |
+
self,
|
86 |
+
batch_size=8,
|
87 |
+
num_workers=4,
|
88 |
+
train=None,
|
89 |
+
validation=None,
|
90 |
+
test=None,
|
91 |
+
**kwargs,
|
92 |
+
):
|
93 |
+
super().__init__()
|
94 |
+
|
95 |
+
self.batch_size = batch_size
|
96 |
+
self.num_workers = num_workers
|
97 |
+
|
98 |
+
self.dataset_configs = dict()
|
99 |
+
if train is not None:
|
100 |
+
self.dataset_configs['train'] = train
|
101 |
+
if validation is not None:
|
102 |
+
self.dataset_configs['validation'] = validation
|
103 |
+
if test is not None:
|
104 |
+
self.dataset_configs['test'] = test
|
105 |
+
|
106 |
+
def setup(self, stage):
|
107 |
+
|
108 |
+
if stage in ['fit']:
|
109 |
+
self.datasets = dict((k, instantiate_from_config(self.dataset_configs[k])) for k in self.dataset_configs)
|
110 |
+
else:
|
111 |
+
raise NotImplementedError
|
112 |
+
|
113 |
+
def custom_collate_fn(self, batch):
|
114 |
+
collated_batch = {}
|
115 |
+
for key in batch[0].keys():
|
116 |
+
if key == 'input_env' or key == 'target_env':
|
117 |
+
collated_batch[key] = [d[key] for d in batch]
|
118 |
+
else:
|
119 |
+
collated_batch[key] = torch.stack([d[key] for d in batch], dim=0)
|
120 |
+
return collated_batch
|
121 |
+
|
122 |
+
def convert_to_white_bg(self, image):
|
123 |
+
alpha = image[:, :, 3:]
|
124 |
+
return image[:, :, :3] * alpha + 1. * (1 - alpha)
|
125 |
+
|
126 |
+
def load_obj(self, path):
|
127 |
+
return obj.load_obj(path, clear_ks=True, mtl_override=None)
|
128 |
+
|
129 |
+
def train_dataloader(self):
|
130 |
+
|
131 |
+
sampler = DistributedSampler(self.datasets['train'])
|
132 |
+
return wds.WebLoader(self.datasets['train'], batch_size=self.batch_size, num_workers=self.num_workers, shuffle=False, sampler=sampler, collate_fn=collate_fn_wrapper)
|
133 |
+
|
134 |
+
def val_dataloader(self):
|
135 |
+
|
136 |
+
sampler = DistributedSampler(self.datasets['validation'])
|
137 |
+
return wds.WebLoader(self.datasets['validation'], batch_size=1, num_workers=self.num_workers, shuffle=False, sampler=sampler, collate_fn=collate_fn_wrapper)
|
138 |
+
|
139 |
+
def test_dataloader(self):
|
140 |
+
|
141 |
+
return wds.WebLoader(self.datasets['test'], batch_size=self.batch_size, num_workers=self.num_workers, shuffle=False)
|
142 |
+
|
143 |
+
|
144 |
+
class ObjaverseData(Dataset):
|
145 |
+
def __init__(self,
|
146 |
+
root_dir='Objaverse_highQuality',
|
147 |
+
light_dir= 'env_mipmap',
|
148 |
+
input_view_num=6,
|
149 |
+
target_view_num=4,
|
150 |
+
total_view_n=18,
|
151 |
+
distance=3.5,
|
152 |
+
fov=50,
|
153 |
+
camera_random=False,
|
154 |
+
validation=False,
|
155 |
+
):
|
156 |
+
self.root_dir = Path(root_dir)
|
157 |
+
self.light_dir = light_dir
|
158 |
+
self.all_env_name = []
|
159 |
+
for temp_dir in os.listdir(light_dir):
|
160 |
+
if os.listdir(os.path.join(self.light_dir, temp_dir)):
|
161 |
+
self.all_env_name.append(temp_dir)
|
162 |
+
|
163 |
+
self.input_view_num = input_view_num
|
164 |
+
self.target_view_num = target_view_num
|
165 |
+
self.total_view_n = total_view_n
|
166 |
+
self.fov = fov
|
167 |
+
self.camera_random = camera_random
|
168 |
+
|
169 |
+
self.train_res = [512, 512]
|
170 |
+
self.cam_near_far = [0.1, 1000.0]
|
171 |
+
self.fov_rad = np.deg2rad(fov)
|
172 |
+
self.fov_deg = fov
|
173 |
+
self.spp = 1
|
174 |
+
self.cam_radius = distance
|
175 |
+
self.layers = 1
|
176 |
+
|
177 |
+
numbers = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
|
178 |
+
self.combinations = list(itertools.product(numbers, repeat=2))
|
179 |
+
|
180 |
+
self.paths = os.listdir(self.root_dir)
|
181 |
+
|
182 |
+
# with open("BJ_Mesh_list.json", 'r') as file:
|
183 |
+
# self.paths = json.load(file)
|
184 |
+
|
185 |
+
print('total training object num:', len(self.paths))
|
186 |
+
|
187 |
+
self.depth_scale = 6.0
|
188 |
+
|
189 |
+
total_objects = len(self.paths)
|
190 |
+
print('============= length of dataset %d =============' % total_objects)
|
191 |
+
|
192 |
+
def __len__(self):
|
193 |
+
return len(self.paths)
|
194 |
+
|
195 |
+
def load_obj(self, path):
|
196 |
+
return obj.load_obj(path, clear_ks=True, mtl_override=None)
|
197 |
+
|
198 |
+
def sample_spherical(self, phi, theta, cam_radius):
|
199 |
+
theta = np.deg2rad(theta)
|
200 |
+
phi = np.deg2rad(phi)
|
201 |
+
|
202 |
+
z = cam_radius * np.cos(phi) * np.sin(theta)
|
203 |
+
x = cam_radius * np.sin(phi) * np.sin(theta)
|
204 |
+
y = cam_radius * np.cos(theta)
|
205 |
+
|
206 |
+
return x, y, z
|
207 |
+
|
208 |
+
def _random_scene(self, cam_radius, fov_rad):
|
209 |
+
iter_res = self.train_res
|
210 |
+
proj_mtx = render_utils.perspective(fov_rad, iter_res[1] / iter_res[0], self.cam_near_far[0], self.cam_near_far[1])
|
211 |
+
|
212 |
+
azimuths = random.uniform(0, 360)
|
213 |
+
elevations = random.uniform(30, 150)
|
214 |
+
mv_embedding = spherical_camera_pose(azimuths, 90-elevations, cam_radius)
|
215 |
+
x, y, z = self.sample_spherical(azimuths, elevations, cam_radius)
|
216 |
+
eye = glm.vec3(x, y, z)
|
217 |
+
at = glm.vec3(0.0, 0.0, 0.0)
|
218 |
+
up = glm.vec3(0.0, 1.0, 0.0)
|
219 |
+
view_matrix = glm.lookAt(eye, at, up)
|
220 |
+
mv = torch.from_numpy(np.array(view_matrix))
|
221 |
+
mvp = proj_mtx @ (mv) #w2c
|
222 |
+
campos = torch.linalg.inv(mv)[:3, 3]
|
223 |
+
return mv[None, ...], mvp[None, ...], campos[None, ...], mv_embedding[None, ...], iter_res, self.spp # Add batch dimension
|
224 |
+
|
225 |
+
def load_im(self, path, color):
|
226 |
+
'''
|
227 |
+
replace background pixel with random color in rendering
|
228 |
+
'''
|
229 |
+
pil_img = Image.open(path)
|
230 |
+
|
231 |
+
image = np.asarray(pil_img, dtype=np.float32) / 255.
|
232 |
+
alpha = image[:, :, 3:]
|
233 |
+
image = image[:, :, :3] * alpha + color * (1 - alpha)
|
234 |
+
|
235 |
+
image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
|
236 |
+
alpha = torch.from_numpy(alpha).permute(2, 0, 1).contiguous().float()
|
237 |
+
return image, alpha
|
238 |
+
|
239 |
+
def load_albedo(self, path, color, mask):
|
240 |
+
'''
|
241 |
+
replace background pixel with random color in rendering
|
242 |
+
'''
|
243 |
+
pil_img = Image.open(path)
|
244 |
+
|
245 |
+
image = np.asarray(pil_img, dtype=np.float32) / 255.
|
246 |
+
image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
|
247 |
+
|
248 |
+
color = torch.ones_like(image)
|
249 |
+
image = image * mask + color * (1 - mask)
|
250 |
+
return image
|
251 |
+
|
252 |
+
def convert_to_white_bg(self, image):
|
253 |
+
alpha = image[:, :, 3:]
|
254 |
+
return image[:, :, :3] * alpha + 1. * (1 - alpha)
|
255 |
+
|
256 |
+
def calculate_fov(self, initial_distance, initial_fov, new_distance):
|
257 |
+
initial_fov_rad = math.radians(initial_fov)
|
258 |
+
|
259 |
+
height = 2 * initial_distance * math.tan(initial_fov_rad / 2)
|
260 |
+
|
261 |
+
new_fov_rad = 2 * math.atan(height / (2 * new_distance))
|
262 |
+
|
263 |
+
new_fov = math.degrees(new_fov_rad)
|
264 |
+
|
265 |
+
return new_fov
|
266 |
+
|
267 |
+
def __getitem__(self, index):
|
268 |
+
obj_path = os.path.join(self.root_dir, self.paths[index])
|
269 |
+
mesh_attributes = torch.load(obj_path, map_location=torch.device('cpu'))
|
270 |
+
pose_list = []
|
271 |
+
env_list = []
|
272 |
+
material_list = []
|
273 |
+
camera_pos = []
|
274 |
+
c2w_list = []
|
275 |
+
camera_embedding_list = []
|
276 |
+
random_env = False
|
277 |
+
random_mr = False
|
278 |
+
if random.random() > 0.5:
|
279 |
+
random_env = True
|
280 |
+
if random.random() > 0.5:
|
281 |
+
random_mr = True
|
282 |
+
selected_env = random.randint(0, len(self.all_env_name)-1)
|
283 |
+
materials = random.choice(self.combinations)
|
284 |
+
if self.camera_random:
|
285 |
+
random_perturbation = random.uniform(-1.5, 1.5)
|
286 |
+
cam_radius = self.cam_radius + random_perturbation
|
287 |
+
fov_deg = self.calculate_fov(initial_distance=self.cam_radius, initial_fov=self.fov_deg, new_distance=cam_radius)
|
288 |
+
fov_rad = np.deg2rad(fov_deg)
|
289 |
+
else:
|
290 |
+
cam_radius = self.cam_radius
|
291 |
+
fov_rad = self.fov_rad
|
292 |
+
fov_deg = self.fov_deg
|
293 |
+
|
294 |
+
if len(self.input_view_num) >= 1:
|
295 |
+
input_view_num = random.choice(self.input_view_num)
|
296 |
+
else:
|
297 |
+
input_view_num = self.input_view_num
|
298 |
+
for _ in range(input_view_num + self.target_view_num):
|
299 |
+
mv, mvp, campos, mv_embedding, iter_res, iter_spp = self._random_scene(cam_radius, fov_rad)
|
300 |
+
if random_env:
|
301 |
+
selected_env = random.randint(0, len(self.all_env_name)-1)
|
302 |
+
env_path = os.path.join(self.light_dir, self.all_env_name[selected_env])
|
303 |
+
env = load_mipmap(env_path)
|
304 |
+
if random_mr:
|
305 |
+
materials = random.choice(self.combinations)
|
306 |
+
pose_list.append(mvp)
|
307 |
+
camera_pos.append(campos)
|
308 |
+
c2w_list.append(mv)
|
309 |
+
env_list.append(env)
|
310 |
+
material_list.append(materials)
|
311 |
+
camera_embedding_list.append(mv_embedding)
|
312 |
+
data = {
|
313 |
+
'mesh_attributes': mesh_attributes,
|
314 |
+
'input_view_num': input_view_num,
|
315 |
+
'target_view_num': self.target_view_num,
|
316 |
+
'obj_path': obj_path,
|
317 |
+
'pose_list': pose_list,
|
318 |
+
'camera_pos': camera_pos,
|
319 |
+
'c2w_list': c2w_list,
|
320 |
+
'env_list': env_list,
|
321 |
+
'material_list': material_list,
|
322 |
+
'camera_embedding_list': camera_embedding_list,
|
323 |
+
'fov_deg':fov_deg,
|
324 |
+
'raduis': cam_radius
|
325 |
+
}
|
326 |
+
|
327 |
+
return data
|
328 |
+
|
329 |
+
class ValidationData(Dataset):
|
330 |
+
def __init__(self,
|
331 |
+
root_dir='objaverse/',
|
332 |
+
input_view_num=6,
|
333 |
+
input_image_size=320,
|
334 |
+
fov=30,
|
335 |
+
):
|
336 |
+
self.root_dir = Path(root_dir)
|
337 |
+
self.input_view_num = input_view_num
|
338 |
+
self.input_image_size = input_image_size
|
339 |
+
self.fov = fov
|
340 |
+
self.light_dir = 'env_mipmap'
|
341 |
+
|
342 |
+
# with open('Mesh_list.json') as f:
|
343 |
+
# filtered_dict = json.load(f)
|
344 |
+
|
345 |
+
self.paths = os.listdir(self.root_dir)
|
346 |
+
|
347 |
+
# self.paths = filtered_dict
|
348 |
+
print('============= length of dataset %d =============' % len(self.paths))
|
349 |
+
|
350 |
+
cam_distance = 4.0
|
351 |
+
azimuths = np.array([30, 90, 150, 210, 270, 330])
|
352 |
+
elevations = np.array([20, -10, 20, -10, 20, -10])
|
353 |
+
azimuths = np.deg2rad(azimuths)
|
354 |
+
elevations = np.deg2rad(elevations)
|
355 |
+
|
356 |
+
x = cam_distance * np.cos(elevations) * np.cos(azimuths)
|
357 |
+
y = cam_distance * np.cos(elevations) * np.sin(azimuths)
|
358 |
+
z = cam_distance * np.sin(elevations)
|
359 |
+
|
360 |
+
cam_locations = np.stack([x, y, z], axis=-1)
|
361 |
+
cam_locations = torch.from_numpy(cam_locations).float()
|
362 |
+
c2ws = center_looking_at_camera_pose(cam_locations)
|
363 |
+
self.c2ws = c2ws.float()
|
364 |
+
self.Ks = FOV_to_intrinsics(self.fov).unsqueeze(0).repeat(6, 1, 1).float()
|
365 |
+
|
366 |
+
render_c2ws = get_circular_camera_poses(M=8, radius=cam_distance, elevation=20.0)
|
367 |
+
render_Ks = FOV_to_intrinsics(self.fov).unsqueeze(0).repeat(render_c2ws.shape[0], 1, 1)
|
368 |
+
self.render_c2ws = render_c2ws.float()
|
369 |
+
self.render_Ks = render_Ks.float()
|
370 |
+
|
371 |
+
def __len__(self):
|
372 |
+
return len(self.paths)
|
373 |
+
|
374 |
+
def load_im(self, path, color):
|
375 |
+
'''
|
376 |
+
replace background pixel with random color in rendering
|
377 |
+
'''
|
378 |
+
pil_img = Image.open(path)
|
379 |
+
pil_img = pil_img.resize((self.input_image_size, self.input_image_size), resample=Image.BICUBIC)
|
380 |
+
|
381 |
+
image = np.asarray(pil_img, dtype=np.float32) / 255.
|
382 |
+
if image.shape[-1] == 4:
|
383 |
+
alpha = image[:, :, 3:]
|
384 |
+
image = image[:, :, :3] * alpha + color * (1 - alpha)
|
385 |
+
else:
|
386 |
+
alpha = np.ones_like(image[:, :, :1])
|
387 |
+
|
388 |
+
image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
|
389 |
+
alpha = torch.from_numpy(alpha).permute(2, 0, 1).contiguous().float()
|
390 |
+
return image, alpha
|
391 |
+
|
392 |
+
def load_mat(self, path, color):
|
393 |
+
'''
|
394 |
+
replace background pixel with random color in rendering
|
395 |
+
'''
|
396 |
+
pil_img = Image.open(path)
|
397 |
+
pil_img = pil_img.resize((384,384), resample=Image.BICUBIC)
|
398 |
+
|
399 |
+
image = np.asarray(pil_img, dtype=np.float32) / 255.
|
400 |
+
if image.shape[-1] == 4:
|
401 |
+
alpha = image[:, :, 3:]
|
402 |
+
image = image[:, :, :3] * alpha + color * (1 - alpha)
|
403 |
+
else:
|
404 |
+
alpha = np.ones_like(image[:, :, :1])
|
405 |
+
|
406 |
+
image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
|
407 |
+
alpha = torch.from_numpy(alpha).permute(2, 0, 1).contiguous().float()
|
408 |
+
return image, alpha
|
409 |
+
|
410 |
+
def load_albedo(self, path, color, mask):
|
411 |
+
'''
|
412 |
+
replace background pixel with random color in rendering
|
413 |
+
'''
|
414 |
+
pil_img = Image.open(path)
|
415 |
+
pil_img = pil_img.resize((self.input_image_size, self.input_image_size), resample=Image.BICUBIC)
|
416 |
+
|
417 |
+
image = np.asarray(pil_img, dtype=np.float32) / 255.
|
418 |
+
image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
|
419 |
+
|
420 |
+
color = torch.ones_like(image)
|
421 |
+
image = image * mask + color * (1 - mask)
|
422 |
+
return image
|
423 |
+
|
424 |
+
def __getitem__(self, index):
|
425 |
+
|
426 |
+
# load data
|
427 |
+
input_image_path = os.path.join(self.root_dir, self.paths[index])
|
428 |
+
|
429 |
+
'''background color, default: white'''
|
430 |
+
bkg_color = [1.0, 1.0, 1.0]
|
431 |
+
|
432 |
+
image_list = []
|
433 |
+
albedo_list = []
|
434 |
+
alpha_list = []
|
435 |
+
specular_list = []
|
436 |
+
diffuse_list = []
|
437 |
+
metallic_list = []
|
438 |
+
roughness_list = []
|
439 |
+
|
440 |
+
exist_comb_list = []
|
441 |
+
for subfolder in os.listdir(input_image_path):
|
442 |
+
found_numeric_subfolder=False
|
443 |
+
subfolder_path = os.path.join(input_image_path, subfolder)
|
444 |
+
if os.path.isdir(subfolder_path) and '_' in subfolder and 'specular' not in subfolder and 'diffuse' not in subfolder:
|
445 |
+
try:
|
446 |
+
parts = subfolder.split('_')
|
447 |
+
float(parts[0])  # try to parse the strings on both sides of the separator as floats
|
448 |
+
float(parts[1])
|
449 |
+
found_numeric_subfolder = True
|
450 |
+
except ValueError:
|
451 |
+
continue
|
452 |
+
if found_numeric_subfolder:
|
453 |
+
exist_comb_list.append(subfolder)
|
454 |
+
|
455 |
+
selected_one_comb = random.choice(exist_comb_list)
|
456 |
+
|
457 |
+
|
458 |
+
for idx in range(self.input_view_num):
|
459 |
+
img_path = find_matching_files(os.path.join(input_image_path, selected_one_comb, 'rgb'), idx)
|
460 |
+
albedo_path = img_path.replace('rgb', 'albedo')
|
461 |
+
metallic_path = img_path.replace('rgb', 'metallic')
|
462 |
+
roughness_path = img_path.replace('rgb', 'roughness')
|
463 |
+
|
464 |
+
image, alpha = self.load_im(img_path, bkg_color)
|
465 |
+
albedo = self.load_albedo(albedo_path, bkg_color, alpha)
|
466 |
+
metallic,_ = self.load_mat(metallic_path, bkg_color)
|
467 |
+
roughness,_ = self.load_mat(roughness_path, bkg_color)
|
468 |
+
|
469 |
+
light_num = os.path.basename(img_path).split('_')[1].split('.')[0]
|
470 |
+
light_path = os.path.join(self.light_dir, str(int(light_num)+1))
|
471 |
+
|
472 |
+
specular, diffuse = load_mipmap(light_path)
|
473 |
+
|
474 |
+
image_list.append(image)
|
475 |
+
alpha_list.append(alpha)
|
476 |
+
albedo_list.append(albedo)
|
477 |
+
metallic_list.append(metallic)
|
478 |
+
roughness_list.append(roughness)
|
479 |
+
specular_list.append(specular)
|
480 |
+
diffuse_list.append(diffuse)
|
481 |
+
|
482 |
+
images = torch.stack(image_list, dim=0).float()
|
483 |
+
alphas = torch.stack(alpha_list, dim=0).float()
|
484 |
+
albedo = torch.stack(albedo_list, dim=0).float()
|
485 |
+
metallic = torch.stack(metallic_list, dim=0).float()
|
486 |
+
roughness = torch.stack(roughness_list, dim=0).float()
|
487 |
+
|
488 |
+
data = {
|
489 |
+
'input_images': images,
|
490 |
+
'input_alphas': alphas,
|
491 |
+
'input_c2ws': self.c2ws,
|
492 |
+
'input_Ks': self.Ks,
|
493 |
+
|
494 |
+
'input_albedos': albedo[:self.input_view_num],
|
495 |
+
'input_metallics': metallic[:self.input_view_num],
|
496 |
+
'input_roughness': roughness[:self.input_view_num],
|
497 |
+
|
498 |
+
'specular': specular_list[:self.input_view_num],
|
499 |
+
'diffuse': diffuse_list[:self.input_view_num],
|
500 |
+
|
501 |
+
'render_c2ws': self.render_c2ws,
|
502 |
+
'render_Ks': self.render_Ks,
|
503 |
+
}
|
504 |
+
return data
|
505 |
+
|
506 |
+
|
507 |
+
if __name__ == '__main__':
|
508 |
+
dataset = ObjaverseData()
|
509 |
+
dataset[1]  # smoke-test one sample; ObjaverseData defines no new() method
|
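Editor's note on ObjaverseData above: when camera_random is on, the loader perturbs the camera radius and then rescales the field of view so the object keeps roughly the same framing, i.e. calculate_fov solves 2*d1*tan(fov1/2) = 2*d2*tan(fov2/2) for the new FOV. A standalone check of that invariant (the formula is re-implemented here so the snippet runs without the repository):

import math

def calculate_fov(initial_distance, initial_fov, new_distance):
    # same relation as ObjaverseData.calculate_fov: keep the visible image-plane height fixed
    height = 2 * initial_distance * math.tan(math.radians(initial_fov) / 2)
    return math.degrees(2 * math.atan(height / (2 * new_distance)))

new_fov = calculate_fov(initial_distance=3.5, initial_fov=50, new_distance=5.0)
h_old = 2 * 3.5 * math.tan(math.radians(50) / 2)
h_new = 2 * 5.0 * math.tan(math.radians(new_fov) / 2)
print(round(new_fov, 2), abs(h_old - h_new) < 1e-9)  # ~36.16 True: farther camera, narrower FOV, same framing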
src/model_mesh.py
ADDED
@@ -0,0 +1,642 @@
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
import gc
|
7 |
+
from torchvision.transforms import v2
|
8 |
+
from torchvision.utils import make_grid, save_image
|
9 |
+
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
|
10 |
+
import pytorch_lightning as pl
|
11 |
+
from einops import rearrange, repeat
|
12 |
+
from src.utils.camera_util import FOV_to_intrinsics
|
13 |
+
from src.utils.material import Material
|
14 |
+
from src.utils.train_util import instantiate_from_config
|
15 |
+
import nvdiffrast.torch as dr
|
16 |
+
from src.utils import render
|
17 |
+
from src.utils.mesh import Mesh, compute_tangents
|
18 |
+
os.environ['PYOPENGL_PLATFORM'] = 'egl'
|
19 |
+
|
20 |
+
# from pytorch3d.transforms import quaternion_to_matrix, euler_angles_to_matrix
|
21 |
+
GLCTX = [None] * torch.cuda.device_count()
|
22 |
+
|
23 |
+
def initialize_extension(gpu_id):
|
24 |
+
global GLCTX
|
25 |
+
if GLCTX[gpu_id] is None:
|
26 |
+
print(f"Initializing extension module renderutils_plugin on GPU {gpu_id}...")
|
27 |
+
torch.cuda.set_device(gpu_id)
|
28 |
+
GLCTX[gpu_id] = dr.RasterizeCudaContext()
|
29 |
+
return GLCTX[gpu_id]
|
30 |
+
|
31 |
+
# Regularization loss for FlexiCubes
|
32 |
+
def sdf_reg_loss_batch(sdf, all_edges):
|
33 |
+
sdf_f1x6x2 = sdf[:, all_edges.reshape(-1)].reshape(sdf.shape[0], -1, 2)
|
34 |
+
mask = torch.sign(sdf_f1x6x2[..., 0]) != torch.sign(sdf_f1x6x2[..., 1])
|
35 |
+
sdf_f1x6x2 = sdf_f1x6x2[mask]
|
36 |
+
sdf_diff = F.binary_cross_entropy_with_logits(
|
37 |
+
sdf_f1x6x2[..., 0], (sdf_f1x6x2[..., 1] > 0).float()) + \
|
38 |
+
F.binary_cross_entropy_with_logits(
|
39 |
+
sdf_f1x6x2[..., 1], (sdf_f1x6x2[..., 0] > 0).float())
|
40 |
+
return sdf_diff
|
41 |
+
|
42 |
+
def rotate_x(a, device=None):
|
43 |
+
s, c = np.sin(a), np.cos(a)
|
44 |
+
return torch.tensor([[1, 0, 0, 0],
|
45 |
+
[0, c,-s, 0],
|
46 |
+
[0, s, c, 0],
|
47 |
+
[0, 0, 0, 1]], dtype=torch.float32, device=device)
|
48 |
+
|
49 |
+
|
50 |
+
def convert_to_white_bg(image, write_bg=True):
|
51 |
+
alpha = image[:, :, 3:]
|
52 |
+
if write_bg:
|
53 |
+
return image[:, :, :3] * alpha + 1. * (1 - alpha)
|
54 |
+
else:
|
55 |
+
return image[:, :, :3] * alpha
|
56 |
+
|
57 |
+
|
58 |
+
class MVRecon(pl.LightningModule):
|
59 |
+
def __init__(
|
60 |
+
self,
|
61 |
+
lrm_generator_config,
|
62 |
+
input_size=256,
|
63 |
+
render_size=512,
|
64 |
+
init_ckpt=None,
|
65 |
+
use_tv_loss=True,
|
66 |
+
mesh_save_root="Objaverse_highQuality",
|
67 |
+
sample_points=None,
|
68 |
+
use_gt_albedo=False,
|
69 |
+
):
|
70 |
+
super(MVRecon, self).__init__()
|
71 |
+
|
72 |
+
self.use_gt_albedo = use_gt_albedo
|
73 |
+
self.use_tv_loss = use_tv_loss
|
74 |
+
self.input_size = input_size
|
75 |
+
self.render_size = render_size
|
76 |
+
self.mesh_save_root = mesh_save_root
|
77 |
+
self.sample_points = sample_points
|
78 |
+
|
79 |
+
self.lrm_generator = instantiate_from_config(lrm_generator_config)
|
80 |
+
self.lpips = LearnedPerceptualImagePatchSimilarity(net_type='vgg')
|
81 |
+
|
82 |
+
if init_ckpt is not None:
|
83 |
+
sd = torch.load(init_ckpt, map_location='cpu')['state_dict']
|
84 |
+
sd = {k: v for k, v in sd.items() if k.startswith('lrm_generator')}
|
85 |
+
sd_fc = {}
|
86 |
+
for k, v in sd.items():
|
87 |
+
if k.startswith('lrm_generator.synthesizer.decoder.net.'):
|
88 |
+
if k.startswith('lrm_generator.synthesizer.decoder.net.6.'): # last layer
|
89 |
+
# Here we assume the density field's isosurface threshold is t,
|
90 |
+
# we reverse the sign of the density field to initialize the SDF field.
|
91 |
+
# -(w*x + b - t) = (-w)*x + (t - b)
|
92 |
+
if 'weight' in k:
|
93 |
+
sd_fc[k.replace('net.', 'net_sdf.')] = -v[0:1]
|
94 |
+
else:
|
95 |
+
sd_fc[k.replace('net.', 'net_sdf.')] = 10.0 - v[0:1]
|
96 |
+
sd_fc[k.replace('net.', 'net_rgb.')] = v[1:4]
|
97 |
+
else:
|
98 |
+
sd_fc[k.replace('net.', 'net_sdf.')] = v
|
99 |
+
sd_fc[k.replace('net.', 'net_rgb.')] = v
|
100 |
+
else:
|
101 |
+
sd_fc[k] = v
|
102 |
+
sd_fc = {k.replace('lrm_generator.', ''): v for k, v in sd_fc.items()}
|
103 |
+
# missing `net_deformation` and `net_weight` parameters
|
104 |
+
self.lrm_generator.load_state_dict(sd_fc, strict=False)
|
105 |
+
print(f'Loaded weights from {init_ckpt}')
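# --- Editor's note (illustrative, not part of the original file) ---
# The remapping above converts the pretrained density head into an SDF head by flipping
# the sign of the last linear layer and shifting its bias by the isosurface threshold t = 10:
#     density(x) = w*x + b   ==>   sdf(x) = -(density(x) - t) = (-w)*x + (t - b)
# so regions the density network treats as inside (density > t) come out with negative SDF.
# Tiny numeric check of that identity, with made-up numbers:
#     w, b, t, x = 2.0, 1.0, 10.0, 3.0
#     density = w * x + b            # 7.0
#     sdf = (-w) * x + (t - b)       # 3.0, equal to -(7.0 - 10.0)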
|
106 |
+
|
107 |
+
self.validation_step_outputs = []
|
108 |
+
|
109 |
+
def on_fit_start(self):
|
110 |
+
device = torch.device(f'cuda:{self.local_rank}')
|
111 |
+
self.lrm_generator.init_flexicubes_geometry(device)
|
112 |
+
if self.global_rank == 0:
|
113 |
+
os.makedirs(os.path.join(self.logdir, 'images'), exist_ok=True)
|
114 |
+
os.makedirs(os.path.join(self.logdir, 'images_val'), exist_ok=True)
|
115 |
+
|
116 |
+
def collate_fn(self, batch):
|
117 |
+
gpu_id = torch.cuda.current_device()  # get the GPU ID used by the current thread
|
118 |
+
glctx = initialize_extension(gpu_id)
|
119 |
+
batch_size = len(batch)
|
120 |
+
input_view_num = batch[0]["input_view_num"]
|
121 |
+
target_view_num = batch[0]["target_view_num"]
|
122 |
+
iter_res = [512, 512]
|
123 |
+
iter_spp = 1
|
124 |
+
layers = 1
|
125 |
+
|
126 |
+
# Initialize lists for input and target data
|
127 |
+
input_images, input_alphas, input_depths, input_normals, input_albedos = [], [], [], [], []
|
128 |
+
input_spec_light, input_diff_light, input_spec_albedo,input_diff_albedo = [], [], [], []
|
129 |
+
input_w2cs, input_Ks, input_camera_pos, input_c2ws = [], [], [], []
|
130 |
+
input_env, input_materials = [], []
|
131 |
+
input_camera_embeddings = [] # camera_embedding_list
|
132 |
+
|
133 |
+
target_images, target_alphas, target_depths, target_normals, target_albedos = [], [], [], [], []
|
134 |
+
target_spec_light, target_diff_light, target_spec_albedo, target_diff_albedo = [], [], [], []
|
135 |
+
target_w2cs, target_Ks, target_camera_pos = [], [], []
|
136 |
+
target_env, target_materials = [], []
|
137 |
+
|
138 |
+
for sample in batch:
|
139 |
+
obj_path = sample['obj_path']
|
140 |
+
|
141 |
+
with torch.no_grad():
|
142 |
+
mesh_attributes = sample['mesh_attributes']
|
143 |
+
v_pos = mesh_attributes["v_pos"].to(self.device)
|
144 |
+
v_nrm = mesh_attributes["v_nrm"].to(self.device)
|
145 |
+
v_tex = mesh_attributes["v_tex"].to(self.device)
|
146 |
+
v_tng = mesh_attributes["v_tng"].to(self.device)
|
147 |
+
t_pos_idx = mesh_attributes["t_pos_idx"].to(self.device)
|
148 |
+
t_nrm_idx = mesh_attributes["t_nrm_idx"].to(self.device)
|
149 |
+
t_tex_idx = mesh_attributes["t_tex_idx"].to(self.device)
|
150 |
+
t_tng_idx = mesh_attributes["t_tng_idx"].to(self.device)
|
151 |
+
material = Material(mesh_attributes["mat_dict"])
|
152 |
+
material = material.to(self.device)
|
153 |
+
ref_mesh = Mesh(v_pos=v_pos, v_nrm=v_nrm, v_tex=v_tex, v_tng=v_tng,
|
154 |
+
t_pos_idx=t_pos_idx, t_nrm_idx=t_nrm_idx,
|
155 |
+
t_tex_idx=t_tex_idx, t_tng_idx=t_tng_idx, material=material)
|
156 |
+
|
157 |
+
pose_list_sample = sample['pose_list'] # mvp
|
158 |
+
camera_pos_sample = sample['camera_pos'] # campos, mv.inverse
|
159 |
+
c2w_list_sample = sample['c2w_list'] # mv
|
160 |
+
env_list_sample = sample['env_list']
|
161 |
+
material_list_sample = sample['material_list']
|
162 |
+
camera_embeddings = sample["camera_embedding_list"]
|
163 |
+
fov_deg = sample['fov_deg']
|
164 |
+
raduis = sample['raduis']
|
165 |
+
# print(f"fov_deg:{fov_deg}, raduis:{raduis}")
|
166 |
+
|
167 |
+
sample_input_images, sample_input_alphas, sample_input_depths, sample_input_normals, sample_input_albedos = [], [], [], [], []
|
168 |
+
sample_input_w2cs, sample_input_Ks, sample_input_camera_pos, sample_input_c2ws = [], [], [], []
|
169 |
+
sample_input_camera_embeddings = []
|
170 |
+
sample_input_spec_light, sample_input_diff_light = [], []
|
171 |
+
|
172 |
+
sample_target_images, sample_target_alphas, sample_target_depths, sample_target_normals, sample_target_albedos = [], [], [], [], []
|
173 |
+
sample_target_w2cs, sample_target_Ks, sample_target_camera_pos = [], [], []
|
174 |
+
sample_target_spec_light, sample_target_diff_light = [], []
|
175 |
+
|
176 |
+
sample_input_env = []
|
177 |
+
sample_input_materials = []
|
178 |
+
sample_target_env = []
|
179 |
+
sample_target_materials = []
|
180 |
+
|
181 |
+
for i in range(len(pose_list_sample)):
|
182 |
+
mvp = pose_list_sample[i]
|
183 |
+
campos = camera_pos_sample[i]
|
184 |
+
env = env_list_sample[i]
|
185 |
+
materials = material_list_sample[i]
|
186 |
+
camera_embedding = camera_embeddings[i]
|
187 |
+
|
188 |
+
with torch.no_grad():
|
189 |
+
buffer_dict = render.render_mesh(glctx, ref_mesh, mvp.to(self.device), campos.to(self.device), [env], None, None,
|
190 |
+
materials, iter_res, spp=iter_spp, num_layers=layers, msaa=True,
|
191 |
+
background=None, gt_render=True)
|
192 |
+
|
193 |
+
image = convert_to_white_bg(buffer_dict['shaded'][0])
|
194 |
+
albedo = convert_to_white_bg(buffer_dict['albedo'][0]).clamp(0., 1.)
|
195 |
+
alpha = buffer_dict['mask'][0][:, :, 3:]
|
196 |
+
depth = convert_to_white_bg(buffer_dict['depth'][0])
|
197 |
+
normal = convert_to_white_bg(buffer_dict['gb_normal'][0], write_bg=False)
|
198 |
+
spec_light = convert_to_white_bg(buffer_dict['spec_light'][0])
|
199 |
+
diff_light = convert_to_white_bg(buffer_dict['diff_light'][0])
|
200 |
+
if i < input_view_num:
|
201 |
+
sample_input_images.append(image)
|
202 |
+
sample_input_albedos.append(albedo)
|
203 |
+
sample_input_alphas.append(alpha)
|
204 |
+
sample_input_depths.append(depth)
|
205 |
+
sample_input_normals.append(normal)
|
206 |
+
sample_input_spec_light.append(spec_light)
|
207 |
+
sample_input_diff_light.append(diff_light)
|
208 |
+
sample_input_w2cs.append(mvp)
|
209 |
+
sample_input_camera_pos.append(campos)
|
210 |
+
sample_input_c2ws.append(c2w_list_sample[i])
|
211 |
+
sample_input_Ks.append(FOV_to_intrinsics(fov_deg))
|
212 |
+
sample_input_env.append(env)
|
213 |
+
sample_input_materials.append(materials)
|
214 |
+
sample_input_camera_embeddings.append(camera_embedding)
|
215 |
+
else:
|
216 |
+
sample_target_images.append(image)
|
217 |
+
sample_target_albedos.append(albedo)
|
218 |
+
sample_target_alphas.append(alpha)
|
219 |
+
sample_target_depths.append(depth)
|
220 |
+
sample_target_normals.append(normal)
|
221 |
+
sample_target_spec_light.append(spec_light)
|
222 |
+
sample_target_diff_light.append(diff_light)
|
223 |
+
sample_target_w2cs.append(mvp)
|
224 |
+
sample_target_camera_pos.append(campos)
|
225 |
+
sample_target_Ks.append(FOV_to_intrinsics(fov_deg))
|
226 |
+
sample_target_env.append(env)
|
227 |
+
sample_target_materials.append(materials)
|
228 |
+
|
229 |
+
input_images.append(torch.stack(sample_input_images, dim=0).permute(0, 3, 1, 2))
|
230 |
+
input_albedos.append(torch.stack(sample_input_albedos, dim=0).permute(0, 3, 1, 2))
|
231 |
+
input_alphas.append(torch.stack(sample_input_alphas, dim=0).permute(0, 3, 1, 2))
|
232 |
+
input_depths.append(torch.stack(sample_input_depths, dim=0).permute(0, 3, 1, 2))
|
233 |
+
input_normals.append(torch.stack(sample_input_normals, dim=0).permute(0, 3, 1, 2))
|
234 |
+
input_spec_light.append(torch.stack(sample_input_spec_light, dim=0).permute(0, 3, 1, 2))
|
235 |
+
input_diff_light.append(torch.stack(sample_input_diff_light, dim=0).permute(0, 3, 1, 2))
|
236 |
+
input_w2cs.append(torch.stack(sample_input_w2cs, dim=0))
|
237 |
+
input_camera_pos.append(torch.stack(sample_input_camera_pos, dim=0))
|
238 |
+
input_c2ws.append(torch.stack(sample_input_c2ws, dim=0))
|
239 |
+
input_camera_embeddings.append(torch.stack(sample_input_camera_embeddings, dim=0))
|
240 |
+
input_Ks.append(torch.stack(sample_input_Ks, dim=0))
|
241 |
+
input_env.append(sample_input_env)
|
242 |
+
input_materials.append(sample_input_materials)
|
243 |
+
|
244 |
+
target_images.append(torch.stack(sample_target_images, dim=0).permute(0, 3, 1, 2))
|
245 |
+
target_albedos.append(torch.stack(sample_target_albedos, dim=0).permute(0, 3, 1, 2))
|
246 |
+
target_alphas.append(torch.stack(sample_target_alphas, dim=0).permute(0, 3, 1, 2))
|
247 |
+
target_depths.append(torch.stack(sample_target_depths, dim=0).permute(0, 3, 1, 2))
|
248 |
+
target_normals.append(torch.stack(sample_target_normals, dim=0).permute(0, 3, 1, 2))
|
249 |
+
target_spec_light.append(torch.stack(sample_target_spec_light, dim=0).permute(0, 3, 1, 2))
|
250 |
+
target_diff_light.append(torch.stack(sample_target_diff_light, dim=0).permute(0, 3, 1, 2))
|
251 |
+
target_w2cs.append(torch.stack(sample_target_w2cs, dim=0))
|
252 |
+
target_camera_pos.append(torch.stack(sample_target_camera_pos, dim=0))
|
253 |
+
target_Ks.append(torch.stack(sample_target_Ks, dim=0))
|
254 |
+
target_env.append(sample_target_env)
|
255 |
+
target_materials.append(sample_target_materials)
|
256 |
+
|
257 |
+
del ref_mesh
|
258 |
+
del material
|
259 |
+
del mesh_attributes
|
260 |
+
torch.cuda.empty_cache()
|
261 |
+
gc.collect()
|
262 |
+
|
263 |
+
data = {
|
264 |
+
'input_images': torch.stack(input_images, dim=0).detach().cpu(), # (batch_size, input_view_num, 3, H, W)
|
265 |
+
'input_alphas': torch.stack(input_alphas, dim=0).detach().cpu(), # (batch_size, input_view_num, 1, H, W)
|
266 |
+
'input_depths': torch.stack(input_depths, dim=0).detach().cpu(),
|
267 |
+
'input_normals': torch.stack(input_normals, dim=0).detach().cpu(),
|
268 |
+
'input_albedos': torch.stack(input_albedos, dim=0).detach().cpu(),
|
269 |
+
'input_spec_light': torch.stack(input_spec_light, dim=0).detach().cpu(),
|
270 |
+
'input_diff_light': torch.stack(input_diff_light, dim=0).detach().cpu(),
|
271 |
+
'input_materials': input_materials,
|
272 |
+
'input_w2cs': torch.stack(input_w2cs, dim=0).squeeze(2), # (batch_size, input_view_num, 4, 4)
|
273 |
+
'input_Ks': torch.stack(input_Ks, dim=0).float(), # (batch_size, input_view_num, 3, 3)
|
274 |
+
'input_env': input_env,
|
275 |
+
'input_camera_pos': torch.stack(input_camera_pos, dim=0).squeeze(2), # (batch_size, input_view_num, 3)
|
276 |
+
'input_c2ws': torch.stack(input_c2ws, dim=0).squeeze(2), # (batch_size, input_view_num, 4, 4)
|
277 |
+
'input_camera_embedding': torch.stack(input_camera_embeddings, dim=0).squeeze(2),
|
278 |
+
|
279 |
+
'target_sample_points': None,
|
280 |
+
'target_images': torch.stack(target_images, dim=0).detach().cpu(), # (batch_size, target_view_num, 3, H, W)
|
281 |
+
'target_alphas': torch.stack(target_alphas, dim=0).detach().cpu(), # (batch_size, target_view_num, 1, H, W)
|
282 |
+
'target_depths': torch.stack(target_depths, dim=0).detach().cpu(),
|
283 |
+
'target_normals': torch.stack(target_normals, dim=0).detach().cpu(),
|
284 |
+
'target_albedos': torch.stack(target_albedos, dim=0).detach().cpu(),
|
285 |
+
'target_spec_light': torch.stack(target_spec_light, dim=0).detach().cpu(),
|
286 |
+
'target_diff_light': torch.stack(target_diff_light, dim=0).detach().cpu(),
|
287 |
+
'target_materials': target_materials,
|
288 |
+
'target_w2cs': torch.stack(target_w2cs, dim=0).squeeze(2), # (batch_size, target_view_num, 4, 4)
|
289 |
+
'target_Ks': torch.stack(target_Ks, dim=0).float(), # (batch_size, target_view_num, 3, 3)
|
290 |
+
'target_env': target_env,
|
291 |
+
'target_camera_pos': torch.stack(target_camera_pos, dim=0).squeeze(2) # (batch_size, target_view_num, 3)
|
292 |
+
}
|
293 |
+
|
294 |
+
return data
|
295 |
+
|
296 |
+
def prepare_batch_data(self, batch):
|
297 |
+
# breakpoint()
|
298 |
+
lrm_generator_input = {}
|
299 |
+
render_gt = {}
|
300 |
+
|
301 |
+
# input images
|
302 |
+
images = batch['input_images']
|
303 |
+
images = v2.functional.resize(images, self.input_size, interpolation=3, antialias=True).clamp(0, 1)
|
304 |
+
batch_size = images.shape[0]
|
305 |
+
# breakpoint()
|
306 |
+
lrm_generator_input['images'] = images.to(self.device)
|
307 |
+
|
308 |
+
# input cameras and render cameras
|
309 |
+
# input_c2ws = batch['input_c2ws']
|
310 |
+
input_Ks = batch['input_Ks']
|
311 |
+
# target_c2ws = batch['target_c2ws']
|
312 |
+
input_camera_embedding = batch["input_camera_embedding"].to(self.device)
|
313 |
+
|
314 |
+
input_w2cs = batch['input_w2cs']
|
315 |
+
target_w2cs = batch['target_w2cs']
|
316 |
+
render_w2cs = torch.cat([input_w2cs, target_w2cs], dim=1)
|
317 |
+
|
318 |
+
input_camera_pos = batch['input_camera_pos']
|
319 |
+
target_camera_pos = batch['target_camera_pos']
|
320 |
+
render_camera_pos = torch.cat([input_camera_pos, target_camera_pos], dim=1)
|
321 |
+
|
322 |
+
input_extrinsics = input_camera_embedding.flatten(-2)
|
323 |
+
input_extrinsics = input_extrinsics[:, :, :12]
|
324 |
+
input_intrinsics = input_Ks.flatten(-2).to(self.device)
|
325 |
+
input_intrinsics = torch.stack([
|
326 |
+
input_intrinsics[:, :, 0], input_intrinsics[:, :, 4],
|
327 |
+
input_intrinsics[:, :, 2], input_intrinsics[:, :, 5],
|
328 |
+
], dim=-1)
|
329 |
+
cameras = torch.cat([input_extrinsics, input_intrinsics], dim=-1)
|
330 |
+
|
331 |
+
# add noise to input_cameras
|
332 |
+
cameras = cameras + torch.rand_like(cameras) * 0.04 - 0.02
|
333 |
+
|
334 |
+
lrm_generator_input['cameras'] = cameras.to(self.device)
|
335 |
+
lrm_generator_input['render_cameras'] = render_w2cs.to(self.device)
|
336 |
+
lrm_generator_input['cameras_pos'] = render_camera_pos.to(self.device)
|
337 |
+
lrm_generator_input['env'] = []
|
338 |
+
lrm_generator_input['materials'] = []
|
339 |
+
for i in range(batch_size):
|
340 |
+
lrm_generator_input['env'].append( batch['input_env'][i] + batch['target_env'][i])
|
341 |
+
lrm_generator_input['materials'].append( batch['input_materials'][i] + batch['target_materials'][i])
|
342 |
+
lrm_generator_input['albedo'] = torch.cat([batch['input_albedos'],batch['target_albedos']],dim=1)
|
343 |
+
|
344 |
+
# target images
|
345 |
+
target_images = torch.cat([batch['input_images'], batch['target_images']], dim=1)
|
346 |
+
target_albedos = torch.cat([batch['input_albedos'], batch['target_albedos']], dim=1)
|
347 |
+
target_depths = torch.cat([batch['input_depths'], batch['target_depths']], dim=1)
|
348 |
+
target_alphas = torch.cat([batch['input_alphas'], batch['target_alphas']], dim=1)
|
349 |
+
target_normals = torch.cat([batch['input_normals'], batch['target_normals']], dim=1)
|
350 |
+
target_spec_lights = torch.cat([batch['input_spec_light'], batch['target_spec_light']], dim=1)
|
351 |
+
target_diff_lights = torch.cat([batch['input_diff_light'], batch['target_diff_light']], dim=1)
|
352 |
+
|
353 |
+
render_size = self.render_size
|
354 |
+
target_images = v2.functional.resize(
|
355 |
+
target_images, render_size, interpolation=3, antialias=True).clamp(0, 1)
|
356 |
+
target_depths = v2.functional.resize(
|
357 |
+
target_depths, render_size, interpolation=0, antialias=True)
|
358 |
+
target_alphas = v2.functional.resize(
|
359 |
+
target_alphas, render_size, interpolation=0, antialias=True)
|
360 |
+
target_normals = v2.functional.resize(
|
361 |
+
target_normals, render_size, interpolation=3, antialias=True)
|
362 |
+
|
363 |
+
lrm_generator_input['render_size'] = render_size
|
364 |
+
|
365 |
+
render_gt['target_sample_points'] = batch['target_sample_points']
|
366 |
+
render_gt['target_images'] = target_images.to(self.device)
|
367 |
+
render_gt['target_albedos'] = target_albedos.to(self.device)
|
368 |
+
render_gt['target_depths'] = target_depths.to(self.device)
|
369 |
+
render_gt['target_alphas'] = target_alphas.to(self.device)
|
370 |
+
render_gt['target_normals'] = target_normals.to(self.device)
|
371 |
+
render_gt['target_spec_lights'] = target_spec_lights.to(self.device)
|
372 |
+
render_gt['target_diff_lights'] = target_diff_lights.to(self.device)
|
373 |
+
# render_gt['target_spec_albedos'] = target_spec_albedos.to(self.device)
|
374 |
+
# render_gt['target_diff_albedos'] = target_diff_albedos.to(self.device)
|
375 |
+
return lrm_generator_input, render_gt
|
376 |
+
|
377 |
+
def prepare_validation_batch_data(self, batch):
|
378 |
+
lrm_generator_input = {}
|
379 |
+
|
380 |
+
# input images
|
381 |
+
images = batch['input_images']
|
382 |
+
images = v2.functional.resize(
|
383 |
+
images, self.input_size, interpolation=3, antialias=True).clamp(0, 1)
|
384 |
+
|
385 |
+
lrm_generator_input['images'] = images.to(self.device)
|
386 |
+
lrm_generator_input['specular_light'] = batch['specular']
|
387 |
+
lrm_generator_input['diffuse_light'] = batch['diffuse']
|
388 |
+
|
389 |
+
lrm_generator_input['metallic'] = batch['input_metallics']
|
390 |
+
lrm_generator_input['roughness'] = batch['input_roughness']
|
391 |
+
|
392 |
+
proj = self.perspective(0.449, 1, 0.1, 1000., self.device)
|
393 |
+
|
394 |
+
# input cameras
|
395 |
+
input_c2ws = batch['input_c2ws'].flatten(-2)
|
396 |
+
input_Ks = batch['input_Ks'].flatten(-2)
|
397 |
+
|
398 |
+
input_extrinsics = input_c2ws[:, :, :12]
|
399 |
+
input_intrinsics = torch.stack([
|
400 |
+
input_Ks[:, :, 0], input_Ks[:, :, 4],
|
401 |
+
input_Ks[:, :, 2], input_Ks[:, :, 5],
|
402 |
+
], dim=-1)
|
403 |
+
cameras = torch.cat([input_extrinsics, input_intrinsics], dim=-1)
|
404 |
+
|
405 |
+
lrm_generator_input['cameras'] = cameras.to(self.device)
|
406 |
+
|
407 |
+
# render cameras
|
408 |
+
render_c2ws = batch['render_c2ws']
render_w2cs = torch.linalg.inv(render_c2ws)  # assumed fix: render_w2cs is used below but was never defined
|
409 |
+
|
410 |
+
lrm_generator_input['camera_pos'] = torch.linalg.inv(render_w2cs.to(self.device) @ rotate_x(np.pi / 2, self.device))[..., :3, 3]
|
411 |
+
render_w2cs = ( render_w2cs @ rotate_x(np.pi / 2) )
|
412 |
+
|
413 |
+
lrm_generator_input['render_cameras'] = render_w2cs.to(self.device)
|
414 |
+
lrm_generator_input['render_size'] = 384
|
415 |
+
|
416 |
+
return lrm_generator_input
|
417 |
+
|
418 |
+
def forward_lrm_generator(self, images, cameras, camera_pos,env, materials, albedo_map, render_cameras, render_size=512, sample_points=None, gt_albedo_map=None):
|
419 |
+
planes = torch.utils.checkpoint.checkpoint(
|
420 |
+
self.lrm_generator.forward_planes,
|
421 |
+
images,
|
422 |
+
cameras,
|
423 |
+
use_reentrant=False,
|
424 |
+
)
|
425 |
+
out = self.lrm_generator.forward_geometry(
|
426 |
+
planes,
|
427 |
+
render_cameras,
|
428 |
+
camera_pos,
|
429 |
+
env,
|
430 |
+
materials,
|
431 |
+
albedo_map,
|
432 |
+
render_size,
|
433 |
+
sample_points,
|
434 |
+
gt_albedo_map
|
435 |
+
)
|
436 |
+
return out
|
437 |
+
|
438 |
+
def forward(self, lrm_generator_input, gt_albedo_map=None):
|
439 |
+
images = lrm_generator_input['images']
|
440 |
+
cameras = lrm_generator_input['cameras']
|
441 |
+
render_cameras = lrm_generator_input['render_cameras']
|
442 |
+
render_size = lrm_generator_input['render_size']
|
443 |
+
env = lrm_generator_input['env']
|
444 |
+
materials = lrm_generator_input['materials']
|
445 |
+
albedo_map = lrm_generator_input['albedo']
|
446 |
+
camera_pos = lrm_generator_input['cameras_pos']
|
447 |
+
|
448 |
+
out = self.forward_lrm_generator(
|
449 |
+
images, cameras, camera_pos, env, materials, albedo_map, render_cameras, render_size=render_size, sample_points=self.sample_points, gt_albedo_map=gt_albedo_map)
|
450 |
+
|
451 |
+
return out
|
452 |
+
|
453 |
+
def training_step(self, batch, batch_idx):
|
454 |
+
batch = self.collate_fn(batch)
|
455 |
+
lrm_generator_input, render_gt = self.prepare_batch_data(batch)
|
456 |
+
if self.use_gt_albedo:
|
457 |
+
gt_albedo_map = render_gt['target_albedos']
|
458 |
+
else:
|
459 |
+
gt_albedo_map = None
|
460 |
+
render_out = self.forward(lrm_generator_input, gt_albedo_map=gt_albedo_map)
|
461 |
+
|
462 |
+
loss, loss_dict = self.compute_loss(render_out, render_gt)
|
463 |
+
|
464 |
+
self.log_dict(loss_dict, prog_bar=True, logger=True, on_step=True, on_epoch=True, batch_size=len(batch['input_images']), sync_dist=True)
|
465 |
+
|
466 |
+
if self.global_step % 20 == 0 and self.global_rank == 0 :
|
467 |
+
B, N, C, H, W = render_gt['target_images'].shape
|
468 |
+
N_in = lrm_generator_input['images'].shape[1]
|
469 |
+
|
470 |
+
target_images = rearrange(render_gt['target_images'], 'b n c h w -> b c h (n w)')
|
471 |
+
render_images = rearrange(render_out['pbr_img'], 'b n c h w -> b c h (n w)')
|
472 |
+
target_alphas = rearrange(repeat(render_gt['target_alphas'], 'b n 1 h w -> b n 3 h w'), 'b n c h w -> b c h (n w)')
|
473 |
+
target_spec_light = rearrange(render_gt['target_spec_lights'], 'b n c h w -> b c h (n w)')
|
474 |
+
target_diff_light = rearrange(render_gt['target_diff_lights'], 'b n c h w -> b c h (n w)')
|
475 |
+
|
476 |
+
render_alphas = rearrange(render_out['mask'], 'b n c h w -> b c h (n w)')
|
477 |
+
render_albodos = rearrange(render_out['albedo'], 'b n c h w -> b c h (n w)')
|
478 |
+
target_albedos = rearrange(render_gt['target_albedos'], 'b n c h w -> b c h (n w)')
|
479 |
+
|
480 |
+
render_spec_light = rearrange(render_out['pbr_spec_light'], 'b n c h w -> b c h (n w)')
|
481 |
+
render_diffuse_light = rearrange(render_out['pbr_diffuse_light'], 'b n c h w -> b c h (n w)')
|
482 |
+
render_normal = rearrange(render_out['normal_img'], 'b n c h w -> b c h (n w)')
|
483 |
+
target_depths = rearrange(render_gt['target_depths'], 'b n c h w -> b c h (n w)')
|
484 |
+
render_depths = rearrange(render_out['depth'], 'b n c h w -> b c h (n w)')
|
485 |
+
target_normals = rearrange(render_gt['target_normals'], 'b n c h w -> b c h (n w)')
|
486 |
+
|
487 |
+
MAX_DEPTH = torch.max(target_depths)
|
488 |
+
target_depths = target_depths / MAX_DEPTH * target_alphas
|
489 |
+
render_depths = render_depths / MAX_DEPTH * render_alphas
|
490 |
+
|
491 |
+
grid = torch.cat([
|
492 |
+
target_images, render_images,
|
493 |
+
target_alphas, render_alphas,
|
494 |
+
target_albedos, render_albodos,
|
495 |
+
target_spec_light, render_spec_light,
|
496 |
+
target_diff_light, render_diffuse_light,
|
497 |
+
(target_normals+1)/2, (render_normal+1)/2,
|
498 |
+
target_depths, render_depths
|
499 |
+
], dim=-2).detach().cpu()
|
500 |
+
grid = make_grid(grid, nrow=target_images.shape[0], normalize=True, value_range=(0, 1))
|
501 |
+
|
502 |
+
image_path = os.path.join(self.logdir, 'images', f'train_{self.global_step:07d}.png')
|
503 |
+
save_image(grid, image_path)
|
504 |
+
print(f"Saved image to {image_path}")
|
505 |
+
return loss
|
506 |
+
|
507 |
+
|
508 |
+
def total_variation_loss(self, img, beta=2.0):
|
509 |
+
bs_img, n_view, c_img, h_img, w_img = img.size()
|
510 |
+
tv_h = torch.pow(img[...,1:,:]-img[...,:-1,:], beta).sum()
|
511 |
+
tv_w = torch.pow(img[...,:,1:]-img[...,:,:-1], beta).sum()
|
512 |
+
return (tv_h+tv_w)/(bs_img*n_view*c_img*h_img*w_img)
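# --- Editor's note (illustrative, not part of the original file) ---
# total_variation_loss above sums squared differences between neighbouring entries along the
# last two axes of a (B, N, C, H, W) tensor and normalizes by the element count; in
# compute_loss it is applied to the predicted triplane features to encourage smoothness.
# Sanity check under that reading:
#     assert self.total_variation_loss(torch.zeros(1, 3, 8, 32, 32)).item() == 0.0
#     assert self.total_variation_loss(torch.rand(1, 3, 8, 32, 32)).item() > 0.0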
|
513 |
+
|
514 |
+
|
515 |
+
def compute_loss(self, render_out, render_gt):
|
516 |
+
# NOTE: the rgb value range of OpenLRM is [0, 1]
|
517 |
+
render_albedo_image = render_out['albedo']
|
518 |
+
render_pbr_image = render_out['pbr_img']
|
519 |
+
render_spec_light = render_out['pbr_spec_light']
|
520 |
+
render_diff_light = render_out['pbr_diffuse_light']
|
521 |
+
|
522 |
+
target_images = render_gt['target_images'].to(render_albedo_image)
|
523 |
+
target_albedos = render_gt['target_albedos'].to(render_albedo_image)
|
524 |
+
target_spec_light = render_gt['target_spec_lights'].to(render_albedo_image)
|
525 |
+
target_diff_light = render_gt['target_diff_lights'].to(render_albedo_image)
|
526 |
+
|
527 |
+
render_images = rearrange(render_pbr_image, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
528 |
+
target_images = rearrange(target_images, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
529 |
+
|
530 |
+
render_albedos = rearrange(render_albedo_image, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
531 |
+
target_albedos = rearrange(target_albedos, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
532 |
+
|
533 |
+
render_spec_light = rearrange(render_spec_light, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
534 |
+
target_spec_light = rearrange(target_spec_light, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
535 |
+
|
536 |
+
render_diff_light = rearrange(render_diff_light, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
537 |
+
target_diff_light = rearrange(target_diff_light, 'b n ... -> (b n) ...') * 2.0 - 1.0
|
538 |
+
|
539 |
+
|
540 |
+
loss_mse = F.mse_loss(render_images, target_images)
|
541 |
+
loss_mse_albedo = F.mse_loss(render_albedos, target_albedos)
|
542 |
+
loss_rgb_lpips = 2.0 * self.lpips(render_images, target_images)
|
543 |
+
loss_albedo_lpips = 2.0 * self.lpips(render_albedos, target_albedos)
|
544 |
+
|
545 |
+
loss_spec_light = F.mse_loss(render_spec_light, target_spec_light)
|
546 |
+
loss_diff_light = F.mse_loss(render_diff_light, target_diff_light)
|
547 |
+
loss_spec_light_lpips = 2.0 * self.lpips(render_spec_light.clamp(-1., 1.), target_spec_light.clamp(-1., 1.))
|
548 |
+
loss_diff_light_lpips = 2.0 * self.lpips(render_diff_light.clamp(-1., 1.), target_diff_light.clamp(-1., 1.))
|
549 |
+
|
550 |
+
render_alphas = render_out['mask'][:,:,:1,:,:]
|
551 |
+
target_alphas = render_gt['target_alphas']
|
552 |
+
|
553 |
+
loss_mask = F.mse_loss(render_alphas, target_alphas)
|
554 |
+
render_depths = torch.mean(render_out['depth'], dim=2, keepdim=True)
|
555 |
+
target_depths = torch.mean(render_gt['target_depths'], dim=2, keepdim=True)
|
556 |
+
loss_depth = 0.5 * F.l1_loss(render_depths[(target_alphas>0)], target_depths[target_alphas>0])
|
557 |
+
|
558 |
+
render_normals = render_out['normal'][...,:3].permute(0,3,1,2).unsqueeze(0)
|
559 |
+
target_normals = render_gt['target_normals']
|
560 |
+
similarity = (render_normals * target_normals).sum(dim=-3).abs()
|
561 |
+
normal_mask = target_alphas.squeeze(-3)
|
562 |
+
loss_normal = 1 - similarity[normal_mask>0].mean()
|
563 |
+
loss_normal = 0.2 * loss_normal * 1.0
|
564 |
+
|
565 |
+
# tv loss
|
566 |
+
if self.use_tv_loss:
|
567 |
+
triplane = render_out['triplane']
|
568 |
+
tv_loss = self.total_variation_loss(triplane, beta=2.0)
|
569 |
+
|
570 |
+
# flexicubes regularization loss
|
571 |
+
sdf = render_out['sdf']
|
572 |
+
sdf_reg_loss = render_out['sdf_reg_loss']
|
573 |
+
sdf_reg_loss_entropy = sdf_reg_loss_batch(sdf, self.lrm_generator.geometry.all_edges).mean() * 0.01
|
574 |
+
_, flexicubes_surface_reg, flexicubes_weights_reg = sdf_reg_loss
|
575 |
+
flexicubes_surface_reg = flexicubes_surface_reg.mean() * 0.5
|
576 |
+
flexicubes_weights_reg = flexicubes_weights_reg.mean() * 0.1
|
577 |
+
|
578 |
+
loss_reg = sdf_reg_loss_entropy + flexicubes_surface_reg + flexicubes_weights_reg
|
579 |
+
loss_reg = loss_reg
|
580 |
+
loss = loss_mse + loss_rgb_lpips + loss_albedo_lpips + loss_mask + loss_reg + loss_mse_albedo + loss_depth + \
|
581 |
+
loss_normal + loss_spec_light + loss_diff_light + loss_spec_light_lpips + loss_diff_light_lpips
|
582 |
+
if self.use_tv_loss:
|
583 |
+
loss += tv_loss * 2e-4
|
584 |
+
|
585 |
+
prefix = 'train'
|
586 |
+
loss_dict = {}
|
587 |
+
|
588 |
+
loss_dict.update({f'{prefix}/loss_mse': loss_mse.item()})
|
589 |
+
loss_dict.update({f'{prefix}/loss_mse_albedo': loss_mse_albedo.item()})
|
590 |
+
loss_dict.update({f'{prefix}/loss_rgb_lpips': loss_rgb_lpips.item()})
|
591 |
+
loss_dict.update({f'{prefix}/loss_albedo_lpips': loss_albedo_lpips.item()})
|
592 |
+
loss_dict.update({f'{prefix}/loss_mask': loss_mask.item()})
|
593 |
+
loss_dict.update({f'{prefix}/loss_normal': loss_normal.item()})
|
594 |
+
loss_dict.update({f'{prefix}/loss_depth': loss_depth.item()})
|
595 |
+
loss_dict.update({f'{prefix}/loss_spec_light': loss_spec_light.item()})
|
596 |
+
loss_dict.update({f'{prefix}/loss_diff_light': loss_diff_light.item()})
|
597 |
+
loss_dict.update({f'{prefix}/loss_spec_light_lpips': loss_spec_light_lpips.item()})
|
598 |
+
loss_dict.update({f'{prefix}/loss_diff_light_lpips': loss_diff_light_lpips.item()})
|
599 |
+
loss_dict.update({f'{prefix}/loss_reg_sdf': sdf_reg_loss_entropy.item()})
|
600 |
+
loss_dict.update({f'{prefix}/loss_reg_surface': flexicubes_surface_reg.item()})
|
601 |
+
loss_dict.update({f'{prefix}/loss_reg_weights': flexicubes_weights_reg.item()})
|
602 |
+
if self.use_tv_loss:
|
603 |
+
loss_dict.update({f'{prefix}/loss_tv': tv_loss.item()})
|
604 |
+
loss_dict.update({f'{prefix}/loss': loss.item()})
|
605 |
+
|
606 |
+
return loss, loss_dict
|
607 |
+
|
608 |
+
@torch.no_grad()
|
609 |
+
def validation_step(self, batch, batch_idx):
|
610 |
+
lrm_generator_input = self.prepare_validation_batch_data(batch)
|
611 |
+
|
612 |
+
render_out = self.forward(lrm_generator_input)
|
613 |
+
render_images = rearrange(render_out['pbr_img'], 'b n c h w -> b c h (n w)')
|
614 |
+
render_albodos = rearrange(render_out['img'], 'b n c h w -> b c h (n w)')
|
615 |
+
|
616 |
+
self.validation_step_outputs.append(render_images)
|
617 |
+
self.validation_step_outputs.append(render_albodos)
|
618 |
+
|
619 |
+
def on_validation_epoch_end(self):
|
620 |
+
images = torch.cat(self.validation_step_outputs, dim=0)
|
621 |
+
|
622 |
+
all_images = self.all_gather(images)
|
623 |
+
all_images = rearrange(all_images, 'r b c h w -> (r b) c h w')
|
624 |
+
|
625 |
+
if self.global_rank == 0:
|
626 |
+
image_path = os.path.join(self.logdir, 'images_val', f'val_{self.global_step:07d}.png')
|
627 |
+
|
628 |
+
grid = make_grid(all_images, nrow=1, normalize=True, value_range=(0, 1))
|
629 |
+
|
630 |
+
save_image(grid, image_path)
|
631 |
+
print(f"Saved image to {image_path}")
|
632 |
+
|
633 |
+
self.validation_step_outputs.clear()
|
634 |
+
|
635 |
+
def configure_optimizers(self):
|
636 |
+
lr = self.learning_rate
|
637 |
+
|
638 |
+
optimizer = torch.optim.AdamW(
|
639 |
+
self.lrm_generator.parameters(), lr=lr, betas=(0.90, 0.95), weight_decay=0.01)
|
640 |
+
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 100000, eta_min=0)
|
641 |
+
|
642 |
+
return {'optimizer': optimizer, 'lr_scheduler': scheduler}
|
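
The `total_variation_loss` above is the smoothness regularizer applied to the predicted triplane features (weighted by `2e-4` when `use_tv_loss` is set). Below is a self-contained sketch of the same computation on a dummy triplane; the tensor shape is an illustrative assumption, not the shape produced by the generator.

```python
import torch

def total_variation_loss(img, beta=2.0):
    # img: [batch, n_planes, channels, height, width], matching the triplane layout above
    bs_img, n_view, c_img, h_img, w_img = img.size()
    tv_h = torch.pow(img[..., 1:, :] - img[..., :-1, :], beta).sum()
    tv_w = torch.pow(img[..., :, 1:] - img[..., :, :-1], beta).sum()
    return (tv_h + tv_w) / (bs_img * n_view * c_img * h_img * w_img)

triplane = torch.randn(1, 3, 80, 64, 64)       # illustrative triplane shape
print(total_variation_loss(triplane).item())   # weighted by 2e-4 before joining the total loss
```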
src/models/__init__.py ADDED: File without changes
src/models/__pycache__/__init__.cpython-310.pyc ADDED: Binary file (146 Bytes)
src/models/__pycache__/lrm_mesh.cpython-310.pyc ADDED: Binary file (11.6 kB)
src/models/decoder/__init__.py ADDED: File without changes
src/models/decoder/__pycache__/__init__.cpython-310.pyc ADDED: Binary file (154 Bytes)
src/models/decoder/__pycache__/transformer.cpython-310.pyc ADDED: Binary file (3.45 kB)

src/models/decoder/transformer.py ADDED @@ -0,0 +1,123 @@

# Copyright (c) 2023, Zexin He
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import torch
import torch.nn as nn


class BasicTransformerBlock(nn.Module):
    """
    Transformer block that takes in a cross-attention condition and another modulation vector applied to sub-blocks.
    """
    # use attention from torch.nn.MultiHeadAttention
    # Block contains a cross-attention layer, a self-attention layer, and a MLP
    def __init__(
        self,
        inner_dim: int,
        cond_dim: int,
        num_heads: int,
        eps: float,
        attn_drop: float = 0.,
        attn_bias: bool = False,
        mlp_ratio: float = 4.,
        mlp_drop: float = 0.,
    ):
        super().__init__()

        self.norm1 = nn.LayerNorm(inner_dim)
        self.cross_attn = nn.MultiheadAttention(
            embed_dim=inner_dim, num_heads=num_heads, kdim=cond_dim, vdim=cond_dim,
            dropout=attn_drop, bias=attn_bias, batch_first=True)
        self.norm2 = nn.LayerNorm(inner_dim)
        self.self_attn = nn.MultiheadAttention(
            embed_dim=inner_dim, num_heads=num_heads,
            dropout=attn_drop, bias=attn_bias, batch_first=True)
        self.norm3 = nn.LayerNorm(inner_dim)
        self.mlp = nn.Sequential(
            nn.Linear(inner_dim, int(inner_dim * mlp_ratio)),
            nn.GELU(),
            nn.Dropout(mlp_drop),
            nn.Linear(int(inner_dim * mlp_ratio), inner_dim),
            nn.Dropout(mlp_drop),
        )

    def forward(self, x, cond):
        # x: [N, L, D]
        # cond: [N, L_cond, D_cond]
        x = x + self.cross_attn(self.norm1(x), cond, cond)[0]
        before_sa = self.norm2(x)
        x = x + self.self_attn(before_sa, before_sa, before_sa)[0]
        x = x + self.mlp(self.norm3(x))
        return x


class TriplaneTransformer(nn.Module):
    """
    Transformer with condition that generates a triplane representation.

    Reference:
        Timm: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L486
    """
    def __init__(
        self,
        inner_dim: int,
        image_feat_dim: int,
        triplane_low_res: int,
        triplane_high_res: int,
        triplane_dim: int,
        num_layers: int,
        num_heads: int,
        eps: float = 1e-6,
    ):
        super().__init__()

        # attributes
        self.triplane_low_res = triplane_low_res
        self.triplane_high_res = triplane_high_res
        self.triplane_dim = triplane_dim

        # modules
        # initialize pos_embed with 1/sqrt(dim) * N(0, 1)
        self.pos_embed = nn.Parameter(torch.randn(1, 3*triplane_low_res**2, inner_dim) * (1. / inner_dim) ** 0.5)
        self.layers = nn.ModuleList([
            BasicTransformerBlock(
                inner_dim=inner_dim, cond_dim=image_feat_dim, num_heads=num_heads, eps=eps)
            for _ in range(num_layers)
        ])
        self.norm = nn.LayerNorm(inner_dim, eps=eps)
        self.deconv = nn.ConvTranspose2d(inner_dim, triplane_dim, kernel_size=2, stride=2, padding=0)

    def forward(self, image_feats):
        # image_feats: [N, L_cond, D_cond]

        N = image_feats.shape[0]
        H = W = self.triplane_low_res
        L = 3 * H * W

        x = self.pos_embed.repeat(N, 1, 1)  # [N, L, D]
        for layer in self.layers:
            x = layer(x, image_feats)
        x = self.norm(x)

        # separate each plane and apply deconv
        x = x.view(N, 3, H, W, -1)
        x = torch.einsum('nihwd->indhw', x)  # [3, N, D, H, W]
        x = x.contiguous().view(3*N, -1, H, W)  # [3*N, D, H, W]
        x = self.deconv(x)  # [3*N, D', H', W']
        x = x.view(3, N, *x.shape[-3:])  # [3, N, D', H', W']
        x = torch.einsum('indhw->nidhw', x)  # [N, 3, D', H', W']
        x = x.contiguous()

        return x
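
For orientation, a minimal sketch of how `TriplaneTransformer` can be exercised with dummy image features. Every dimension below is an illustrative assumption rather than a value taken from `configs/PRM.yaml`.

```python
import torch
from src.models.decoder.transformer import TriplaneTransformer

# All sizes here are illustrative assumptions, not the configured values.
decoder = TriplaneTransformer(
    inner_dim=512, image_feat_dim=768,
    triplane_low_res=32, triplane_high_res=64,
    triplane_dim=80, num_layers=4, num_heads=8,
)
image_feats = torch.randn(2, 6 * 197, 768)  # (batch, tokens from 6 posed views, DINO hidden size)
planes = decoder(image_feats)
print(planes.shape)  # torch.Size([2, 3, 80, 64, 64]): 32x32 tokens upsampled 2x by the deconv
```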
src/models/encoder/__init__.py ADDED: File without changes
src/models/encoder/__pycache__/__init__.cpython-310.pyc ADDED: Binary file (154 Bytes)
src/models/encoder/__pycache__/dino.cpython-310.pyc ADDED: Binary file (17.2 kB)
src/models/encoder/__pycache__/dino_wrapper.cpython-310.pyc ADDED: Binary file (2.54 kB)

src/models/encoder/dino.py ADDED @@ -0,0 +1,550 @@

# coding=utf-8
# Copyright 2021 Google AI, Ross Wightman, The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch ViT model."""


import collections.abc
import math
from typing import Dict, List, Optional, Set, Tuple, Union

import torch
from torch import nn

from transformers.activations import ACT2FN
from transformers.modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPooling,
)
from transformers import PreTrainedModel, ViTConfig
from transformers.pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer


class ViTEmbeddings(nn.Module):
    """
    Construct the CLS token, position and patch embeddings. Optionally, also the mask token.
    """

    def __init__(self, config: ViTConfig, use_mask_token: bool = False) -> None:
        super().__init__()

        self.cls_token = nn.Parameter(torch.randn(1, 1, config.hidden_size))
        self.mask_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size)) if use_mask_token else None
        self.patch_embeddings = ViTPatchEmbeddings(config)
        num_patches = self.patch_embeddings.num_patches
        self.position_embeddings = nn.Parameter(torch.randn(1, num_patches + 1, config.hidden_size))
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.config = config

    def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: int, width: int) -> torch.Tensor:
        """
        This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher
        resolution images.

        Source:
        https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174
        """

        num_patches = embeddings.shape[1] - 1
        num_positions = self.position_embeddings.shape[1] - 1
        if num_patches == num_positions and height == width:
            return self.position_embeddings
        class_pos_embed = self.position_embeddings[:, 0]
        patch_pos_embed = self.position_embeddings[:, 1:]
        dim = embeddings.shape[-1]
        h0 = height // self.config.patch_size
        w0 = width // self.config.patch_size
        # we add a small number to avoid floating point error in the interpolation
        # see discussion at https://github.com/facebookresearch/dino/issues/8
        h0, w0 = h0 + 0.1, w0 + 0.1
        patch_pos_embed = patch_pos_embed.reshape(1, int(math.sqrt(num_positions)), int(math.sqrt(num_positions)), dim)
        patch_pos_embed = patch_pos_embed.permute(0, 3, 1, 2)
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed,
            scale_factor=(h0 / math.sqrt(num_positions), w0 / math.sqrt(num_positions)),
            mode="bicubic",
            align_corners=False,
        )
        assert int(h0) == patch_pos_embed.shape[-2] and int(w0) == patch_pos_embed.shape[-1]
        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
        return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)

    def forward(
        self,
        pixel_values: torch.Tensor,
        bool_masked_pos: Optional[torch.BoolTensor] = None,
        interpolate_pos_encoding: bool = False,
    ) -> torch.Tensor:
        batch_size, num_channels, height, width = pixel_values.shape
        embeddings = self.patch_embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)

        if bool_masked_pos is not None:
            seq_length = embeddings.shape[1]
            mask_tokens = self.mask_token.expand(batch_size, seq_length, -1)
            # replace the masked visual tokens by mask_tokens
            mask = bool_masked_pos.unsqueeze(-1).type_as(mask_tokens)
            embeddings = embeddings * (1.0 - mask) + mask_tokens * mask

        # add the [CLS] token to the embedded patch tokens
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        embeddings = torch.cat((cls_tokens, embeddings), dim=1)

        # add positional encoding to each token
        if interpolate_pos_encoding:
            embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width)
        else:
            embeddings = embeddings + self.position_embeddings

        embeddings = self.dropout(embeddings)

        return embeddings


class ViTPatchEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config):
        super().__init__()
        image_size, patch_size = config.image_size, config.patch_size
        num_channels, hidden_size = config.num_channels, config.hidden_size

        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.num_patches = num_patches

        self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size)

    def forward(self, pixel_values: torch.Tensor, interpolate_pos_encoding: bool = False) -> torch.Tensor:
        batch_size, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
                f" Expected {self.num_channels} but got {num_channels}."
            )
        if not interpolate_pos_encoding:
            if height != self.image_size[0] or width != self.image_size[1]:
                raise ValueError(
                    f"Input image size ({height}*{width}) doesn't match model"
                    f" ({self.image_size[0]}*{self.image_size[1]})."
                )
        embeddings = self.projection(pixel_values).flatten(2).transpose(1, 2)
        return embeddings


class ViTSelfAttention(nn.Module):
    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size {config.hidden_size,} is not a multiple of the number of attention "
                f"heads {config.num_attention_heads}."
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.key = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.value = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor:
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self, hidden_states, head_mask: Optional[torch.Tensor] = None, output_attentions: bool = False
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        mixed_query_layer = self.query(hidden_states)

        key_layer = self.transpose_for_scores(self.key(hidden_states))
        value_layer = self.transpose_for_scores(self.value(hidden_states))
        query_layer = self.transpose_for_scores(mixed_query_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)

        # Normalize the attention scores to probabilities.
        attention_probs = nn.functional.softmax(attention_scores, dim=-1)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(new_context_layer_shape)

        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)

        return outputs


class ViTSelfOutput(nn.Module):
    """
    The residual connection is defined in ViTLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    """

    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        return hidden_states


class ViTAttention(nn.Module):
    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        self.attention = ViTSelfAttention(config)
        self.output = ViTSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads: Set[int]) -> None:
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.attention.num_attention_heads, self.attention.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.attention.query = prune_linear_layer(self.attention.query, index)
        self.attention.key = prune_linear_layer(self.attention.key, index)
        self.attention.value = prune_linear_layer(self.attention.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.attention.num_attention_heads = self.attention.num_attention_heads - len(heads)
        self.attention.all_head_size = self.attention.attention_head_size * self.attention.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        self_outputs = self.attention(hidden_states, head_mask, output_attentions)

        attention_output = self.output(self_outputs[0], hidden_states)

        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


class ViTIntermediate(nn.Module):
    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)

        return hidden_states


class ViTOutput(nn.Module):
    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        hidden_states = hidden_states + input_tensor

        return hidden_states


def modulate(x, shift, scale):
    return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)


class ViTLayer(nn.Module):
    """This corresponds to the Block class in the timm implementation."""

    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = ViTAttention(config)
        self.intermediate = ViTIntermediate(config)
        self.output = ViTOutput(config)
        self.layernorm_before = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.layernorm_after = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            nn.Linear(config.hidden_size, 4 * config.hidden_size, bias=True)
        )
        nn.init.constant_(self.adaLN_modulation[-1].weight, 0)
        nn.init.constant_(self.adaLN_modulation[-1].bias, 0)

    def forward(
        self,
        hidden_states: torch.Tensor,
        adaln_input: torch.Tensor = None,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        shift_msa, scale_msa, shift_mlp, scale_mlp = self.adaLN_modulation(adaln_input).chunk(4, dim=1)

        self_attention_outputs = self.attention(
            modulate(self.layernorm_before(hidden_states), shift_msa, scale_msa),  # in ViT, layernorm is applied before self-attention
            head_mask,
            output_attentions=output_attentions,
        )
        attention_output = self_attention_outputs[0]
        outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        # first residual connection
        hidden_states = attention_output + hidden_states

        # in ViT, layernorm is also applied after self-attention
        layer_output = modulate(self.layernorm_after(hidden_states), shift_mlp, scale_mlp)
        layer_output = self.intermediate(layer_output)

        # second residual connection is done here
        layer_output = self.output(layer_output, hidden_states)

        outputs = (layer_output,) + outputs

        return outputs


class ViTEncoder(nn.Module):
    def __init__(self, config: ViTConfig) -> None:
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([ViTLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        adaln_input: torch.Tensor = None,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = True,
    ) -> Union[tuple, BaseModelOutput]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.__call__,
                    hidden_states,
                    adaln_input,
                    layer_head_mask,
                    output_attentions,
                )
            else:
                layer_outputs = layer_module(hidden_states, adaln_input, layer_head_mask, output_attentions)

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )


class ViTPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = ViTConfig
    base_model_prefix = "vit"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True
    _no_split_modules = ["ViTEmbeddings", "ViTLayer"]

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            # Upcast the input in `fp32` and cast it back to desired `dtype` to avoid
            # `trunc_normal_cpu` not implemented in `half` issues
            module.weight.data = nn.init.trunc_normal_(
                module.weight.data.to(torch.float32), mean=0.0, std=self.config.initializer_range
            ).to(module.weight.dtype)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, ViTEmbeddings):
            module.position_embeddings.data = nn.init.trunc_normal_(
                module.position_embeddings.data.to(torch.float32),
                mean=0.0,
                std=self.config.initializer_range,
            ).to(module.position_embeddings.dtype)

            module.cls_token.data = nn.init.trunc_normal_(
                module.cls_token.data.to(torch.float32),
                mean=0.0,
                std=self.config.initializer_range,
            ).to(module.cls_token.dtype)


class ViTModel(ViTPreTrainedModel):
    def __init__(self, config: ViTConfig, add_pooling_layer: bool = True, use_mask_token: bool = False):
        super().__init__(config)
        self.config = config

        self.embeddings = ViTEmbeddings(config, use_mask_token=use_mask_token)
        self.encoder = ViTEncoder(config)

        self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.pooler = ViTPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> ViTPatchEmbeddings:
        return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune: Dict[int, List[int]]) -> None:
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        adaln_input: Optional[torch.Tensor] = None,
        bool_masked_pos: Optional[torch.BoolTensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        interpolate_pos_encoding: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPooling]:
        r"""
        bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
            Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        # TODO: maybe have a cleaner way to cast the input (from `ImageProcessor` side?)
        expected_dtype = self.embeddings.patch_embeddings.projection.weight.dtype
        if pixel_values.dtype != expected_dtype:
            pixel_values = pixel_values.to(expected_dtype)

        embedding_output = self.embeddings(
            pixel_values, bool_masked_pos=bool_masked_pos, interpolate_pos_encoding=interpolate_pos_encoding
        )

        encoder_outputs = self.encoder(
            embedding_output,
            adaln_input=adaln_input,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        sequence_output = self.layernorm(sequence_output)
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            head_outputs = (sequence_output, pooled_output) if pooled_output is not None else (sequence_output,)
            return head_outputs + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )


class ViTPooler(nn.Module):
    def __init__(self, config: ViTConfig):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output
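
The main deviation from the stock Hugging Face ViT implementation is the per-layer adaLN path: `adaln_input` (in this repo, the camera embedding produced by `DinoWrapper` below) is projected by `adaLN_modulation` into shift/scale pairs that modulate the pre-attention and pre-MLP layer norms, and the projection is zero-initialized so training starts from the unmodulated network. A minimal, self-contained sketch of that mechanism with illustrative sizes:

```python
import torch
import torch.nn as nn

def modulate(x, shift, scale):
    # Same formula as in dino.py: shift/scale are broadcast over the token dimension.
    return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)

hidden = 768                                   # illustrative ViT hidden size
adaln = nn.Sequential(nn.SiLU(), nn.Linear(hidden, 4 * hidden))
nn.init.zeros_(adaln[-1].weight)               # zero init, as in ViTLayer.__init__
nn.init.zeros_(adaln[-1].bias)

tokens = torch.randn(2, 197, hidden)           # (batch*views, seq_len, hidden)
cond = torch.randn(2, hidden)                  # per-view conditioning vector
shift_msa, scale_msa, shift_mlp, scale_mlp = adaln(cond).chunk(4, dim=1)

normed = nn.LayerNorm(hidden)(tokens)
out = modulate(normed, shift_msa, scale_msa)
assert torch.allclose(out, normed)             # identity at init, since shift = scale = 0
```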
src/models/encoder/dino_wrapper.py ADDED @@ -0,0 +1,80 @@

# Copyright (c) 2023, Zexin He
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import torch.nn as nn
from transformers import ViTImageProcessor
from einops import rearrange, repeat
from .dino import ViTModel


class DinoWrapper(nn.Module):
    """
    Dino v1 wrapper using huggingface transformer implementation.
    """
    def __init__(self, model_name: str, freeze: bool = True):
        super().__init__()
        self.model, self.processor = self._build_dino(model_name)
        self.camera_embedder = nn.Sequential(
            nn.Linear(16, self.model.config.hidden_size, bias=True),
            nn.SiLU(),
            nn.Linear(self.model.config.hidden_size, self.model.config.hidden_size, bias=True)
        )
        if freeze:
            self._freeze()

    def forward(self, image, camera):
        # image: [B, N, C, H, W]
        # camera: [B, N, D]
        # RGB image with [0,1] scale and properly sized
        if image.ndim == 5:
            image = rearrange(image, 'b n c h w -> (b n) c h w')
        dtype = image.dtype
        inputs = self.processor(
            images=image.float(),
            return_tensors="pt",
            do_rescale=False,
            do_resize=False,
        ).to(self.model.device).to(dtype)
        # embed camera
        N = camera.shape[1]
        camera_embeddings = self.camera_embedder(camera)
        camera_embeddings = rearrange(camera_embeddings, 'b n d -> (b n) d')
        embeddings = camera_embeddings
        # This resampling of positional embedding uses bicubic interpolation
        outputs = self.model(**inputs, adaln_input=embeddings, interpolate_pos_encoding=True)
        last_hidden_states = outputs.last_hidden_state
        return last_hidden_states

    def _freeze(self):
        print(f"======== Freezing DinoWrapper ========")
        self.model.eval()
        for name, param in self.model.named_parameters():
            param.requires_grad = False

    @staticmethod
    def _build_dino(model_name: str, proxy_error_retries: int = 3, proxy_error_cooldown: int = 5):
        import requests
        try:
            model = ViTModel.from_pretrained(model_name, add_pooling_layer=False)
            processor = ViTImageProcessor.from_pretrained(model_name)
            return model, processor
        except requests.exceptions.ProxyError as err:
            if proxy_error_retries > 0:
                print(f"Huggingface ProxyError: Retrying in {proxy_error_cooldown} seconds...")
                import time
                time.sleep(proxy_error_cooldown)
                return DinoWrapper._build_dino(model_name, proxy_error_retries - 1, proxy_error_cooldown)
            else:
                raise err
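
A minimal sketch of how `DinoWrapper` is typically driven: multi-view images plus flattened 4x4 camera matrices (16 values per view) go in, per-view token sequences come out. The checkpoint name and tensor sizes below are illustrative assumptions; the real ones come from the PRM config.

```python
import torch
from src.models.encoder.dino_wrapper import DinoWrapper

# 'facebook/dino-vitb16' is an assumed checkpoint name, used here for illustration only.
encoder = DinoWrapper(model_name='facebook/dino-vitb16', freeze=True)

images = torch.rand(1, 6, 3, 320, 320)   # B=1 object, N=6 views, RGB in [0, 1]
cameras = torch.rand(1, 6, 16)           # flattened 4x4 camera matrix per view
tokens = encoder(images, cameras)        # [(B*N), 1 + (320/16)**2, hidden] = [6, 401, 768]
```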
src/models/geometry/__init__.py ADDED @@ -0,0 +1,7 @@

# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION & AFFILIATES and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION & AFFILIATES is strictly prohibited.

src/models/geometry/__pycache__/__init__.cpython-310.pyc ADDED: Binary file (155 Bytes)

src/models/geometry/camera/__init__.py ADDED @@ -0,0 +1,16 @@

# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION & AFFILIATES and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION & AFFILIATES is strictly prohibited.

import torch
from torch import nn


class Camera(nn.Module):
    def __init__(self):
        super(Camera, self).__init__()
        pass
src/models/geometry/camera/__pycache__/__init__.cpython-310.pyc ADDED: Binary file (547 Bytes)
src/models/geometry/camera/__pycache__/perspective_camera.cpython-310.pyc ADDED: Binary file (1.43 kB)

src/models/geometry/camera/perspective_camera.py ADDED @@ -0,0 +1,35 @@

# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION & AFFILIATES and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION & AFFILIATES is strictly prohibited.

import torch
from . import Camera
import numpy as np


def projection(x=0.1, n=1.0, f=50.0, near_plane=None):
    if near_plane is None:
        near_plane = n
    return np.array(
        [[n / x, 0, 0, 0],
         [0, n / -x, 0, 0],
         [0, 0, -(f + near_plane) / (f - near_plane), -(2 * f * near_plane) / (f - near_plane)],
         [0, 0, -1, 0]]).astype(np.float32)


class PerspectiveCamera(Camera):
    def __init__(self, fovy=49.0, device='cuda'):
        super(PerspectiveCamera, self).__init__()
        self.device = device
        focal = np.tan(fovy / 180.0 * np.pi * 0.5)
        self.proj_mtx = torch.from_numpy(projection(x=focal, f=1000.0, n=1.0, near_plane=0.1)).to(self.device).unsqueeze(dim=0)

    def project(self, points_bxnx4):
        out = torch.matmul(
            points_bxnx4,
            torch.transpose(self.proj_mtx, 1, 2))
        return out
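
A minimal sketch of what `PerspectiveCamera.project` does with a batch of homogeneous camera-space points: it multiplies them by the OpenGL-style projection matrix built in `projection()`. The perspective divide to normalized device coordinates is handled later by the rasterizer and is shown here only for illustration; the CPU device is an assumption for the sketch.

```python
import torch
from src.models.geometry.camera.perspective_camera import PerspectiveCamera

# device='cpu' is an assumption for this sketch; the renderer itself expects CUDA.
camera = PerspectiveCamera(fovy=49.0, device='cpu')

# A batch of three camera-space points in homogeneous coordinates [x, y, z, 1].
points = torch.tensor([[[0.0, 0.0, -2.0, 1.0],
                        [0.5, 0.5, -3.0, 1.0],
                        [-0.5, 0.2, -4.0, 1.0]]])
clip = camera.project(points)            # [1, 3, 4] clip-space coordinates
ndc = clip[..., :3] / clip[..., 3:4]     # perspective divide, done by the rasterizer in practice
```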
src/models/geometry/render/__init__.py ADDED @@ -0,0 +1,8 @@

import torch

class Renderer():
    def __init__(self):
        pass

    def forward(self):
        pass

src/models/geometry/render/__pycache__/__init__.cpython-310.pyc ADDED: Binary file (565 Bytes)
src/models/geometry/render/__pycache__/neural_render.cpython-310.pyc ADDED: Binary file (5.85 kB)
src/models/geometry/render/__pycache__/util.cpython-310.pyc ADDED: Binary file (15.1 kB)

src/models/geometry/render/neural_render.py ADDED @@ -0,0 +1,293 @@

# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION & AFFILIATES and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION & AFFILIATES is strictly prohibited.

import torch
import torch.nn.functional as F
import nvdiffrast.torch as dr
from . import Renderer
from . import util
from . import renderutils as ru
_FG_LUT = None


def interpolate(attr, rast, attr_idx, rast_db=None):
    return dr.interpolate(
        attr.contiguous(), rast, attr_idx, rast_db=rast_db,
        diff_attrs=None if rast_db is None else 'all')


def xfm_points(points, matrix, use_python=True):
    '''Transform points.
    Args:
        points: Tensor containing 3D points with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3]
        matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4]
        use_python: Use PyTorch's torch.matmul (for validation)
    Returns:
        Transformed points in homogeneous 4D with shape [minibatch_size, num_vertices, 4].
    '''
    out = torch.matmul(torch.nn.functional.pad(points, pad=(0, 1), mode='constant', value=1.0), torch.transpose(matrix, 1, 2))
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(out)), "Output of xfm_points contains inf or NaN"
    return out


def dot(x, y):
    return torch.sum(x * y, -1, keepdim=True)


def compute_vertex_normal(v_pos, t_pos_idx):
    i0 = t_pos_idx[:, 0]
    i1 = t_pos_idx[:, 1]
    i2 = t_pos_idx[:, 2]

    v0 = v_pos[i0, :]
    v1 = v_pos[i1, :]
    v2 = v_pos[i2, :]

    face_normals = torch.cross(v1 - v0, v2 - v0)

    # Splat face normals to vertices
    v_nrm = torch.zeros_like(v_pos)
    v_nrm.scatter_add_(0, i0[:, None].repeat(1, 3), face_normals)
    v_nrm.scatter_add_(0, i1[:, None].repeat(1, 3), face_normals)
    v_nrm.scatter_add_(0, i2[:, None].repeat(1, 3), face_normals)

    # Normalize, replace zero (degenerated) normals with some default value
    v_nrm = torch.where(
        dot(v_nrm, v_nrm) > 1e-20, v_nrm, torch.as_tensor([0.0, 0.0, 1.0]).to(v_nrm)
    )
    v_nrm = F.normalize(v_nrm, dim=1)
    assert torch.all(torch.isfinite(v_nrm))

    return v_nrm


class NeuralRender(Renderer):
    def __init__(self, device='cuda', camera_model=None):
        super(NeuralRender, self).__init__()
        self.device = device
        self.ctx = dr.RasterizeCudaContext(device=device)
        self.projection_mtx = None
        self.camera = camera_model

    # ==============================================================================================
    #  pixel shader
    # ==============================================================================================
    # def shade(
    #         self,
    #         gb_pos,
    #         gb_geometric_normal,
    #         gb_normal,
    #         gb_tangent,
    #         gb_texc,
    #         gb_texc_deriv,
    #         view_pos,
    #     ):

    #     ################################################################################
    #     # Texture lookups
    #     ################################################################################
    #     breakpoint()
    #     # Separate kd into alpha and color, default alpha = 1
    #     alpha = kd[..., 3:4] if kd.shape[-1] == 4 else torch.ones_like(kd[..., 0:1])
    #     kd = kd[..., 0:3]

    #     ################################################################################
    #     # Normal perturbation & normal bend
    #     ################################################################################

    #     perturbed_nrm = None

    #     gb_normal = ru.prepare_shading_normal(gb_pos, view_pos, perturbed_nrm, gb_normal, gb_tangent, gb_geometric_normal, two_sided_shading=True, opengl=True)

    #     ################################################################################
    #     # Evaluate BSDF
    #     ################################################################################

    #     assert 'bsdf' in material or bsdf is not None, "Material must specify a BSDF type"
    #     bsdf = material['bsdf'] if bsdf is None else bsdf
    #     if bsdf == 'pbr':
    #         if isinstance(lgt, light.EnvironmentLight):
    #             shaded_col = lgt.shade(gb_pos, gb_normal, kd, ks, view_pos, specular=True)
    #         else:
    #             assert False, "Invalid light type"
    #     elif bsdf == 'diffuse':
    #         if isinstance(lgt, light.EnvironmentLight):
    #             shaded_col = lgt.shade(gb_pos, gb_normal, kd, ks, view_pos, specular=False)
    #         else:
    #             assert False, "Invalid light type"
    #     elif bsdf == 'normal':
    #         shaded_col = (gb_normal + 1.0)*0.5
    #     elif bsdf == 'tangent':
    #         shaded_col = (gb_tangent + 1.0)*0.5
    #     elif bsdf == 'kd':
    #         shaded_col = kd
    #     elif bsdf == 'ks':
    #         shaded_col = ks
    #     else:
    #         assert False, "Invalid BSDF '%s'" % bsdf

    #     # Return multiple buffers
    #     buffers = {
    #         'shaded'    : torch.cat((shaded_col, alpha), dim=-1),
    #         'kd_grad'   : torch.cat((kd_grad, alpha), dim=-1),
    #         'occlusion' : torch.cat((ks[..., :1], alpha), dim=-1)
    #     }
    #     return buffers

    # ==============================================================================================
    #  Render a depth slice of the mesh (scene), some limitations:
    #  - Single mesh
    #  - Single light
    #  - Single material
    # ==============================================================================================
    def render_layer(
            self,
            rast,
            rast_deriv,
            mesh,
            view_pos,
            resolution,
            spp,
            msaa
    ):

        # Scale down to shading resolution when MSAA is enabled, otherwise shade at full resolution
        rast_out_s = rast
        rast_out_deriv_s = rast_deriv

        ################################################################################
        # Interpolate attributes
        ################################################################################

        # Interpolate world space position
        gb_pos, _ = interpolate(mesh.v_pos[None, ...], rast_out_s, mesh.t_pos_idx.int())

        # Compute geometric normals. We need those because of bent normals trick (for bump mapping)
        v0 = mesh.v_pos[mesh.t_pos_idx[:, 0], :]
        v1 = mesh.v_pos[mesh.t_pos_idx[:, 1], :]
        v2 = mesh.v_pos[mesh.t_pos_idx[:, 2], :]
        face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0))
        face_normal_indices = (torch.arange(0, face_normals.shape[0], dtype=torch.int64, device='cuda')[:, None]).repeat(1, 3)
        gb_geometric_normal, _ = interpolate(face_normals[None, ...], rast_out_s, face_normal_indices.int())

        # Compute tangent space
        assert mesh.v_nrm is not None and mesh.v_tng is not None
        gb_normal, _ = interpolate(mesh.v_nrm[None, ...], rast_out_s, mesh.t_nrm_idx.int())
        gb_tangent, _ = interpolate(mesh.v_tng[None, ...], rast_out_s, mesh.t_tng_idx.int())  # Interpolate tangents

        # Texture coordinate
        # assert mesh.v_tex is not None
        # gb_texc, gb_texc_deriv = interpolate(mesh.v_tex[None, ...], rast_out_s, mesh.t_tex_idx.int(), rast_db=rast_out_deriv_s)
        perturbed_nrm = None
        gb_normal = ru.prepare_shading_normal(gb_pos, view_pos[:, None, None, :], perturbed_nrm, gb_normal, gb_tangent, gb_geometric_normal, two_sided_shading=True, opengl=True)

        return gb_pos, gb_normal

    def render_mesh(
            self,
            mesh_v_pos_bxnx3,
            mesh_t_pos_idx_fx3,
            mesh,
            camera_mv_bx4x4,
            camera_pos,
            mesh_v_feat_bxnxd,
            resolution=256,
            spp=1,
            device='cuda',
            hierarchical_mask=False
    ):
        assert not hierarchical_mask

        mtx_in = torch.tensor(camera_mv_bx4x4, dtype=torch.float32, device=device) if not torch.is_tensor(camera_mv_bx4x4) else camera_mv_bx4x4
        v_pos = xfm_points(mesh_v_pos_bxnx3, mtx_in)  # Rotate it to camera coordinates
        v_pos_clip = self.camera.project(v_pos)  # Projection in the camera

        # view_pos = torch.linalg.inv(mtx_in)[:, :3, 3]
        view_pos = camera_pos
        v_nrm = mesh.v_nrm  # compute_vertex_normal(mesh_v_pos_bxnx3[0], mesh_t_pos_idx_fx3.long())  # vertex normals in world coordinates

        # Render the image,
        # Here we only return the feature (3D location) at each pixel, which will be used as the input for neural render
        num_layers = 1
        mask_pyramid = None
        assert mesh_t_pos_idx_fx3.shape[0] > 0  # Make sure we have shapes

        mesh_v_feat_bxnxd = torch.cat([mesh_v_feat_bxnxd.repeat(v_pos.shape[0], 1, 1), v_pos], dim=-1)  # Concatenate the pos [org_pos, clip space pose for rasterization]

        layers = []
        with dr.DepthPeeler(self.ctx, v_pos_clip, mesh.t_pos_idx.int(), [resolution * spp, resolution * spp]) as peeler:
            for _ in range(num_layers):
                rast, db = peeler.rasterize_next_layer()
                gb_pos, gb_normal = self.render_layer(rast, db, mesh, view_pos, resolution, spp, msaa=False)

        with dr.DepthPeeler(self.ctx, v_pos_clip, mesh_t_pos_idx_fx3, [resolution * spp, resolution * spp]) as peeler:
            for _ in range(num_layers):
                rast, db = peeler.rasterize_next_layer()
                gb_feat, _ = interpolate(mesh_v_feat_bxnxd, rast, mesh_t_pos_idx_fx3)

        hard_mask = torch.clamp(rast[..., -1:], 0, 1)
        antialias_mask = dr.antialias(
            hard_mask.clone().contiguous(), rast, v_pos_clip,
            mesh_t_pos_idx_fx3)

        depth = gb_feat[..., -2:-1]
        ori_mesh_feature = gb_feat[..., :-4]

        normal, _ = interpolate(v_nrm[None, ...], rast, mesh_t_pos_idx_fx3)
        normal = dr.antialias(normal.clone().contiguous(), rast, v_pos_clip, mesh_t_pos_idx_fx3)
        # normal = F.normalize(normal, dim=-1)
        # normal = torch.lerp(torch.zeros_like(normal), (normal + 1.0) / 2.0, hard_mask.float())  # black background
        return ori_mesh_feature, antialias_mask, hard_mask, rast, v_pos_clip, mask_pyramid, depth, normal, gb_normal

    def render_mesh_light(
            self,
            mesh_v_pos_bxnx3,
            mesh_t_pos_idx_fx3,
            mesh,
            camera_mv_bx4x4,
            mesh_v_feat_bxnxd,
            resolution=256,
            spp=1,
            device='cuda',
            hierarchical_mask=False
    ):
        assert not hierarchical_mask

        mtx_in = torch.tensor(camera_mv_bx4x4, dtype=torch.float32, device=device) if not torch.is_tensor(camera_mv_bx4x4) else camera_mv_bx4x4
        v_pos = xfm_points(mesh_v_pos_bxnx3, mtx_in)  # Rotate it to camera coordinates
        v_pos_clip = self.camera.project(v_pos)  # Projection in the camera

        v_nrm = compute_vertex_normal(mesh_v_pos_bxnx3[0], mesh_t_pos_idx_fx3.long())  # vertex normals in world coordinates

        # Render the image,
        # Here we only return the feature (3D location) at each pixel, which will be used as the input for neural render
        num_layers = 1
        mask_pyramid = None
        assert mesh_t_pos_idx_fx3.shape[0] > 0  # Make sure we have shapes
        mesh_v_feat_bxnxd = torch.cat([mesh_v_feat_bxnxd.repeat(v_pos.shape[0], 1, 1), v_pos], dim=-1)  # Concatenate the pos

        with dr.DepthPeeler(self.ctx, v_pos_clip, mesh_t_pos_idx_fx3, [resolution * spp, resolution * spp]) as peeler:
            for _ in range(num_layers):
                rast, db = peeler.rasterize_next_layer()
                gb_feat, _ = interpolate(mesh_v_feat_bxnxd, rast, mesh_t_pos_idx_fx3)
|
279 |
+
|
280 |
+
hard_mask = torch.clamp(rast[..., -1:], 0, 1)
|
281 |
+
antialias_mask = dr.antialias(
|
282 |
+
hard_mask.clone().contiguous(), rast, v_pos_clip,
|
283 |
+
mesh_t_pos_idx_fx3)
|
284 |
+
|
285 |
+
depth = gb_feat[..., -2:-1]
|
286 |
+
ori_mesh_feature = gb_feat[..., :-4]
|
287 |
+
|
288 |
+
normal, _ = interpolate(v_nrm[None, ...], rast, mesh_t_pos_idx_fx3)
|
289 |
+
normal = dr.antialias(normal.clone().contiguous(), rast, v_pos_clip, mesh_t_pos_idx_fx3)
|
290 |
+
normal = F.normalize(normal, dim=-1)
|
291 |
+
normal = torch.lerp(torch.zeros_like(normal), (normal + 1.0) / 2.0, hard_mask.float()) # black background
|
292 |
+
|
293 |
+
return ori_mesh_feature, antialias_mask, hard_mask, rast, v_pos_clip, mask_pyramid, depth, normal
|
src/models/geometry/render/renderutils/__init__.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
2 |
+
#
|
3 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
4 |
+
# property and proprietary rights in and to this material, related
|
5 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
6 |
+
# disclosure or distribution of this material and related documentation
|
7 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
8 |
+
# its affiliates is strictly prohibited.
|
9 |
+
|
10 |
+
from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith
|
11 |
+
__all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ]
|
src/models/geometry/render/renderutils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (612 Bytes). View file
|
|
src/models/geometry/render/renderutils/__pycache__/bsdf.cpython-310.pyc
ADDED
Binary file (4.48 kB). View file
|
|
src/models/geometry/render/renderutils/__pycache__/loss.cpython-310.pyc
ADDED
Binary file (1.22 kB). View file
|
|
src/models/geometry/render/renderutils/__pycache__/ops.cpython-310.pyc
ADDED
Binary file (18.8 kB). View file
|
|
src/models/geometry/render/renderutils/bsdf.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
2 |
+
#
|
3 |
+
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
4 |
+
# property and proprietary rights in and to this material, related
|
5 |
+
# documentation and any modifications thereto. Any use, reproduction,
|
6 |
+
# disclosure or distribution of this material and related documentation
|
7 |
+
# without an express license agreement from NVIDIA CORPORATION or
|
8 |
+
# its affiliates is strictly prohibited.
|
9 |
+
|
10 |
+
import math
|
11 |
+
import torch
|
12 |
+
|
13 |
+
NORMAL_THRESHOLD = 0.1
|
14 |
+
|
15 |
+
################################################################################
|
16 |
+
# Vector utility functions
|
17 |
+
################################################################################
|
18 |
+
|
19 |
+
def _dot(x, y):
|
20 |
+
return torch.sum(x*y, -1, keepdim=True)
|
21 |
+
|
22 |
+
def _reflect(x, n):
|
23 |
+
return 2*_dot(x, n)*n - x
|
24 |
+
|
25 |
+
def _safe_normalize(x):
|
26 |
+
return torch.nn.functional.normalize(x, dim = -1)
|
27 |
+
|
28 |
+
def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading):
|
29 |
+
# Swap normal direction for backfacing surfaces
|
30 |
+
if two_sided_shading:
|
31 |
+
smooth_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, smooth_nrm, -smooth_nrm)
|
32 |
+
geom_nrm = torch.where(_dot(geom_nrm, view_vec) > 0, geom_nrm, -geom_nrm)
|
33 |
+
|
34 |
+
t = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1)
|
35 |
+
return torch.lerp(geom_nrm, smooth_nrm, t)
|
36 |
+
|
37 |
+
|
38 |
+
def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl):
|
39 |
+
smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm))
|
40 |
+
if opengl:
|
41 |
+
shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
|
42 |
+
else:
|
43 |
+
shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
|
44 |
+
return _safe_normalize(shading_nrm)
|
45 |
+
|
46 |
+
def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl):
|
47 |
+
smooth_nrm = _safe_normalize(smooth_nrm)
|
48 |
+
smooth_tng = _safe_normalize(smooth_tng)
|
49 |
+
view_vec = _safe_normalize(view_pos - pos)
|
50 |
+
shading_nrm = _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl)
|
51 |
+
return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading)
|
52 |
+
|
53 |
+
################################################################################
|
54 |
+
# Simple lambertian diffuse BSDF
|
55 |
+
################################################################################
|
56 |
+
|
57 |
+
def bsdf_lambert(nrm, wi):
|
58 |
+
return torch.clamp(_dot(nrm, wi), min=0.0) / math.pi
|
59 |
+
|
60 |
+
################################################################################
|
61 |
+
# Frostbite diffuse
|
62 |
+
################################################################################
|
63 |
+
|
64 |
+
def bsdf_frostbite(nrm, wi, wo, linearRoughness):
|
65 |
+
wiDotN = _dot(wi, nrm)
|
66 |
+
woDotN = _dot(wo, nrm)
|
67 |
+
|
68 |
+
h = _safe_normalize(wo + wi)
|
69 |
+
wiDotH = _dot(wi, h)
|
70 |
+
|
71 |
+
energyBias = 0.5 * linearRoughness
|
72 |
+
energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness
|
73 |
+
f90 = energyBias + 2.0 * wiDotH * wiDotH * linearRoughness
|
74 |
+
f0 = 1.0
|
75 |
+
|
76 |
+
wiScatter = bsdf_fresnel_shlick(f0, f90, wiDotN)
|
77 |
+
woScatter = bsdf_fresnel_shlick(f0, f90, woDotN)
|
78 |
+
res = wiScatter * woScatter * energyFactor
|
79 |
+
return torch.where((wiDotN > 0.0) & (woDotN > 0.0), res, torch.zeros_like(res))
|
80 |
+
|
81 |
+
################################################################################
|
82 |
+
# Phong specular, loosely based on mitsuba implementation
|
83 |
+
################################################################################
|
84 |
+
|
85 |
+
def bsdf_phong(nrm, wo, wi, N):
|
86 |
+
dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0)
|
87 |
+
dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0)
|
88 |
+
return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi)
|
89 |
+
|
90 |
+
################################################################################
|
91 |
+
# PBR's implementation of GGX specular
|
92 |
+
################################################################################
|
93 |
+
|
94 |
+
specular_epsilon = 1e-4
|
95 |
+
|
96 |
+
def bsdf_fresnel_shlick(f0, f90, cosTheta):
|
97 |
+
_cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
|
98 |
+
return f0 + (f90 - f0) * (1.0 - _cosTheta) ** 5.0
|
99 |
+
|
100 |
+
def bsdf_ndf_ggx(alphaSqr, cosTheta):
|
101 |
+
_cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
|
102 |
+
d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1
|
103 |
+
return alphaSqr / (d * d * math.pi)
|
104 |
+
|
105 |
+
def bsdf_lambda_ggx(alphaSqr, cosTheta):
|
106 |
+
_cosTheta = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
|
107 |
+
cosThetaSqr = _cosTheta * _cosTheta
|
108 |
+
tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr
|
109 |
+
res = 0.5 * (torch.sqrt(1 + alphaSqr * tanThetaSqr) - 1.0)
|
110 |
+
return res
|
111 |
+
|
112 |
+
def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO):
|
113 |
+
lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI)
|
114 |
+
lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO)
|
115 |
+
return 1 / (1 + lambdaI + lambdaO)
|
116 |
+
|
117 |
+
def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08):
|
118 |
+
_alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0)
|
119 |
+
alphaSqr = _alpha * _alpha
|
120 |
+
|
121 |
+
h = _safe_normalize(wo + wi)
|
122 |
+
woDotN = _dot(wo, nrm)
|
123 |
+
wiDotN = _dot(wi, nrm)
|
124 |
+
woDotH = _dot(wo, h)
|
125 |
+
nDotH = _dot(nrm, h)
|
126 |
+
|
127 |
+
D = bsdf_ndf_ggx(alphaSqr, nDotH)
|
128 |
+
G = bsdf_masking_smith_ggx_correlated(alphaSqr, woDotN, wiDotN)
|
129 |
+
F = bsdf_fresnel_shlick(col, 1, woDotH)
|
130 |
+
|
131 |
+
w = F * D * G * 0.25 / torch.clamp(woDotN, min=specular_epsilon)
|
132 |
+
|
133 |
+
frontfacing = (woDotN > specular_epsilon) & (wiDotN > specular_epsilon)
|
134 |
+
return torch.where(frontfacing, w, torch.zeros_like(w))
|
135 |
+
|
136 |
+
def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF):
|
137 |
+
wo = _safe_normalize(view_pos - pos)
|
138 |
+
wi = _safe_normalize(light_pos - pos)
|
139 |
+
|
140 |
+
spec_str = arm[..., 0:1] # x component
|
141 |
+
roughness = arm[..., 1:2] # y component
|
142 |
+
metallic = arm[..., 2:3] # z component
|
143 |
+
ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str)
|
144 |
+
kd = kd * (1.0 - metallic)
|
145 |
+
|
146 |
+
if BSDF == 0:
|
147 |
+
diffuse = kd * bsdf_lambert(nrm, wi)
|
148 |
+
else:
|
149 |
+
diffuse = kd * bsdf_frostbite(nrm, wi, wo, roughness)
|
150 |
+
specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness)
|
151 |
+
return diffuse + specular
|
src/models/geometry/render/renderutils/c_src/bsdf.cu
ADDED
@@ -0,0 +1,710 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
3 |
+
*
|
4 |
+
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
5 |
+
* property and proprietary rights in and to this material, related
|
6 |
+
* documentation and any modifications thereto. Any use, reproduction,
|
7 |
+
* disclosure or distribution of this material and related documentation
|
8 |
+
* without an express license agreement from NVIDIA CORPORATION or
|
9 |
+
* its affiliates is strictly prohibited.
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include "common.h"
|
13 |
+
#include "bsdf.h"
|
14 |
+
|
15 |
+
#define SPECULAR_EPSILON 1e-4f
|
16 |
+
|
17 |
+
//------------------------------------------------------------------------
|
18 |
+
// Lambert functions
|
19 |
+
|
20 |
+
__device__ inline float fwdLambert(const vec3f nrm, const vec3f wi)
|
21 |
+
{
|
22 |
+
return max(dot(nrm, wi) / M_PI, 0.0f);
|
23 |
+
}
|
24 |
+
|
25 |
+
__device__ inline void bwdLambert(const vec3f nrm, const vec3f wi, vec3f& d_nrm, vec3f& d_wi, const float d_out)
|
26 |
+
{
|
27 |
+
if (dot(nrm, wi) > 0.0f)
|
28 |
+
bwdDot(nrm, wi, d_nrm, d_wi, d_out / M_PI);
|
29 |
+
}
|
30 |
+
|
31 |
+
//------------------------------------------------------------------------
|
32 |
+
// Fresnel Schlick
|
33 |
+
|
34 |
+
__device__ inline float fwdFresnelSchlick(const float f0, const float f90, const float cosTheta)
|
35 |
+
{
|
36 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
37 |
+
float scale = powf(1.0f - _cosTheta, 5.0f);
|
38 |
+
return f0 * (1.0f - scale) + f90 * scale;
|
39 |
+
}
|
40 |
+
|
41 |
+
__device__ inline void bwdFresnelSchlick(const float f0, const float f90, const float cosTheta, float& d_f0, float& d_f90, float& d_cosTheta, const float d_out)
|
42 |
+
{
|
43 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
44 |
+
float scale = pow(max(1.0f - _cosTheta, 0.0f), 5.0f);
|
45 |
+
d_f0 += d_out * (1.0 - scale);
|
46 |
+
d_f90 += d_out * scale;
|
47 |
+
if (cosTheta >= SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
|
48 |
+
{
|
49 |
+
d_cosTheta += d_out * (f90 - f0) * -5.0f * powf(1.0f - cosTheta, 4.0f);
|
50 |
+
}
|
51 |
+
}
|
52 |
+
|
53 |
+
__device__ inline vec3f fwdFresnelSchlick(const vec3f f0, const vec3f f90, const float cosTheta)
|
54 |
+
{
|
55 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
56 |
+
float scale = powf(1.0f - _cosTheta, 5.0f);
|
57 |
+
return f0 * (1.0f - scale) + f90 * scale;
|
58 |
+
}
|
59 |
+
|
60 |
+
__device__ inline void bwdFresnelSchlick(const vec3f f0, const vec3f f90, const float cosTheta, vec3f& d_f0, vec3f& d_f90, float& d_cosTheta, const vec3f d_out)
|
61 |
+
{
|
62 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
63 |
+
float scale = pow(max(1.0f - _cosTheta, 0.0f), 5.0f);
|
64 |
+
d_f0 += d_out * (1.0 - scale);
|
65 |
+
d_f90 += d_out * scale;
|
66 |
+
if (cosTheta >= SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
|
67 |
+
{
|
68 |
+
d_cosTheta += sum(d_out * (f90 - f0) * -5.0f * powf(1.0f - cosTheta, 4.0f));
|
69 |
+
}
|
70 |
+
}
|
71 |
+
|
72 |
+
//------------------------------------------------------------------------
|
73 |
+
// Frostbite diffuse
|
74 |
+
|
75 |
+
__device__ inline float fwdFrostbiteDiffuse(const vec3f nrm, const vec3f wi, const vec3f wo, float linearRoughness)
|
76 |
+
{
|
77 |
+
float wiDotN = dot(wi, nrm);
|
78 |
+
float woDotN = dot(wo, nrm);
|
79 |
+
if (wiDotN > 0.0f && woDotN > 0.0f)
|
80 |
+
{
|
81 |
+
vec3f h = safeNormalize(wo + wi);
|
82 |
+
float wiDotH = dot(wi, h);
|
83 |
+
|
84 |
+
float energyBias = 0.5f * linearRoughness;
|
85 |
+
float energyFactor = 1.0f - (0.51f / 1.51f) * linearRoughness;
|
86 |
+
float f90 = energyBias + 2.f * wiDotH * wiDotH * linearRoughness;
|
87 |
+
float f0 = 1.f;
|
88 |
+
|
89 |
+
float wiScatter = fwdFresnelSchlick(f0, f90, wiDotN);
|
90 |
+
float woScatter = fwdFresnelSchlick(f0, f90, woDotN);
|
91 |
+
|
92 |
+
return wiScatter * woScatter * energyFactor;
|
93 |
+
}
|
94 |
+
else return 0.0f;
|
95 |
+
}
|
96 |
+
|
97 |
+
__device__ inline void bwdFrostbiteDiffuse(const vec3f nrm, const vec3f wi, const vec3f wo, float linearRoughness, vec3f& d_nrm, vec3f& d_wi, vec3f& d_wo, float &d_linearRoughness, const float d_out)
|
98 |
+
{
|
99 |
+
float wiDotN = dot(wi, nrm);
|
100 |
+
float woDotN = dot(wo, nrm);
|
101 |
+
|
102 |
+
if (wiDotN > 0.0f && woDotN > 0.0f)
|
103 |
+
{
|
104 |
+
vec3f h = safeNormalize(wo + wi);
|
105 |
+
float wiDotH = dot(wi, h);
|
106 |
+
|
107 |
+
float energyBias = 0.5f * linearRoughness;
|
108 |
+
float energyFactor = 1.0f - (0.51f / 1.51f) * linearRoughness;
|
109 |
+
float f90 = energyBias + 2.f * wiDotH * wiDotH * linearRoughness;
|
110 |
+
float f0 = 1.f;
|
111 |
+
|
112 |
+
float wiScatter = fwdFresnelSchlick(f0, f90, wiDotN);
|
113 |
+
float woScatter = fwdFresnelSchlick(f0, f90, woDotN);
|
114 |
+
|
115 |
+
// -------------- BWD --------------
|
116 |
+
// Backprop: return wiScatter * woScatter * energyFactor;
|
117 |
+
float d_wiScatter = d_out * woScatter * energyFactor;
|
118 |
+
float d_woScatter = d_out * wiScatter * energyFactor;
|
119 |
+
float d_energyFactor = d_out * wiScatter * woScatter;
|
120 |
+
|
121 |
+
// Backprop: float woScatter = fwdFresnelSchlick(f0, f90, woDotN);
|
122 |
+
float d_woDotN = 0.0f, d_f0 = 0.0, d_f90 = 0.0f;
|
123 |
+
bwdFresnelSchlick(f0, f90, woDotN, d_f0, d_f90, d_woDotN, d_woScatter);
|
124 |
+
|
125 |
+
// Backprop: float wiScatter = fwdFresnelSchlick(fd0, fd90, wiDotN);
|
126 |
+
float d_wiDotN = 0.0f;
|
127 |
+
bwdFresnelSchlick(f0, f90, wiDotN, d_f0, d_f90, d_wiDotN, d_wiScatter);
|
128 |
+
|
129 |
+
// Backprop: float f90 = energyBias + 2.f * wiDotH * wiDotH * linearRoughness;
|
130 |
+
float d_energyBias = d_f90;
|
131 |
+
float d_wiDotH = d_f90 * 4 * wiDotH * linearRoughness;
|
132 |
+
d_linearRoughness += d_f90 * 2 * wiDotH * wiDotH;
|
133 |
+
|
134 |
+
// Backprop: float energyFactor = 1.0f - (0.51f / 1.51f) * linearRoughness;
|
135 |
+
d_linearRoughness -= (0.51f / 1.51f) * d_energyFactor;
|
136 |
+
|
137 |
+
// Backprop: float energyBias = 0.5f * linearRoughness;
|
138 |
+
d_linearRoughness += 0.5 * d_energyBias;
|
139 |
+
|
140 |
+
// Backprop: float wiDotH = dot(wi, h);
|
141 |
+
vec3f d_h(0);
|
142 |
+
bwdDot(wi, h, d_wi, d_h, d_wiDotH);
|
143 |
+
|
144 |
+
// Backprop: vec3f h = safeNormalize(wo + wi);
|
145 |
+
vec3f d_wo_wi(0);
|
146 |
+
bwdSafeNormalize(wo + wi, d_wo_wi, d_h);
|
147 |
+
d_wi += d_wo_wi; d_wo += d_wo_wi;
|
148 |
+
|
149 |
+
bwdDot(wo, nrm, d_wo, d_nrm, d_woDotN);
|
150 |
+
bwdDot(wi, nrm, d_wi, d_nrm, d_wiDotN);
|
151 |
+
}
|
152 |
+
}
|
153 |
+
|
154 |
+
//------------------------------------------------------------------------
|
155 |
+
// Ndf GGX
|
156 |
+
|
157 |
+
__device__ inline float fwdNdfGGX(const float alphaSqr, const float cosTheta)
|
158 |
+
{
|
159 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
160 |
+
float d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1.0f;
|
161 |
+
return alphaSqr / (d * d * M_PI);
|
162 |
+
}
|
163 |
+
|
164 |
+
__device__ inline void bwdNdfGGX(const float alphaSqr, const float cosTheta, float& d_alphaSqr, float& d_cosTheta, const float d_out)
|
165 |
+
{
|
166 |
+
// Torch only back propagates if clamp doesn't trigger
|
167 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
168 |
+
float cosThetaSqr = _cosTheta * _cosTheta;
|
169 |
+
d_alphaSqr += d_out * (1.0f - (alphaSqr + 1.0f) * cosThetaSqr) / (M_PI * powf((alphaSqr - 1.0) * cosThetaSqr + 1.0f, 3.0f));
|
170 |
+
if (cosTheta > SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
|
171 |
+
{
|
172 |
+
d_cosTheta += d_out * -(4.0f * (alphaSqr - 1.0f) * alphaSqr * cosTheta) / (M_PI * powf((alphaSqr - 1.0) * cosThetaSqr + 1.0f, 3.0f));
|
173 |
+
}
|
174 |
+
}
|
175 |
+
|
176 |
+
//------------------------------------------------------------------------
|
177 |
+
// Lambda GGX
|
178 |
+
|
179 |
+
__device__ inline float fwdLambdaGGX(const float alphaSqr, const float cosTheta)
|
180 |
+
{
|
181 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
182 |
+
float cosThetaSqr = _cosTheta * _cosTheta;
|
183 |
+
float tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr;
|
184 |
+
float res = 0.5f * (sqrtf(1.0f + alphaSqr * tanThetaSqr) - 1.0f);
|
185 |
+
return res;
|
186 |
+
}
|
187 |
+
|
188 |
+
__device__ inline void bwdLambdaGGX(const float alphaSqr, const float cosTheta, float& d_alphaSqr, float& d_cosTheta, const float d_out)
|
189 |
+
{
|
190 |
+
float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
|
191 |
+
float cosThetaSqr = _cosTheta * _cosTheta;
|
192 |
+
float tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr;
|
193 |
+
float res = 0.5f * (sqrtf(1.0f + alphaSqr * tanThetaSqr) - 1.0f);
|
194 |
+
|
195 |
+
d_alphaSqr += d_out * (0.25 * tanThetaSqr) / sqrtf(alphaSqr * tanThetaSqr + 1.0f);
|
196 |
+
if (cosTheta > SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
|
197 |
+
d_cosTheta += d_out * -(0.5 * alphaSqr) / (powf(_cosTheta, 3.0f) * sqrtf(alphaSqr / cosThetaSqr - alphaSqr + 1.0f));
|
198 |
+
}
|
199 |
+
|
200 |
+
//------------------------------------------------------------------------
|
201 |
+
// Masking GGX
|
202 |
+
|
203 |
+
__device__ inline float fwdMaskingSmithGGXCorrelated(const float alphaSqr, const float cosThetaI, const float cosThetaO)
|
204 |
+
{
|
205 |
+
float lambdaI = fwdLambdaGGX(alphaSqr, cosThetaI);
|
206 |
+
float lambdaO = fwdLambdaGGX(alphaSqr, cosThetaO);
|
207 |
+
return 1.0f / (1.0f + lambdaI + lambdaO);
|
208 |
+
}
|
209 |
+
|
210 |
+
__device__ inline void bwdMaskingSmithGGXCorrelated(const float alphaSqr, const float cosThetaI, const float cosThetaO, float& d_alphaSqr, float& d_cosThetaI, float& d_cosThetaO, const float d_out)
|
211 |
+
{
|
212 |
+
// FWD eval
|
213 |
+
float lambdaI = fwdLambdaGGX(alphaSqr, cosThetaI);
|
214 |
+
float lambdaO = fwdLambdaGGX(alphaSqr, cosThetaO);
|
215 |
+
|
216 |
+
// BWD eval
|
217 |
+
float d_lambdaIO = -d_out / powf(1.0f + lambdaI + lambdaO, 2.0f);
|
218 |
+
bwdLambdaGGX(alphaSqr, cosThetaI, d_alphaSqr, d_cosThetaI, d_lambdaIO);
|
219 |
+
bwdLambdaGGX(alphaSqr, cosThetaO, d_alphaSqr, d_cosThetaO, d_lambdaIO);
|
220 |
+
}
|
221 |
+
|
222 |
+
//------------------------------------------------------------------------
|
223 |
+
// GGX specular
|
224 |
+
|
225 |
+
__device__ vec3f fwdPbrSpecular(const vec3f col, const vec3f nrm, const vec3f wo, const vec3f wi, const float alpha, const float min_roughness)
|
226 |
+
{
|
227 |
+
float _alpha = clamp(alpha, min_roughness * min_roughness, 1.0f);
|
228 |
+
float alphaSqr = _alpha * _alpha;
|
229 |
+
|
230 |
+
vec3f h = safeNormalize(wo + wi);
|
231 |
+
float woDotN = dot(wo, nrm);
|
232 |
+
float wiDotN = dot(wi, nrm);
|
233 |
+
float woDotH = dot(wo, h);
|
234 |
+
float nDotH = dot(nrm, h);
|
235 |
+
|
236 |
+
float D = fwdNdfGGX(alphaSqr, nDotH);
|
237 |
+
float G = fwdMaskingSmithGGXCorrelated(alphaSqr, woDotN, wiDotN);
|
238 |
+
vec3f F = fwdFresnelSchlick(col, 1.0f, woDotH);
|
239 |
+
vec3f w = F * D * G * 0.25 / woDotN;
|
240 |
+
|
241 |
+
bool frontfacing = (woDotN > SPECULAR_EPSILON) & (wiDotN > SPECULAR_EPSILON);
|
242 |
+
return frontfacing ? w : 0.0f;
|
243 |
+
}
|
244 |
+
|
245 |
+
__device__ void bwdPbrSpecular(
|
246 |
+
const vec3f col, const vec3f nrm, const vec3f wo, const vec3f wi, const float alpha, const float min_roughness,
|
247 |
+
vec3f& d_col, vec3f& d_nrm, vec3f& d_wo, vec3f& d_wi, float& d_alpha, const vec3f d_out)
|
248 |
+
{
|
249 |
+
///////////////////////////////////////////////////////////////////////
|
250 |
+
// FWD eval
|
251 |
+
|
252 |
+
float _alpha = clamp(alpha, min_roughness * min_roughness, 1.0f);
|
253 |
+
float alphaSqr = _alpha * _alpha;
|
254 |
+
|
255 |
+
vec3f h = safeNormalize(wo + wi);
|
256 |
+
float woDotN = dot(wo, nrm);
|
257 |
+
float wiDotN = dot(wi, nrm);
|
258 |
+
float woDotH = dot(wo, h);
|
259 |
+
float nDotH = dot(nrm, h);
|
260 |
+
|
261 |
+
float D = fwdNdfGGX(alphaSqr, nDotH);
|
262 |
+
float G = fwdMaskingSmithGGXCorrelated(alphaSqr, woDotN, wiDotN);
|
263 |
+
vec3f F = fwdFresnelSchlick(col, 1.0f, woDotH);
|
264 |
+
vec3f w = F * D * G * 0.25 / woDotN;
|
265 |
+
bool frontfacing = (woDotN > SPECULAR_EPSILON) & (wiDotN > SPECULAR_EPSILON);
|
266 |
+
|
267 |
+
if (frontfacing)
|
268 |
+
{
|
269 |
+
///////////////////////////////////////////////////////////////////////
|
270 |
+
// BWD eval
|
271 |
+
|
272 |
+
vec3f d_F = d_out * D * G * 0.25f / woDotN;
|
273 |
+
float d_D = sum(d_out * F * G * 0.25f / woDotN);
|
274 |
+
float d_G = sum(d_out * F * D * 0.25f / woDotN);
|
275 |
+
|
276 |
+
float d_woDotN = -sum(d_out * F * D * G * 0.25f / (woDotN * woDotN));
|
277 |
+
|
278 |
+
vec3f d_f90(0);
|
279 |
+
float d_woDotH(0), d_wiDotN(0), d_nDotH(0), d_alphaSqr(0);
|
280 |
+
bwdFresnelSchlick(col, 1.0f, woDotH, d_col, d_f90, d_woDotH, d_F);
|
281 |
+
bwdMaskingSmithGGXCorrelated(alphaSqr, woDotN, wiDotN, d_alphaSqr, d_woDotN, d_wiDotN, d_G);
|
282 |
+
bwdNdfGGX(alphaSqr, nDotH, d_alphaSqr, d_nDotH, d_D);
|
283 |
+
|
284 |
+
vec3f d_h(0);
|
285 |
+
bwdDot(nrm, h, d_nrm, d_h, d_nDotH);
|
286 |
+
bwdDot(wo, h, d_wo, d_h, d_woDotH);
|
287 |
+
bwdDot(wi, nrm, d_wi, d_nrm, d_wiDotN);
|
288 |
+
bwdDot(wo, nrm, d_wo, d_nrm, d_woDotN);
|
289 |
+
|
290 |
+
vec3f d_h_unnorm(0);
|
291 |
+
bwdSafeNormalize(wo + wi, d_h_unnorm, d_h);
|
292 |
+
d_wo += d_h_unnorm;
|
293 |
+
d_wi += d_h_unnorm;
|
294 |
+
|
295 |
+
if (alpha > min_roughness * min_roughness)
|
296 |
+
d_alpha += d_alphaSqr * 2 * alpha;
|
297 |
+
}
|
298 |
+
}
|
299 |
+
|
300 |
+
//------------------------------------------------------------------------
|
301 |
+
// Full PBR BSDF
|
302 |
+
|
303 |
+
__device__ vec3f fwdPbrBSDF(const vec3f kd, const vec3f arm, const vec3f pos, const vec3f nrm, const vec3f view_pos, const vec3f light_pos, const float min_roughness, int BSDF)
|
304 |
+
{
|
305 |
+
vec3f wo = safeNormalize(view_pos - pos);
|
306 |
+
vec3f wi = safeNormalize(light_pos - pos);
|
307 |
+
|
308 |
+
float alpha = arm.y * arm.y;
|
309 |
+
vec3f spec_col = (0.04f * (1.0f - arm.z) + kd * arm.z) * (1.0 - arm.x);
|
310 |
+
vec3f diff_col = kd * (1.0f - arm.z);
|
311 |
+
|
312 |
+
float diff = 0.0f;
|
313 |
+
if (BSDF == 0)
|
314 |
+
diff = fwdLambert(nrm, wi);
|
315 |
+
else
|
316 |
+
diff = fwdFrostbiteDiffuse(nrm, wi, wo, arm.y);
|
317 |
+
vec3f diffuse = diff_col * diff;
|
318 |
+
vec3f specular = fwdPbrSpecular(spec_col, nrm, wo, wi, alpha, min_roughness);
|
319 |
+
|
320 |
+
return diffuse + specular;
|
321 |
+
}
|
322 |
+
|
323 |
+
__device__ void bwdPbrBSDF(
|
324 |
+
const vec3f kd, const vec3f arm, const vec3f pos, const vec3f nrm, const vec3f view_pos, const vec3f light_pos, const float min_roughness, int BSDF,
|
325 |
+
vec3f& d_kd, vec3f& d_arm, vec3f& d_pos, vec3f& d_nrm, vec3f& d_view_pos, vec3f& d_light_pos, const vec3f d_out)
|
326 |
+
{
|
327 |
+
////////////////////////////////////////////////////////////////////////
|
328 |
+
// FWD
|
329 |
+
vec3f _wi = light_pos - pos;
|
330 |
+
vec3f _wo = view_pos - pos;
|
331 |
+
vec3f wi = safeNormalize(_wi);
|
332 |
+
vec3f wo = safeNormalize(_wo);
|
333 |
+
|
334 |
+
float alpha = arm.y * arm.y;
|
335 |
+
vec3f spec_col = (0.04f * (1.0f - arm.z) + kd * arm.z) * (1.0 - arm.x);
|
336 |
+
vec3f diff_col = kd * (1.0f - arm.z);
|
337 |
+
float diff = 0.0f;
|
338 |
+
if (BSDF == 0)
|
339 |
+
diff = fwdLambert(nrm, wi);
|
340 |
+
else
|
341 |
+
diff = fwdFrostbiteDiffuse(nrm, wi, wo, arm.y);
|
342 |
+
|
343 |
+
////////////////////////////////////////////////////////////////////////
|
344 |
+
// BWD
|
345 |
+
|
346 |
+
float d_alpha(0);
|
347 |
+
vec3f d_spec_col(0), d_wi(0), d_wo(0);
|
348 |
+
bwdPbrSpecular(spec_col, nrm, wo, wi, alpha, min_roughness, d_spec_col, d_nrm, d_wo, d_wi, d_alpha, d_out);
|
349 |
+
|
350 |
+
float d_diff = sum(diff_col * d_out);
|
351 |
+
if (BSDF == 0)
|
352 |
+
bwdLambert(nrm, wi, d_nrm, d_wi, d_diff);
|
353 |
+
else
|
354 |
+
bwdFrostbiteDiffuse(nrm, wi, wo, arm.y, d_nrm, d_wi, d_wo, d_arm.y, d_diff);
|
355 |
+
|
356 |
+
// Backprop: diff_col = kd * (1.0f - arm.z)
|
357 |
+
vec3f d_diff_col = d_out * diff;
|
358 |
+
d_kd += d_diff_col * (1.0f - arm.z);
|
359 |
+
d_arm.z -= sum(d_diff_col * kd);
|
360 |
+
|
361 |
+
// Backprop: spec_col = (0.04f * (1.0f - arm.z) + kd * arm.z) * (1.0 - arm.x)
|
362 |
+
d_kd -= d_spec_col * (arm.x - 1.0f) * arm.z;
|
363 |
+
d_arm.x += sum(d_spec_col * (arm.z * (0.04f - kd) - 0.04f));
|
364 |
+
d_arm.z -= sum(d_spec_col * (kd - 0.04f) * (arm.x - 1.0f));
|
365 |
+
|
366 |
+
// Backprop: alpha = arm.y * arm.y
|
367 |
+
d_arm.y += d_alpha * 2 * arm.y;
|
368 |
+
|
369 |
+
// Backprop: vec3f wi = safeNormalize(light_pos - pos);
|
370 |
+
vec3f d__wi(0);
|
371 |
+
bwdSafeNormalize(_wi, d__wi, d_wi);
|
372 |
+
d_light_pos += d__wi;
|
373 |
+
d_pos -= d__wi;
|
374 |
+
|
375 |
+
// Backprop: vec3f wo = safeNormalize(view_pos - pos);
|
376 |
+
vec3f d__wo(0);
|
377 |
+
bwdSafeNormalize(_wo, d__wo, d_wo);
|
378 |
+
d_view_pos += d__wo;
|
379 |
+
d_pos -= d__wo;
|
380 |
+
}
|
381 |
+
|
382 |
+
//------------------------------------------------------------------------
|
383 |
+
// Kernels
|
384 |
+
|
385 |
+
__global__ void LambertFwdKernel(LambertKernelParams p)
|
386 |
+
{
|
387 |
+
// Calculate pixel position.
|
388 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
389 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
390 |
+
unsigned int pz = blockIdx.z;
|
391 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
392 |
+
return;
|
393 |
+
|
394 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
395 |
+
vec3f wi = p.wi.fetch3(px, py, pz);
|
396 |
+
|
397 |
+
float res = fwdLambert(nrm, wi);
|
398 |
+
|
399 |
+
p.out.store(px, py, pz, res);
|
400 |
+
}
|
401 |
+
|
402 |
+
__global__ void LambertBwdKernel(LambertKernelParams p)
|
403 |
+
{
|
404 |
+
// Calculate pixel position.
|
405 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
406 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
407 |
+
unsigned int pz = blockIdx.z;
|
408 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
409 |
+
return;
|
410 |
+
|
411 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
412 |
+
vec3f wi = p.wi.fetch3(px, py, pz);
|
413 |
+
float d_out = p.out.fetch1(px, py, pz);
|
414 |
+
|
415 |
+
vec3f d_nrm(0), d_wi(0);
|
416 |
+
bwdLambert(nrm, wi, d_nrm, d_wi, d_out);
|
417 |
+
|
418 |
+
p.nrm.store_grad(px, py, pz, d_nrm);
|
419 |
+
p.wi.store_grad(px, py, pz, d_wi);
|
420 |
+
}
|
421 |
+
|
422 |
+
__global__ void FrostbiteDiffuseFwdKernel(FrostbiteDiffuseKernelParams p)
|
423 |
+
{
|
424 |
+
// Calculate pixel position.
|
425 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
426 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
427 |
+
unsigned int pz = blockIdx.z;
|
428 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
429 |
+
return;
|
430 |
+
|
431 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
432 |
+
vec3f wi = p.wi.fetch3(px, py, pz);
|
433 |
+
vec3f wo = p.wo.fetch3(px, py, pz);
|
434 |
+
float linearRoughness = p.linearRoughness.fetch1(px, py, pz);
|
435 |
+
|
436 |
+
float res = fwdFrostbiteDiffuse(nrm, wi, wo, linearRoughness);
|
437 |
+
|
438 |
+
p.out.store(px, py, pz, res);
|
439 |
+
}
|
440 |
+
|
441 |
+
__global__ void FrostbiteDiffuseBwdKernel(FrostbiteDiffuseKernelParams p)
|
442 |
+
{
|
443 |
+
// Calculate pixel position.
|
444 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
445 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
446 |
+
unsigned int pz = blockIdx.z;
|
447 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
448 |
+
return;
|
449 |
+
|
450 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
451 |
+
vec3f wi = p.wi.fetch3(px, py, pz);
|
452 |
+
vec3f wo = p.wo.fetch3(px, py, pz);
|
453 |
+
float linearRoughness = p.linearRoughness.fetch1(px, py, pz);
|
454 |
+
float d_out = p.out.fetch1(px, py, pz);
|
455 |
+
|
456 |
+
float d_linearRoughness = 0.0f;
|
457 |
+
vec3f d_nrm(0), d_wi(0), d_wo(0);
|
458 |
+
bwdFrostbiteDiffuse(nrm, wi, wo, linearRoughness, d_nrm, d_wi, d_wo, d_linearRoughness, d_out);
|
459 |
+
|
460 |
+
p.nrm.store_grad(px, py, pz, d_nrm);
|
461 |
+
p.wi.store_grad(px, py, pz, d_wi);
|
462 |
+
p.wo.store_grad(px, py, pz, d_wo);
|
463 |
+
p.linearRoughness.store_grad(px, py, pz, d_linearRoughness);
|
464 |
+
}
|
465 |
+
|
466 |
+
__global__ void FresnelShlickFwdKernel(FresnelShlickKernelParams p)
|
467 |
+
{
|
468 |
+
// Calculate pixel position.
|
469 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
470 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
471 |
+
unsigned int pz = blockIdx.z;
|
472 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
473 |
+
return;
|
474 |
+
|
475 |
+
vec3f f0 = p.f0.fetch3(px, py, pz);
|
476 |
+
vec3f f90 = p.f90.fetch3(px, py, pz);
|
477 |
+
float cosTheta = p.cosTheta.fetch1(px, py, pz);
|
478 |
+
|
479 |
+
vec3f res = fwdFresnelSchlick(f0, f90, cosTheta);
|
480 |
+
p.out.store(px, py, pz, res);
|
481 |
+
}
|
482 |
+
|
483 |
+
__global__ void FresnelShlickBwdKernel(FresnelShlickKernelParams p)
|
484 |
+
{
|
485 |
+
// Calculate pixel position.
|
486 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
487 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
488 |
+
unsigned int pz = blockIdx.z;
|
489 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
490 |
+
return;
|
491 |
+
|
492 |
+
vec3f f0 = p.f0.fetch3(px, py, pz);
|
493 |
+
vec3f f90 = p.f90.fetch3(px, py, pz);
|
494 |
+
float cosTheta = p.cosTheta.fetch1(px, py, pz);
|
495 |
+
vec3f d_out = p.out.fetch3(px, py, pz);
|
496 |
+
|
497 |
+
vec3f d_f0(0), d_f90(0);
|
498 |
+
float d_cosTheta(0);
|
499 |
+
bwdFresnelSchlick(f0, f90, cosTheta, d_f0, d_f90, d_cosTheta, d_out);
|
500 |
+
|
501 |
+
p.f0.store_grad(px, py, pz, d_f0);
|
502 |
+
p.f90.store_grad(px, py, pz, d_f90);
|
503 |
+
p.cosTheta.store_grad(px, py, pz, d_cosTheta);
|
504 |
+
}
|
505 |
+
|
506 |
+
__global__ void ndfGGXFwdKernel(NdfGGXParams p)
|
507 |
+
{
|
508 |
+
// Calculate pixel position.
|
509 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
510 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
511 |
+
unsigned int pz = blockIdx.z;
|
512 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
513 |
+
return;
|
514 |
+
|
515 |
+
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
|
516 |
+
float cosTheta = p.cosTheta.fetch1(px, py, pz);
|
517 |
+
float res = fwdNdfGGX(alphaSqr, cosTheta);
|
518 |
+
|
519 |
+
p.out.store(px, py, pz, res);
|
520 |
+
}
|
521 |
+
|
522 |
+
__global__ void ndfGGXBwdKernel(NdfGGXParams p)
|
523 |
+
{
|
524 |
+
// Calculate pixel position.
|
525 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
526 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
527 |
+
unsigned int pz = blockIdx.z;
|
528 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
529 |
+
return;
|
530 |
+
|
531 |
+
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
|
532 |
+
float cosTheta = p.cosTheta.fetch1(px, py, pz);
|
533 |
+
float d_out = p.out.fetch1(px, py, pz);
|
534 |
+
|
535 |
+
float d_alphaSqr(0), d_cosTheta(0);
|
536 |
+
bwdNdfGGX(alphaSqr, cosTheta, d_alphaSqr, d_cosTheta, d_out);
|
537 |
+
|
538 |
+
p.alphaSqr.store_grad(px, py, pz, d_alphaSqr);
|
539 |
+
p.cosTheta.store_grad(px, py, pz, d_cosTheta);
|
540 |
+
}
|
541 |
+
|
542 |
+
__global__ void lambdaGGXFwdKernel(NdfGGXParams p)
|
543 |
+
{
|
544 |
+
// Calculate pixel position.
|
545 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
546 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
547 |
+
unsigned int pz = blockIdx.z;
|
548 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
549 |
+
return;
|
550 |
+
|
551 |
+
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
|
552 |
+
float cosTheta = p.cosTheta.fetch1(px, py, pz);
|
553 |
+
float res = fwdLambdaGGX(alphaSqr, cosTheta);
|
554 |
+
|
555 |
+
p.out.store(px, py, pz, res);
|
556 |
+
}
|
557 |
+
|
558 |
+
__global__ void lambdaGGXBwdKernel(NdfGGXParams p)
|
559 |
+
{
|
560 |
+
// Calculate pixel position.
|
561 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
562 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
563 |
+
unsigned int pz = blockIdx.z;
|
564 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
565 |
+
return;
|
566 |
+
|
567 |
+
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
|
568 |
+
float cosTheta = p.cosTheta.fetch1(px, py, pz);
|
569 |
+
float d_out = p.out.fetch1(px, py, pz);
|
570 |
+
|
571 |
+
float d_alphaSqr(0), d_cosTheta(0);
|
572 |
+
bwdLambdaGGX(alphaSqr, cosTheta, d_alphaSqr, d_cosTheta, d_out);
|
573 |
+
|
574 |
+
p.alphaSqr.store_grad(px, py, pz, d_alphaSqr);
|
575 |
+
p.cosTheta.store_grad(px, py, pz, d_cosTheta);
|
576 |
+
}
|
577 |
+
|
578 |
+
__global__ void maskingSmithFwdKernel(MaskingSmithParams p)
|
579 |
+
{
|
580 |
+
// Calculate pixel position.
|
581 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
582 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
583 |
+
unsigned int pz = blockIdx.z;
|
584 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
585 |
+
return;
|
586 |
+
|
587 |
+
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
|
588 |
+
float cosThetaI = p.cosThetaI.fetch1(px, py, pz);
|
589 |
+
float cosThetaO = p.cosThetaO.fetch1(px, py, pz);
|
590 |
+
float res = fwdMaskingSmithGGXCorrelated(alphaSqr, cosThetaI, cosThetaO);
|
591 |
+
|
592 |
+
p.out.store(px, py, pz, res);
|
593 |
+
}
|
594 |
+
|
595 |
+
__global__ void maskingSmithBwdKernel(MaskingSmithParams p)
|
596 |
+
{
|
597 |
+
// Calculate pixel position.
|
598 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
599 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
600 |
+
unsigned int pz = blockIdx.z;
|
601 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
602 |
+
return;
|
603 |
+
|
604 |
+
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
|
605 |
+
float cosThetaI = p.cosThetaI.fetch1(px, py, pz);
|
606 |
+
float cosThetaO = p.cosThetaO.fetch1(px, py, pz);
|
607 |
+
float d_out = p.out.fetch1(px, py, pz);
|
608 |
+
|
609 |
+
float d_alphaSqr(0), d_cosThetaI(0), d_cosThetaO(0);
|
610 |
+
bwdMaskingSmithGGXCorrelated(alphaSqr, cosThetaI, cosThetaO, d_alphaSqr, d_cosThetaI, d_cosThetaO, d_out);
|
611 |
+
|
612 |
+
p.alphaSqr.store_grad(px, py, pz, d_alphaSqr);
|
613 |
+
p.cosThetaI.store_grad(px, py, pz, d_cosThetaI);
|
614 |
+
p.cosThetaO.store_grad(px, py, pz, d_cosThetaO);
|
615 |
+
}
|
616 |
+
|
617 |
+
__global__ void pbrSpecularFwdKernel(PbrSpecular p)
|
618 |
+
{
|
619 |
+
// Calculate pixel position.
|
620 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
621 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
622 |
+
unsigned int pz = blockIdx.z;
|
623 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
624 |
+
return;
|
625 |
+
|
626 |
+
vec3f col = p.col.fetch3(px, py, pz);
|
627 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
628 |
+
vec3f wo = p.wo.fetch3(px, py, pz);
|
629 |
+
vec3f wi = p.wi.fetch3(px, py, pz);
|
630 |
+
float alpha = p.alpha.fetch1(px, py, pz);
|
631 |
+
|
632 |
+
vec3f res = fwdPbrSpecular(col, nrm, wo, wi, alpha, p.min_roughness);
|
633 |
+
|
634 |
+
p.out.store(px, py, pz, res);
|
635 |
+
}
|
636 |
+
|
637 |
+
__global__ void pbrSpecularBwdKernel(PbrSpecular p)
|
638 |
+
{
|
639 |
+
// Calculate pixel position.
|
640 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
641 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
642 |
+
unsigned int pz = blockIdx.z;
|
643 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
644 |
+
return;
|
645 |
+
|
646 |
+
vec3f col = p.col.fetch3(px, py, pz);
|
647 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
648 |
+
vec3f wo = p.wo.fetch3(px, py, pz);
|
649 |
+
vec3f wi = p.wi.fetch3(px, py, pz);
|
650 |
+
float alpha = p.alpha.fetch1(px, py, pz);
|
651 |
+
vec3f d_out = p.out.fetch3(px, py, pz);
|
652 |
+
|
653 |
+
float d_alpha(0);
|
654 |
+
vec3f d_col(0), d_nrm(0), d_wo(0), d_wi(0);
|
655 |
+
bwdPbrSpecular(col, nrm, wo, wi, alpha, p.min_roughness, d_col, d_nrm, d_wo, d_wi, d_alpha, d_out);
|
656 |
+
|
657 |
+
p.col.store_grad(px, py, pz, d_col);
|
658 |
+
p.nrm.store_grad(px, py, pz, d_nrm);
|
659 |
+
p.wo.store_grad(px, py, pz, d_wo);
|
660 |
+
p.wi.store_grad(px, py, pz, d_wi);
|
661 |
+
p.alpha.store_grad(px, py, pz, d_alpha);
|
662 |
+
}
|
663 |
+
|
664 |
+
__global__ void pbrBSDFFwdKernel(PbrBSDF p)
|
665 |
+
{
|
666 |
+
// Calculate pixel position.
|
667 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
668 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
669 |
+
unsigned int pz = blockIdx.z;
|
670 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
671 |
+
return;
|
672 |
+
|
673 |
+
vec3f kd = p.kd.fetch3(px, py, pz);
|
674 |
+
vec3f arm = p.arm.fetch3(px, py, pz);
|
675 |
+
vec3f pos = p.pos.fetch3(px, py, pz);
|
676 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
677 |
+
vec3f view_pos = p.view_pos.fetch3(px, py, pz);
|
678 |
+
vec3f light_pos = p.light_pos.fetch3(px, py, pz);
|
679 |
+
|
680 |
+
vec3f res = fwdPbrBSDF(kd, arm, pos, nrm, view_pos, light_pos, p.min_roughness, p.BSDF);
|
681 |
+
|
682 |
+
p.out.store(px, py, pz, res);
|
683 |
+
}
|
684 |
+
__global__ void pbrBSDFBwdKernel(PbrBSDF p)
|
685 |
+
{
|
686 |
+
// Calculate pixel position.
|
687 |
+
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
|
688 |
+
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
|
689 |
+
unsigned int pz = blockIdx.z;
|
690 |
+
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
|
691 |
+
return;
|
692 |
+
|
693 |
+
vec3f kd = p.kd.fetch3(px, py, pz);
|
694 |
+
vec3f arm = p.arm.fetch3(px, py, pz);
|
695 |
+
vec3f pos = p.pos.fetch3(px, py, pz);
|
696 |
+
vec3f nrm = p.nrm.fetch3(px, py, pz);
|
697 |
+
vec3f view_pos = p.view_pos.fetch3(px, py, pz);
|
698 |
+
vec3f light_pos = p.light_pos.fetch3(px, py, pz);
|
699 |
+
vec3f d_out = p.out.fetch3(px, py, pz);
|
700 |
+
|
701 |
+
vec3f d_kd(0), d_arm(0), d_pos(0), d_nrm(0), d_view_pos(0), d_light_pos(0);
|
702 |
+
bwdPbrBSDF(kd, arm, pos, nrm, view_pos, light_pos, p.min_roughness, p.BSDF, d_kd, d_arm, d_pos, d_nrm, d_view_pos, d_light_pos, d_out);
|
703 |
+
|
704 |
+
p.kd.store_grad(px, py, pz, d_kd);
|
705 |
+
p.arm.store_grad(px, py, pz, d_arm);
|
706 |
+
p.pos.store_grad(px, py, pz, d_pos);
|
707 |
+
p.nrm.store_grad(px, py, pz, d_nrm);
|
708 |
+
p.view_pos.store_grad(px, py, pz, d_view_pos);
|
709 |
+
p.light_pos.store_grad(px, py, pz, d_light_pos);
|
710 |
+
}
|